module Main (main) where

import Data.List
import Control.Monad
import qualified Data.Map as Map
import System.Environment
import System.IO
import qualified Data.HashMap.Strict as HM
import Data.Hashable
import Text.Read

import System.Posix.Process

{- mkv types -}
data GenType = R | M0 | M1 | M2
    deriving (Show, Ord, Eq, Read)

data MkvState a = NoState | OneState !a | TwoState !a !a
    deriving (Show, Ord, Eq, Read)

data LvlState a = LvlState !Int !(MkvState a)
    deriving (Show, Ord, Eq, Read)

data InnerKey a = Raw | Mkv0 !a | Mkv1 !a !a | Mkv2 !a !a !a | Init0 !a | Init1 !a !a
    deriving (Show, Ord, Eq, Read)

type InnerStat a = HM.HashMap (InnerKey a) Int

data PatternType = Upper | Lower | Numeric | Special | All
    deriving (Show, Ord, Eq, Read)

data Stats = Stats
    !(Map.Map PatternType (InnerStat Char))
    !(InnerStat PatternType)
    deriving (Show, Read)

instance (Read k, Read b, Hashable k, Eq k) => Read (HM.HashMap k b) where
    readPrec = parens $ prec 10 $ do
        Ident "fromList" <- lexP
        xs <- readPrec
        return (HM.fromList xs)
    readListPrec = readListPrecDefault

instance Hashable a => Hashable (InnerKey a) where
    hash Raw = 0
    hash (Mkv0  x)      = hash x
    hash (Mkv1  x y)    = (hash x) + (hash y)*257
    hash (Mkv2  x y z)  = (hash x) + (hash y)*257 + (hash z)*79
    hash (Init0 x)      = hash x + 709
    hash (Init1 x y)    = (hash x) + (hash y)*257 + 709

instance Hashable a => Hashable (MkvState a) where
    hash NoState = 0
    hash (OneState x) = hash x
    hash (TwoState x y) = (hash x) + (hash y)*257

instance Hashable a => Hashable (LvlState a) where
    hash (LvlState i s) = (hash s)*499 + (fromIntegral i)

instance Hashable PatternType where
    hash Upper   = 0
    hash Lower   = 1
    hash Numeric = 2
    hash Special = 3
    hash All     = 4

gmkv0 :: (Hashable a, Eq a) => [a] -> InnerStat a -> InnerStat a
gmkv0 !n !e = foldl' (\ !mp !x -> HM.insertWith (+) (Mkv0 x) 1 mp) e n
gmkv1 :: (Hashable a, Eq a) => [a] -> InnerStat a -> InnerStat a
gmkv1 !n !e = foldl' (\ !mp (!x,!y) -> HM.insertWith (+) (Mkv1 x y) 1 mp) e (zip n (tail n))
gmkv2 :: (Hashable a, Eq a) => [a] -> InnerStat a -> InnerStat a
gmkv2 !n !e = foldl' (\ !mp (!x,!y,!z) -> HM.insertWith (+) (Mkv2 x y z) 1 mp) e (zip3 n (tail n) (tail (tail n)))
gini0 :: (Hashable a, Eq a) => [a] -> InnerStat a -> InnerStat a
gini0 !(n:_) !e = HM.insertWith (+) (Init0 n) 1 e
gini0 _ !e = e
gini1 :: (Hashable a, Eq a) => [a] -> InnerStat a -> InnerStat a
gini1 !(m:n:_) !e = HM.insertWith (+) (Init1 m n) 1 e
gini1 _ !e = e

getPattern :: Char -> PatternType
getPattern x | (x >= 'a' && x <= 'z') = Lower
             | (x >= 'A' && x <= 'Z') = Upper
             | (x >= '0' && x <= '9') = Numeric
             | otherwise              = Special

breakpatterns :: String -> [(PatternType, String)]
breakpatterns "" = []
breakpatterns cs = (curpattern, curpart) : breakpatterns rs
    where
        curpattern = getPattern (head cs)
        (curpart, rs) = break (\x -> getPattern x /= curpattern) cs

--getipatterns :: Map.Map PatternType (InnerStat Char) -> (PatternType, String) -> Map.Map PatternType (InnerStat Char)
getipatterns :: Map.Map PatternType (InnerStat Char) -> (PatternType, String) -> Map.Map PatternType (InnerStat Char)
getipatterns !curmap (!curtype, !curstr) = let
    !curstat = case Map.lookup curtype curmap of
        Just x  -> x
        Nothing -> HM.empty
    !i0 = gmkv0 curstr curstat
    !i1 = gmkv1 curstr i0
    !i2 = gmkv2 curstr i1
    !i3 = gini0 curstr i2
    !i4 = gini1 curstr i3
    !out = Map.insert curtype i4 curmap
    in out

addlinestat :: Stats -> String -> Stats
addlinestat !(Stats patmkv topmkv) !curline = newstats
    where
    !linepatterns = breakpatterns curline :: [(PatternType, String)]
    !patterns = map (\(!ptype, !str) -> (ptype, str)) linepatterns :: [(PatternType, String)]
    !onlyptype = map fst patterns   :: [PatternType]
    !t0 = gmkv0 onlyptype topmkv    :: InnerStat PatternType
    !t1 = gmkv1 onlyptype t0        :: InnerStat PatternType
    !t2 = gmkv2 onlyptype t1        :: InnerStat PatternType
    !t3 = gini0 onlyptype t2        :: InnerStat PatternType
    !t4 = gini1 onlyptype t3        :: InnerStat PatternType
    !ntopmkv = t4                   :: InnerStat PatternType
    !npatmkv = foldl' getipatterns patmkv linepatterns :: Map.Map PatternType (InnerStat Char)
    !newstats = Stats npatmkv ntopmkv

calcstats :: [String] -> Stats
calcstats = foldl' addlinestat $! Stats Map.empty HM.empty

downgrade' :: (Show a) => InnerKey a -> Either (MkvState a) (MkvState a)
downgrade' (Mkv0 _    ) = Right NoState
downgrade' (Mkv1 x _  ) = Right $ OneState x
downgrade' (Mkv2 x y _) = Right $ TwoState x y
downgrade' (Init0 _   ) = Left NoState
downgrade' (Init1 x _ ) = Left $ OneState x
downgrade' (_         ) = Right NoState

-- computes log
gl :: (Eq a, Hashable a, Show a) => InnerStat a -> InnerStat a
gl mp = let
    totals = HM.fromListWith (+) $! map (\(x,l) -> (downgrade' x,l)) $! HM.toList mp
    calclog (k,x) =
        let ctotal = fromIntegral $! totals HM.! (downgrade' $! k) :: Double
            r = truncate $! -10 * (log ((fromIntegral x)/ctotal))
        in if r==0
            then (k,1)
            else (k,r)
    in HM.fromList $! map calclog $! HM.toList mp

main :: IO ()
main = do
    (dico:stats:_) <- getArgs
    getProcessID >>= print
    fh <- openFile dico ReadMode
    hSetBinaryMode fh True
    (Stats p t) <- liftM (calcstats . lines) (hGetContents fh)
    let !llogstats = Stats (Map.map gl p) (gl t)
    writeFile stats (show llogstats)
