-- | Enumerators, enumeratees and iteratees for streaming Alpino training
-- data: reading lines from standard input, converting them to and from
-- training instances, grouping and flattening instance streams, applying
-- the transformations of "Data.Alpino.Model" element-wise, and printing
-- the resulting byte strings.
module Data.Alpino.Model.Enumerator ( bestScore,
concat,
groupBy,
groupByKey,
filterFeatures,
filterFeaturesFunctor,
instanceGenerator,
instanceParser,
lineEnum,
printByteString,
randomSample,
scoreToBinary,
scoreToBinaryNorm,
scoreToNorm
) where
import Prelude hiding (concat, filter, mapM)
import Control.Exception.Base (Exception)
import Control.Monad.IO.Class (MonadIO(..), liftIO)
import Control.Monad.Trans.Class (lift)
import qualified Data.Alpino.Model as AM
import qualified Data.ByteString as B
import qualified Data.ByteString.UTF8 as BU
import qualified Data.Enumerator.List as EL
import Data.Enumerator hiding (isEOF, head, length, map)
import qualified Data.Set as Set
import Data.Typeable
import System.IO (isEOF)
import System.Random (getStdRandom, split)
-- | Exception signalling that input data could not be interpreted. The
-- wrapped 'String' is a human-readable description of the problem.
data InvalidDataException = InvalidDataException String
  deriving Typeable

instance Exception InvalidDataException

-- | Renders the bare message. Applying 'show' to the message 'String'
-- would wrap it in double quotes and escape its characters, garbling the
-- text whenever the exception is reported to the user.
instance Show InvalidDataException where
  show (InvalidDataException msg) = msg
-- | Enumeratee that applies 'AM.bestScore'' to every list of training
-- instances in the stream, emitting one 'Double' per incoming list.
bestScore :: Monad m => Enumeratee [AM.TrainingInstance] Double m b
bestScore = EL.map toBestScore
  where
    toBestScore = AM.bestScore'
-- | Enumeratee that rewrites every training instance in the stream with
-- 'AM.filterFeatures', handing it the given function and feature set
-- unchanged.
filterFeatures
  :: Monad m
  => (Bool -> Bool)
  -> Set.Set B.ByteString
  -> Enumeratee AM.TrainingInstance AM.TrainingInstance m b
filterFeatures f keepFeatures = EL.map applyFilter
  where
    applyFilter = AM.filterFeatures f keepFeatures
-- | Enumeratee that rewrites every training instance in the stream with
-- 'AM.filterFeaturesFunctor', handing it the given function and feature
-- set unchanged.
filterFeaturesFunctor
  :: Monad m
  => (Bool -> Bool)
  -> Set.Set B.ByteString
  -> Enumeratee AM.TrainingInstance AM.TrainingInstance m b
filterFeaturesFunctor f keepFeatures = EL.map applyFilter
  where
    applyFilter = AM.filterFeaturesFunctor f keepFeatures
-- | Groups consecutive stream elements into lists, in the manner of
-- 'Data.List.groupBy': each group consists of a leading element @e@
-- followed by the longest run of subsequent elements @x@ for which
-- @f e x@ holds. Every group is passed downstream as a single element.
--
-- The leading element is always consumed and placed in its group, so a
-- predicate that is not reflexive (@f e e == False@) still makes progress
-- instead of endlessly emitting empty groups without consuming input.
--
-- The 'Eq' constraint is not used by the implementation; it is kept so
-- the signature stays unchanged for existing callers.
groupBy :: (Monad m, Eq a) => (a -> a -> Bool) -> Enumeratee a [a] m b
groupBy f = loop
  where
    -- Inner iteratee still wants input: build the next group and feed it.
    loop (Continue k) = do
      next <- EL.head
      case next of
        -- Outer stream exhausted; hand the waiting inner step back.
        Nothing -> return $ Continue k
        Just e -> do
          rest <- EL.takeWhile (f e)
          newStep <- lift $ runIteratee $ k $ Chunks [e : rest]
          loop newStep
    -- Inner iteratee yielded or failed: stop consuming.
    loop step = return step
-- | Groups consecutive training instances that share both their
-- 'AM.instanceType' and their 'AM.instanceKey', using 'groupBy'.
groupByKey
  :: Monad m
  => Enumeratee AM.TrainingInstance [AM.TrainingInstance] m b
groupByKey = groupBy sameKey
  where
    -- Type is compared first, then key, exactly like a chained (&&).
    sameKey a b = identity a == identity b
    identity inst = (AM.instanceType inst, AM.instanceKey inst)
-- | Enumeratee that converts each incoming byte string to a training
-- instance with 'AM.bsToTrainingInstance'. If any conversion yields
-- 'Nothing', the stream fails with an 'InvalidDataException'.
instanceParser
  :: Monad m
  => Enumeratee BU.ByteString AM.TrainingInstance m b
instanceParser = mapMaybeEnum parseFailure AM.bsToTrainingInstance
  where
    parseFailure = InvalidDataException "Could not parse instance."
-- | Enumeratee that converts each training instance in the stream to a
-- byte string with 'AM.trainingInstanceToBs'.
instanceGenerator
  :: Monad m
  => Enumeratee AM.TrainingInstance B.ByteString m b
instanceGenerator = EL.map render
  where
    render = AM.trainingInstanceToBs
-- | Enumerator that reads standard input line by line, feeding each line
-- to the iteratee as a single-element chunk. It stops when standard input
-- reaches end-of-file or when the iteratee no longer accepts input.
lineEnum :: MonadIO m => Enumerator B.ByteString m b
lineEnum = Iteratee . feed
  where
    feed (Continue k) = do
      atEnd <- liftIO isEOF
      if atEnd
        -- No more input: return the still-waiting step to the caller.
        then return (Continue k)
        else do
          line <- liftIO B.getLine
          next <- runIteratee (k (Chunks [line]))
          feed next
    -- The iteratee yielded or failed; nothing further to read.
    feed done = return done
-- | Enumeratee that flattens a stream of lists: the elements of every
-- incoming list are passed downstream as one chunk.
concat :: Monad m => Enumeratee [a] a m b
concat = go
  where
    -- Downstream wants more: take the next list, or stop at end of input.
    go (Continue k) = EL.head >>= maybe (return (Continue k)) forward
      where
        forward chunk = lift (runIteratee (k (Chunks chunk))) >>= go
    -- Downstream yielded or failed: pass its step through untouched.
    go finished = return finished
-- | Enumeratee that maps a partial conversion over the stream. Each chunk
-- is converted as a whole with 'mapMaybeMaybe'; if any element of a chunk
-- converts to 'Nothing', the supplied exception is thrown and nothing
-- from that chunk is passed downstream.
mapMaybeEnum
  :: (Exception e, Monad m)
  => e
  -> (ao -> Maybe ai)
  -> Enumeratee ao ai m b
mapMaybeEnum exception f = loop
  where
    loop = checkDone (\k -> continue (step k))

    step k stream = case stream of
      -- End of input: return the waiting inner continuation.
      EOF -> yield (Continue k) EOF
      -- Empty chunk: nothing to convert, keep waiting.
      Chunks [] -> continue (step k)
      Chunks xs ->
        maybe
          (throwError exception)
          (\ys -> k (Chunks ys) >>== loop)
          (mapMaybeMaybe f xs)
-- | Applies a partial function to every list element, left to right.
-- Returns 'Just' the list of results when every application succeeds and
-- 'Nothing' as soon as one application fails; elements after the first
-- failure are not inspected.
mapMaybeMaybe :: (a -> Maybe b) -> [a] -> Maybe [b]
mapMaybeMaybe f = foldr step (Just [])
  where
    step x rest = case f x of
      Nothing -> Nothing
      Just y -> fmap (y :) rest
-- | Iteratee that writes every received byte string with 'B.putStrLn',
-- in arrival order, and finishes with @()@ at end of input.
printByteString :: MonadIO m => Iteratee B.ByteString m ()
printByteString = continue consumeChunk
  where
    consumeChunk stream = case stream of
      EOF -> yield () EOF
      Chunks [] -> continue consumeChunk
      Chunks ls -> do
        liftIO (mapM_ B.putStrLn ls)
        continue consumeChunk
-- | Enumeratee that passes every list of training instances through
-- 'AM.randomSample'. For each list a fresh generator is obtained by
-- splitting the global standard generator; @n@ is forwarded to
-- 'AM.randomSample' as-is (presumably the sample size; confirm against
-- "Data.Alpino.Model").
randomSample
  :: MonadIO m
  => Int
  -> Enumeratee [AM.TrainingInstance] [AM.TrainingInstance] m b
randomSample n = EL.mapM (liftIO . drawSample)
  where
    drawSample :: [AM.TrainingInstance] -> IO [AM.TrainingInstance]
    drawSample instances =
      getStdRandom split >>= \gen -> return (AM.randomSample gen n instances)
-- | Enumeratee that applies 'AM.scoreToBinary' to every list of training
-- instances in the stream.
scoreToBinary
  :: Monad m
  => Enumeratee [AM.TrainingInstance] [AM.TrainingInstance] m b
scoreToBinary = EL.map rescore
  where
    rescore = AM.scoreToBinary
-- | Enumeratee that applies 'AM.scoreToBinaryNorm' to every list of
-- training instances in the stream.
scoreToBinaryNorm
  :: Monad m
  => Enumeratee [AM.TrainingInstance] [AM.TrainingInstance] m b
scoreToBinaryNorm = EL.map rescore
  where
    rescore = AM.scoreToBinaryNorm
-- | Enumeratee that applies 'AM.scoreToNorm' to every list of training
-- instances in the stream.
scoreToNorm
  :: Monad m
  => Enumeratee [AM.TrainingInstance] [AM.TrainingInstance] m b
scoreToNorm = EL.map rescore
  where
    rescore = AM.scoreToNorm