module Data.CRF.Chain2.Pair
(
Word (..)
, mkWord
, Sent
, Dist (unDist)
, mkDist
, WordL
, SentL
, Ob (..)
, Lb1 (..)
, Lb2 (..)
, Lb
, Feat (..)
, CRF (..)
, train
, tag
, FeatSel
, selectHidden
, selectPresent
) where
import Control.Applicative ((<$>), (<*>))
import Data.Binary (Binary, get, put)
import qualified Numeric.SGD as SGD
import Data.CRF.Chain2.Generic.Model
(Model, FeatSel, selectHidden, selectPresent, core, withCore)
import Data.CRF.Chain2.Generic.Codec
import Data.CRF.Chain2.Generic.External
import qualified Data.CRF.Chain2.Generic.Inference as I
import qualified Data.CRF.Chain2.Generic.Train as T
import Data.CRF.Chain2.Pair.Base
import Data.CRF.Chain2.Pair.FeatMap
import Data.CRF.Chain2.Pair.Codec (codec, CodecData)
data CRF a b c = CRF
{ codecData :: CodecData a b c
, model :: Model FeatMap Ob Lb Feat }
instance (Ord a, Ord b, Ord c, Binary a, Binary b, Binary c)
=> Binary (CRF a b c) where
put CRF{..} = put codecData >> put (core model)
get = CRF <$> get <*> do
_core <- get
return $ withCore _core featGen
codecSpec
:: (Ord a, Ord b, Ord c)
=> T.CodecSpec a (b, c) (CodecData a b c) Ob Lb
codecSpec = T.CodecSpec
{ T.mkCodec = mkCodec codec
, T.encode = encodeDataL codec }
train
:: (Ord a, Ord b, Ord c)
=> SGD.SgdArgs
-> FeatSel Ob Lb Feat
-> IO [SentL a (b, c)]
-> Maybe (IO [SentL a (b, c)])
-> IO (CRF a b c)
train sgdArgs featSel trainIO evalIO'Maybe = do
(_codecData, _model) <- T.train
sgdArgs
codecSpec
featGen
featSel
trainIO
evalIO'Maybe
return $ CRF _codecData _model
tag :: (Ord a, Ord b, Ord c) => CRF a b c -> Sent a (b, c) -> [(b, c)]
tag CRF{..} sent
= onWords . decodeLabels codec codecData
. I.tag model . encodeSent codec codecData
$ sent
where
onWords xs =
[ unJust codec codecData word x
| (word, x) <- zip sent xs ]