module NLP.Nerf.Schema
(
Ox
, Schema
, void
, sequenceS_
, schematize
, Body (..)
, Entry
, entry
, entryWith
, SchemaConf (..)
, nullConf
, defaultConf
, fromConf
, Block
, fromBlock
, orthB
, splitOrthB
, lowPrefixesB
, lowSuffixesB
, lemmaB
, shapeB
, packedB
, shapePairB
, packedPairB
, dictB
) where
import Control.Applicative ((<$>), (<*>))
import Control.Monad (forM_, join)
import Data.Maybe (maybeToList)
import Data.Binary (Binary, put, get)
import qualified Data.Char as C
import qualified Data.Set as S
import qualified Data.Vector as V
import qualified Data.Text as T
import qualified Data.DAWG.Static as D
import qualified Data.CRF.Chain1 as CRF
import qualified Control.Monad.Ox as Ox
import qualified Control.Monad.Ox.Text as Ox
import NLP.Nerf.Types
import NLP.Nerf.Dict (Dict)
type Ox a = Ox.Ox Word T.Text a
type Schema a = V.Vector Word -> Int -> Ox a
void :: a -> Schema a
void x _ _ = return x
sequenceS_ :: [V.Vector Word -> a -> Ox b] -> V.Vector Word -> a -> Ox ()
sequenceS_ xs sent =
let ys = map ($sent) xs
in \k -> sequence_ (map ($k) ys)
data BaseOb = BaseOb
{ orth :: Int -> Maybe T.Text
, lowOrth :: Int -> Maybe T.Text }
mkBaseOb :: V.Vector Word -> BaseOb
mkBaseOb sent = BaseOb
{ orth = _orth
, lowOrth = _lowOrth }
where
at = Ox.atWith sent
_orth = (id `at`)
_lowOrth i = T.toLower <$> _orth i
type Block a = V.Vector Word -> [Int] -> Ox a
fromBlock :: Block a -> [Int] -> Schema a
fromBlock blk xs sent =
let blkSent = blk sent
in \k -> blkSent [x + k | x <- xs]
orthB :: Block ()
orthB sent = \ks ->
let orthOb = Ox.atWith sent id
in mapM_ (Ox.save . orthOb) ks
splitOrthB :: Block ()
splitOrthB sent = \ks -> do
mapM_ (Ox.save . lowOrth) ks
mapM_ (Ox.save . upOnlyOrth) ks
where
BaseOb{..} = mkBaseOb sent
upOnlyOrth i = orth i >>= \x -> case T.any C.isUpper x of
True -> Just x
False -> Nothing
lowPrefixesB :: [Int] -> Block ()
lowPrefixesB ns sent = \ks ->
forM_ ks $ \i ->
mapM_ (Ox.save . lowPrefix i) ns
where
BaseOb{..} = mkBaseOb sent
lowPrefix i j = Ox.prefix j =<< lowOrth i
lowSuffixesB :: [Int] -> Block ()
lowSuffixesB ns sent = \ks ->
forM_ ks $ \i ->
mapM_ (Ox.save . lowSuffix i) ns
where
BaseOb{..} = mkBaseOb sent
lowSuffix i j = Ox.suffix j =<< lowOrth i
lemmaB :: Int -> Block ()
lemmaB n sent = \ks -> do
mapM_ lowLemma ks
where
BaseOb{..} = mkBaseOb sent
lowPrefix i j = Ox.prefix j =<< lowOrth i
lowSuffix i j = Ox.suffix j =<< lowOrth i
lowLemma i = Ox.group $ do
mapM_ (Ox.save . lowPrefix i) [0, 1 .. n]
mapM_ (Ox.save . lowSuffix i) [0, 1 .. n]
shapeB :: Block ()
shapeB sent = \ks -> do
mapM_ (Ox.save . shape) ks
where
BaseOb{..} = mkBaseOb sent
shape i = Ox.shape <$> orth i
packedB :: Block ()
packedB sent = \ks -> do
mapM_ (Ox.save . shapeP) ks
where
BaseOb{..} = mkBaseOb sent
shape i = Ox.shape <$> orth i
shapeP i = Ox.pack <$> shape i
shapePairB :: Block ()
shapePairB sent = \ks ->
forM_ ks $ \i -> do
Ox.save $ link <$> shape i <*> shape (i 1)
where
BaseOb{..} = mkBaseOb sent
shape i = Ox.shape <$> orth i
link x y = T.concat [x, "-", y]
packedPairB :: Block ()
packedPairB sent = \ks ->
forM_ ks $ \i -> do
Ox.save $ link <$> shapeP i <*> shapeP (i 1)
where
BaseOb{..} = mkBaseOb sent
shape i = Ox.shape <$> orth i
shapeP i = Ox.pack <$> shape i
link x y = T.concat [x, "-", y]
dictB :: Dict -> Block ()
dictB dict sent = \ks -> do
mapM_ (Ox.saves . searchDict) ks
where
BaseOb{..} = mkBaseOb sent
searchDict i = join . maybeToList $
S.toList <$> (orth i >>= flip D.lookup dict . T.unpack)
data Body a = Body {
range :: [Int]
, args :: a }
deriving (Show)
instance Binary a => Binary (Body a) where
put Body{..} = put range >> put args
get = Body <$> get <*> get
type Entry a = Maybe (Body a)
entryWith :: a -> [Int] -> Entry a
entryWith v xs = Just (Body xs v)
entryWith'Mb :: Maybe a -> [Int] -> Entry a
entryWith'Mb (Just v) xs = Just (Body xs v)
entryWith'Mb Nothing _ = Nothing
entry :: [Int] -> Entry ()
entry = entryWith ()
data SchemaConf = SchemaConf {
orthC :: Entry ()
, splitOrthC :: Entry ()
, lowPrefixesC :: Entry [Int]
, lowSuffixesC :: Entry [Int]
, lemmaC :: Entry Int
, shapeC :: Entry ()
, packedC :: Entry ()
, shapePairC :: Entry ()
, packedPairC :: Entry ()
, dictC :: Entry [Dict]
, intTrigsC :: Entry Dict
, extTrigsC :: Entry Dict
} deriving (Show)
instance Binary SchemaConf where
put SchemaConf{..} = do
put orthC
put splitOrthC
put lowPrefixesC
put lowSuffixesC
put lemmaC
put shapeC
put packedC
put shapePairC
put packedPairC
put dictC
put intTrigsC
put extTrigsC
get = SchemaConf
<$> get <*> get <*> get <*> get
<*> get <*> get <*> get <*> get
<*> get <*> get <*> get <*> get
nullConf :: SchemaConf
nullConf = SchemaConf
Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing
defaultConf
:: [Dict]
-> Maybe Dict
-> Maybe Dict
-> IO SchemaConf
defaultConf neDicts intDict extDict = do
return $ SchemaConf
{ orthC = Nothing
, splitOrthC = entry [1, 0]
, lowPrefixesC = Nothing
, lowSuffixesC = entryWith [2, 3, 4] [0]
, lemmaC = entryWith 3 [1, 0]
, shapeC = entry [1, 0]
, packedC = entry [1, 0]
, shapePairC = entry [0]
, packedPairC = entry [0]
, dictC = entryWith neDicts [1, 0]
, intTrigsC = entryWith'Mb intDict [0]
, extTrigsC = entryWith'Mb extDict [1] }
mkArg0 :: Block () -> Entry () -> Schema ()
mkArg0 blk (Just x) = fromBlock blk (range x)
mkArg0 _ Nothing = void ()
mkArg1 :: (a -> Block ()) -> Entry a -> Schema ()
mkArg1 blk (Just x) = fromBlock (blk (args x)) (range x)
mkArg1 _ Nothing = void ()
mkArgs1 :: (a -> Block ()) -> Entry [a] -> Schema ()
mkArgs1 blk (Just x) = sequenceS_
[ fromBlock
(blk dict)
(range x)
| dict <- args x ]
mkArgs1 _ Nothing = void ()
fromConf :: SchemaConf -> Schema ()
fromConf SchemaConf{..} = sequenceS_
[ mkArg0 orthB orthC
, mkArg0 splitOrthB splitOrthC
, mkArg1 lowPrefixesB lowPrefixesC
, mkArg1 lowSuffixesB lowSuffixesC
, mkArg1 lemmaB lemmaC
, mkArg0 shapeB shapeC
, mkArg0 packedB packedC
, mkArg0 shapePairB shapePairC
, mkArg0 packedPairB packedPairC
, mkArgs1 dictB dictC
, mkArg1 dictB intTrigsC
, mkArg1 dictB extTrigsC ]
schematize :: Schema a -> [Word] -> CRF.Sent Ob
schematize schema xs =
map (S.fromList . Ox.execOx . schema v) [0 .. n 1]
where
v = V.fromList xs
n = V.length v