{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE OverloadedStrings #-}
module NLP.Concraft.DAG.Schema
(
Ob
, Ox
, Schema
, void
, sequenceS_
, schematize
, Body (..)
, Entry
, entry
, entryWith
, SchemaConf (..)
, nullConf
, fromConf
, Block
, fromBlock
, orthB
, lowOrthB
, lowPrefixesB
, lowSuffixesB
, knownB
, shapeB
, packedB
, begPackedB
) where
import Control.Applicative ((<$>), (<*>), pure)
import Control.Monad (forM_, guard)
import Data.Binary (Binary, put, get)
import Data.Maybe (maybeToList)
import qualified Data.Text as T
import qualified Control.Monad.Ox as Ox
import qualified Control.Monad.Ox.Text as Ox
import qualified Data.DAG as DAG
import Data.DAG (DAG, EdgeID)
import qualified NLP.Concraft.DAG.Morphosyntax as X
type Ob = ([Int], T.Text)
type Ox a = Ox.Ox T.Text a
type Schema w t a = X.Sent w t -> EdgeID -> Ox a
void :: a -> Schema w t a
void x _ _ = return x
sequenceS_
:: [X.Sent w t -> a -> Ox b]
-> X.Sent w t -> a -> Ox ()
sequenceS_ xs sent =
let ys = map ($sent) xs
in \k -> sequence_ (map ($k) ys)
data BaseOb = BaseOb
{ orth :: EdgeID -> Maybe T.Text
, lowOrth :: EdgeID -> Maybe T.Text }
mkBaseOb :: X.Word w => X.Sent w t -> BaseOb
mkBaseOb sent = BaseOb
{ orth = _orth
, lowOrth = _lowOrth }
where
at = onEdgeWith sent
_orth = (X.orth `at`)
_lowOrth i = T.toLower <$> _orth i
type Block w t a = X.Sent w t -> [EdgeID] -> Ox a
fromBlock :: X.Word w => Block w t a -> [Int] -> Bool -> Schema w t a
fromBlock blk xs oovOnly sent = \i ->
blkSent $ do
x <- xs
j <- maybeToList $ shift x i sent
guard $ oov j
return j
where
blkSent = blk sent
oov k = if not oovOnly
then True
else maybe False id $ X.oov `at` k
at = onEdgeWith sent
orthB :: X.Word w => Block w t ()
orthB sent = \ks ->
let orthOb = onEdgeWith sent X.orth
in mapM_ (Ox.save . orthOb) ks
lowOrthB :: X.Word w => Block w t ()
lowOrthB sent = \ks ->
let BaseOb{..} = mkBaseOb sent
in mapM_ (Ox.save . lowOrth) ks
lowPrefixesB :: X.Word w => [Int] -> Block w t ()
lowPrefixesB ns sent = \ks ->
forM_ ks $ \i ->
mapM_ (Ox.save . lowPrefix i) ns
where
BaseOb{..} = mkBaseOb sent
lowPrefix i j = Ox.prefix j =<< lowOrth i
lowSuffixesB :: X.Word w => [Int] -> Block w t ()
lowSuffixesB ns sent = \ks ->
forM_ ks $ \i ->
mapM_ (Ox.save . lowSuffix i) ns
where
BaseOb{..} = mkBaseOb sent
lowSuffix i j = Ox.suffix j =<< lowOrth i
knownB :: X.Word w => Block w t ()
knownB sent = \ks -> do
mapM_ (Ox.save . knownAt) ks
where
at = onEdgeWith sent
knownAt i = boolF <$> (not . X.oov) `at` i
boolF True = "T"
boolF False = "F"
shapeB :: X.Word w => Block w t ()
shapeB sent = \ks -> do
mapM_ (Ox.save . shape) ks
where
BaseOb{..} = mkBaseOb sent
shape i = Ox.shape <$> orth i
packedB :: X.Word w => Block w t ()
packedB sent = \ks -> do
mapM_ (Ox.save . shapeP) ks
where
BaseOb{..} = mkBaseOb sent
shape i = Ox.shape <$> orth i
shapeP i = Ox.pack <$> shape i
begPackedB :: X.Word w => Block w t ()
begPackedB sent = \ks -> do
mapM_ (Ox.save . begPacked) ks
where
BaseOb{..} = mkBaseOb sent
shape i = Ox.shape <$> orth i
shapeP i = Ox.pack <$> shape i
begPacked i = isBeg i <> pure "-" <> shapeP i
isBeg i = (Just . boolF) (i == 0)
boolF True = "T"
boolF False = "F"
x <> y = T.append <$> x <*> y
data Body a = Body {
range :: [Int]
, oovOnly :: Bool
, args :: a }
deriving (Show)
instance Binary a => Binary (Body a) where
put Body{..} = put range >> put oovOnly >> put args
get = Body <$> get <*> get <*> get
type Entry a = Maybe (Body a)
entryWith :: a -> [Int] -> Entry a
entryWith v xs = Just (Body xs False v)
entry :: [Int] -> Entry ()
entry = entryWith ()
data SchemaConf = SchemaConf {
orthC :: Entry ()
, lowOrthC :: Entry ()
, lowPrefixesC :: Entry [Int]
, lowSuffixesC :: Entry [Int]
, knownC :: Entry ()
, shapeC :: Entry ()
, packedC :: Entry ()
, begPackedC :: Entry ()
} deriving (Show)
instance Binary SchemaConf where
put SchemaConf{..} = do
put orthC
put lowOrthC
put lowPrefixesC
put lowSuffixesC
put knownC
put shapeC
put packedC
put begPackedC
get = SchemaConf
<$> get <*> get <*> get <*> get
<*> get <*> get <*> get <*> get
nullConf :: SchemaConf
nullConf = SchemaConf
Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing
mkArg0 :: X.Word w => Block w t () -> Entry () -> Schema w t ()
mkArg0 blk (Just x) = fromBlock blk (range x) (oovOnly x)
mkArg0 _ Nothing = void ()
mkArg1 :: X.Word w => (a -> Block w t ()) -> Entry a -> Schema w t ()
mkArg1 blk (Just x) = fromBlock (blk (args x)) (range x) (oovOnly x)
mkArg1 _ Nothing = void ()
fromConf :: X.Word w => SchemaConf -> Schema w t ()
fromConf SchemaConf{..} = sequenceS_
[ mkArg0 orthB orthC
, mkArg0 lowOrthB lowOrthC
, mkArg1 lowPrefixesB lowPrefixesC
, mkArg1 lowSuffixesB lowSuffixesC
, mkArg0 knownB knownC
, mkArg0 shapeB shapeC
, mkArg0 packedB packedC
, mkArg0 begPackedB begPackedC ]
schematize :: Schema w t a -> X.Sent w t -> DAG () [Ob]
schematize schema sent =
let f = const . Ox.execOx . schema sent
in DAG.mapE f sent
onEdgeWith :: DAG x a -> (a -> b) -> EdgeID -> Maybe b
onEdgeWith dag f k = f <$> DAG.maybeEdgeLabel k dag
onEdgeWith' :: DAG x a -> (a -> [b]) -> EdgeID -> [b]
onEdgeWith' dag f k =
g $ f <$> DAG.maybeEdgeLabel k dag
where
g Nothing = []
g (Just xs) = xs
shift
:: Int
-> EdgeID
-> DAG a b
-> Maybe EdgeID
shift k i dag
| k > 0 = do
j <- mayHead $ DAG.nextEdges i dag
shift (k - 1) j dag
| k < 0 = do
j <- mayTail $ DAG.prevEdges i dag
shift (k + 1) j dag
| otherwise = return i
where
mayHead (x:xs) = Just x
mayHead [] = Nothing
mayTail = mayHead . reverse