{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE OverloadedStrings #-}

-- | Observation schema blocks for Concraft.

module NLP.Concraft.Schema
( 
-- * Types
  Ob
, Ox
, Schema
, void
, sequenceS_

-- * Usage
, schematize

-- * Configuration
, Body (..)
, Entry
, entry
, entryWith
, SchemaConf (..)
, nullConf
, fromConf

-- * Schema blocks
, Block
, fromBlock
, orthB
, lowOrthB
, lowPrefixesB
, lowSuffixesB
, knownB
, shapeB
, packedB
, begPackedB
) where

import Control.Applicative ((<$>), (<*>), pure)
import Control.Monad (forM_)
import Data.Binary (Binary, put, get)
import qualified Data.Vector as V
import qualified Data.Text as T
import qualified Control.Monad.Ox as Ox
import qualified Control.Monad.Ox.Text as Ox

import qualified NLP.Concraft.Morphosyntax as X

-- | An observation consist of an index (of list type) and an actual
-- observation value.
type Ob = ([Int], T.Text)

-- | The Ox monad specialized to word token type and text observations.
type Ox a = Ox.Ox T.Text a

-- | A schema is a block of the Ox computation performed within the
-- context of the sentence and the absolute sentence position.
type Schema w t a = V.Vector (X.Seg w t) -> Int -> Ox a

-- | A dummy schema block.
void :: a -> Schema w t a
void x _ _ = return x

-- | Sequence the list of schemas (or blocks) and discard individual values.
sequenceS_
    :: [V.Vector (X.Seg w t) -> a -> Ox b]
    ->  V.Vector (X.Seg w t) -> a -> Ox ()
sequenceS_ xs sent =
    let ys = map ($sent) xs
    in  \k -> sequence_ (map ($k) ys)

-- | Record structure of the basic observation types.
data BaseOb = BaseOb
    { orth          :: Int -> Maybe T.Text
    , lowOrth       :: Int -> Maybe T.Text }

-- | Construct the 'BaseOb' structure given the sentence.
mkBaseOb :: X.Word w => V.Vector (X.Seg w t) -> BaseOb
mkBaseOb sent = BaseOb
    { orth      = _orth
    , lowOrth   = _lowOrth }
  where
    at          = Ox.atWith sent
    _orth       = (X.orth `at`)
    _lowOrth i  = T.toLower <$> _orth i

-- | A block is a chunk of the Ox computation performed within the
-- context of the sentence and the list of absolute sentence positions.
type Block w t a = V.Vector (X.Seg w t) -> [Int] -> Ox a

-- | Transform a block to a schema depending on
-- * A list of relative sentence positions,
-- * A boolean value; if true, the block computation
--   will be performed only on positions where an OOV
--   word resides.
fromBlock :: X.Word w => Block w t a -> [Int] -> Bool -> Schema w t a
fromBlock blk xs oovOnly sent =
    \k -> blkSent [x + k | x <- xs, oov (x + k)]
  where
    blkSent = blk sent
    oov k   = if not oovOnly
        then True
        else maybe False id $ X.oov `at` k
    at      = Ox.atWith sent

-- | Orthographic form at the current position.
orthB :: X.Word w => Block w t ()
orthB sent = \ks ->
    let orthOb = Ox.atWith sent X.orth
    in  mapM_ (Ox.save . orthOb) ks

-- | Orthographic form at the current position.
lowOrthB :: X.Word w => Block w t ()
lowOrthB sent = \ks ->
    let BaseOb{..} = mkBaseOb sent
    in  mapM_ (Ox.save . lowOrth) ks

-- | List of lowercased prefixes of given lengths.
lowPrefixesB :: X.Word w => [Int] -> Block w t ()
lowPrefixesB ns sent = \ks ->
    forM_ ks $ \i ->
        mapM_ (Ox.save . lowPrefix i) ns
  where
    BaseOb{..}      = mkBaseOb sent
    lowPrefix i j   = Ox.prefix j =<< lowOrth i

-- | List of lowercased suffixes of given lengths.
lowSuffixesB :: X.Word w => [Int] -> Block w t ()
lowSuffixesB ns sent = \ks ->
    forM_ ks $ \i ->
        mapM_ (Ox.save . lowSuffix i) ns
  where
    BaseOb{..}      = mkBaseOb sent
    lowSuffix i j   = Ox.suffix j =<< lowOrth i

-- | Shape of the word.
knownB :: X.Word w => Block w t ()
knownB sent = \ks -> do
    mapM_ (Ox.save . knownAt) ks
  where
    at          = Ox.atWith sent
    knownAt i   = boolF <$> (not . X.oov) `at` i
    boolF True  = "T"
    boolF False = "F"

-- | Shape of the word.
shapeB :: X.Word w => Block w t ()
shapeB sent = \ks -> do
    mapM_ (Ox.save . shape) ks
  where
    BaseOb{..}      = mkBaseOb sent
    shape i         = Ox.shape <$> orth i

-- | Packed shape of the word.
packedB :: X.Word w => Block w t ()
packedB sent = \ks -> do
    mapM_ (Ox.save . shapeP) ks
  where
    BaseOb{..}      = mkBaseOb sent
    shape i         = Ox.shape <$> orth i
    shapeP i        = Ox.pack <$> shape i

-- | Packed shape of the word.
begPackedB :: X.Word w => Block w t ()
begPackedB sent = \ks -> do
    mapM_ (Ox.save . begPacked) ks
  where
    BaseOb{..}      = mkBaseOb sent
    shape i         = Ox.shape <$> orth i
    shapeP i        = Ox.pack <$> shape i
    begPacked i     = isBeg i <> pure "-" <> shapeP i
    isBeg i         = (Just . boolF) (i == 0)
    boolF True      = "T"
    boolF False     = "F"
    x <> y          = T.append <$> x <*> y

-- -- | Combined shapes of two consecutive (at @k-1@ and @k@ positions) words.
-- shapePairB :: Block w t ()
-- shapePairB sent = \ks ->
--     forM_ ks $ \i -> do
--         Ox.save $ link <$> shape  i <*> shape  (i - 1)
--   where
--     BaseOb{..}      = mkBaseOb sent
--     shape i         = Ox.shape <$> orth i
--     link x y        = T.concat [x, "-", y]
-- 
-- -- | Combined packed shapes of two consecutive (at @k-1@ and @k@ positions)
-- -- words.
-- packedPairB :: Block w t ()
-- packedPairB sent = \ks ->
--     forM_ ks $ \i -> do
--         Ox.save $ link <$> shapeP i <*> shapeP (i - 1)
--   where
--     BaseOb{..}      = mkBaseOb sent
--     shape i         = Ox.shape <$> orth i
--     shapeP i        = Ox.pack <$> shape i
--     link x y        = T.concat [x, "-", y]

-- | Body of configuration entry.
data Body a = Body {
    -- | Range argument for the schema block. 
      range     :: [Int]
    -- | When true, the entry is used only for oov words.
    , oovOnly   :: Bool
    -- | Additional arguments for the schema block.
    , args      :: a }
    deriving (Show)

instance Binary a => Binary (Body a) where
    put Body{..} = put range >> put oovOnly >> put args
    get = Body <$> get <*> get <*> get

-- | Maybe entry.
type Entry a = Maybe (Body a)

-- | Entry with additional arguemnts.
entryWith :: a -> [Int] -> Entry a
entryWith v xs = Just (Body xs False v)

-- | Plain entry with no additional arugments.
entry :: [Int] -> Entry ()
entry = entryWith ()

-- | Configuration of the schema.  All configuration elements specify the
-- range over which a particular observation type should be taken on account.
-- For example, the @[-1, 0, 2]@ range means that observations of particular
-- type will be extracted with respect to previous (@k - 1@), current (@k@)
-- and after the next (@k + 2@) positions when identifying the observation
-- set for position @k@ in the input sentence.
data SchemaConf = SchemaConf {
    -- | The 'orthB' schema block.
      orthC             :: Entry ()
    -- | The 'lowOrthB' schema block.
    , lowOrthC          :: Entry ()
    -- | The 'lowPrefixesB' schema block.  The first list of ints
    -- represents lengths of prefixes.
    , lowPrefixesC      :: Entry [Int]
    -- | The 'lowSuffixesB' schema block.  The first list of ints
    -- represents lengths of suffixes.
    , lowSuffixesC      :: Entry [Int]
    -- | The 'knownB' schema block.
    , knownC            :: Entry ()
    -- | The 'shapeB' schema block.
    , shapeC            :: Entry ()
    -- | The 'packedB' schema block.
    , packedC            :: Entry ()
    -- | The 'begPackedB' schema block.
    , begPackedC         :: Entry ()
    } deriving (Show)

instance Binary SchemaConf where
    put SchemaConf{..} = do
        put orthC
        put lowOrthC
        put lowPrefixesC
        put lowSuffixesC
        put knownC
        put shapeC
        put packedC
        put begPackedC
    get = SchemaConf
        <$> get <*> get <*> get <*> get
        <*> get <*> get <*> get <*> get

-- | Null configuration of the observation schema.
nullConf :: SchemaConf
nullConf = SchemaConf
    Nothing Nothing Nothing Nothing
    Nothing Nothing Nothing Nothing

mkArg0 :: X.Word w => Block w t () -> Entry () -> Schema w t ()
mkArg0 blk (Just x) = fromBlock blk (range x) (oovOnly x)
mkArg0 _   Nothing  = void ()

mkArg1 :: X.Word w => (a -> Block w t ()) -> Entry a -> Schema w t ()
mkArg1 blk (Just x) = fromBlock (blk (args x)) (range x) (oovOnly x)
mkArg1 _   Nothing  = void ()

-- | Build the schema based on the configuration.
fromConf :: X.Word w => SchemaConf -> Schema w t ()
fromConf SchemaConf{..} = sequenceS_
    [ mkArg0 orthB orthC
    , mkArg0 lowOrthB lowOrthC
    , mkArg1 lowPrefixesB lowPrefixesC
    , mkArg1 lowSuffixesB lowSuffixesC
    , mkArg0 knownB knownC
    , mkArg0 shapeB shapeC
    , mkArg0 packedB packedC
    , mkArg0 begPackedB begPackedC ]

-- | Use the schema to extract observations from the sentence.
schematize :: Schema w t a -> X.Sent w t -> [[Ob]]
schematize schema xs =
    map (Ox.execOx . schema v) [0 .. n - 1]
  where
    v = V.fromList xs
    n = V.length v