{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE LambdaCase #-}


-- | DAG-aware morphosyntax data layer in Polish.


module NLP.Concraft.Polish.DAG.Morphosyntax
(
-- * Tag
  Tag

-- * Edge
-- , Edge (..)
, Word (..)
, Interp (..)
, voidInterp
, Space (..)
-- , select
-- , select'
, selectWMap
, selectAnno

-- * Sentence
, Sent
, SentO (..)
, restore
, restore'
, withOrig

-- * Conversion
, packSent
, packSentO
-- , packSeg

-- -- ** From simple sentence
-- , fromList
) where


import           Prelude hiding (Word)
import           Control.Applicative ((<$>), (<*>))
-- import           Control.Arrow (first)
-- import           Control.Monad (guard)
import           Data.Binary (Binary, put, get)
import           Data.Aeson
-- import qualified Data.Aeson as Aeson
import qualified Data.Set as S
import qualified Data.Map.Strict as M
-- import           Data.Maybe (catMaybes)
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
-- import qualified Data.Tagset.Positional as P

import           Data.DAG (DAG)
import qualified Data.DAG as DAG
-- import           Data.CRF.Chain1.Constrained.DAG.Dataset.Internal (DAG)
-- import qualified Data.CRF.Chain1.Constrained.DAG.Dataset.Internal as DAG

-- import qualified NLP.Concraft.DAG2 as C
import qualified NLP.Concraft.DAG.Morphosyntax as X
import qualified NLP.Concraft.DAG.Segmentation as Seg
-- import qualified NLP.Concraft.Polish.Morphosyntax as R
import           NLP.Concraft.Polish.Morphosyntax (Space(..))


--------------------------------
-- Basics
--------------------------------


-- | A textual representation of a morphosyntactic tag.
type Tag = T.Text


--------------------------------
-- Interp
--------------------------------


-- | A morphosyntactic interpretation.
data Interp t = Interp
    { base  :: T.Text
      -- ^ The base form (lemma)
    , tag   :: t
      -- ^ The (morphosyntactic) tag
    , commonness :: Maybe T.Text
    , qualifier  :: Maybe T.Text
    , metaInfo   :: Maybe T.Text
    , eos        :: Bool
      -- ^ The remaining four are ignored for the moment, but we plan to rely on
      -- them later on.
    } deriving (Show, Eq, Ord)


-- | An almost empty interpretation, with only the `tag` actually specified.
voidInterp :: t -> Interp t
voidInterp x = Interp
  { base = "none"
  , tag = x
  , commonness = Nothing
  , qualifier = Nothing
  , metaInfo = Nothing
  , eos = False
  }


instance (Ord t, Binary t) => Binary (Interp t) where
    put Interp{..} = do
      put base
      put tag
      put commonness
      put qualifier
      put metaInfo
      put eos
    get = Interp <$> get <*> get <*> get <*> get <*> get <*> get


--------------------------------
-- Edge
--------------------------------


-- -- | An edge consists of a word and a set of morphosyntactic interpretations.
-- data Edge t = Edge
--     { word      :: Word
--     -- | Interpretations of the word, each interpretation annotated
--     -- with a /disamb/ Boolean value (if 'True', the interpretation
--     -- is correct within the context).
--     , interps   :: X.WMap (Interp t) }
--     deriving (Show, Eq, Ord)
--
-- instance (Ord t, Binary t) => Binary (Edge t) where
--     put Edge{..} = put word >> put interps
--     get = Edge <$> get <*> get
--
-- instance X.Word (Edge t) where
--     orth = X.orth . word
--     oov = X.oov . word


--------------------------------
-- Word
--------------------------------


-- | A word.
data Word = Word
    { orth      :: T.Text
    -- , space     :: Space
    , known     :: Bool }
    deriving (Show, Eq, Ord)

instance X.Word Word where
    orth = orth
    oov = not.known

instance Binary Word where
    -- put Word{..} = put orth >> put space >> put known
    put Word{..} = put orth >> put known
    -- get = Word <$> get <*> get <*> get
    get = Word <$> get <*> get

instance ToJSON Word where
    toJSON Word{..} = object
        [ "orth"  .= orth
        , "known" .= known ]

instance FromJSON Word where
    parseJSON (Object v) = Word
        <$> v .: "orth"
        <*> v .: "known"
    parseJSON _ = error "parseJSON [Word]"


---------------------------------------------------------------------------------
-- Selection
--
-- (Honestly, I don't remember what is this one about...)
--
-- Update: maybe related to the fact that base forms have to be handled somehow?
---------------------------------------------------------------------------------


-- -- | Select one chosen interpretation.
-- select :: Ord a => a -> Edge a -> Edge a
-- select = select' []
--
--
-- -- | Select multiple interpretations and one chosen interpretation.
-- select' :: Ord a => [a] -> a -> Edge a -> Edge a
-- select' ys x = selectWMap . X.mkWMap $ (x, 1) : map (,0) ys


-- | Select interpretations.
selectAnno :: Ord a => M.Map (Interp a) Double -> X.Seg Word (Interp a) -> X.Seg Word (Interp a)
selectAnno = selectWMap . X.fromMap


-- | Select interpretations.
selectWMap :: Ord a => X.WMap (Interp a) -> X.Seg Word (Interp a) -> X.Seg Word (Interp a)
selectWMap wMap seg = seg {X.tags = wMap}
--     seg { X.tags = newTags }
--   where
--     wSet = S.fromList . map tag . M.keys . X.unWMap . X.tags $ seg
--     newTags = X.mkWMap $
--         -- [ case M.lookup (tag interp) (X.unWMap wMap) of
--         [ case M.lookup interp (X.unWMap wMap) of
--             Just x  -> (interp, x)
--             Nothing -> (interp, 0)
--         | interp <- (M.keys . X.unWMap) (X.tags seg) ]
--             ++ catMaybes
--         [ if interp `S.member` wSet
--             then Nothing
--             else Just (interp, x)
--         | (interp, x) <- M.toList (X.unWMap wMap)
-- --         | let lemma = orth $ X.word seg   -- Default base form
-- --         , (tag, x) <- M.toList (X.unWMap wMap)
-- --         , let interp = Interp
-- --                 { base = lemma
-- --                 , tag = tag
-- --                 , commonness = Nothing
-- --                 , qualifier = Nothing
-- --                 , metaInfo = Nothing
-- --                 , eos = False }
--         ]


--------------------------------
-- Sentence
--------------------------------


-- | A sentence.
-- type Sent t = DAG Space (Edge t)
type Sent t = DAG Space (X.Seg Word t)


-- | A sentence with its original textual representation.
data SentO t = SentO
    { sent  :: Sent t
    , orig  :: L.Text }


-- | Restore textual representation of a sentence. The function is not very
-- accurate, it could be improved if we enriched the representation of spaces.
restore :: Sent t -> L.Text
restore =
    let edgeStr = orth . X.word
        spaceStr None    = ""
        spaceStr Space   = " "
        spaceStr NewLine = "\n"
    in  L.fromChunks . map (either spaceStr edgeStr) . pickPath


-- | Use `restore` to translate `Sent` to a `SentO`.
withOrig :: Sent t -> SentO t
withOrig s = SentO
    { sent = s
    , orig = restore s }


-- | A version of `restore` which places a space on each node.
restore' :: Sent t -> L.Text
restore' =
    let edgeStr = orth . X.word
        spaceStr = const " "
    in  L.fromChunks . map (either spaceStr edgeStr) . tail . pickPath


--------------------------------
-- Utils
--------------------------------


-- | Pick any path from the given DAG. The result is a list
-- of interleaved node and edge labels.
pickPath :: (X.Word b) => DAG a b -> [Either a b]
pickPath dag =
  fstNodeVal path : concatMap getVals path
  where
    -- Take the value on the first node on the path
    fstNodeVal = \case
      edgeID : _ ->
        Left $ DAG.nodeLabel (DAG.begsWith edgeID dag) dag
      [] -> error "Morphosyntax.pickPath: empty path"
    -- Select a shortest path in the DAG; thanks to edges being topologically
    -- sorted, this should give us the list of edge IDs in an appropriate order.
    path = S.toList $ Seg.findPath Seg.Min dag
    -- Get the labels of the nodes and labels
    getVals edgeID =
      let
        nodeID = DAG.endsWith edgeID dag
      in
        [ Right $ DAG.edgeLabel edgeID dag
        , Left  $ DAG.nodeLabel nodeID dag ]


---------------------------
-- Conversion
---------------------------


-- -- | Convert a segment to a segment from the core library.
-- packSeg :: Ord a => X.Seg Word a -> X.Seg Word a
-- packSeg = id
-- -- packSeg Edge{..}
-- --     = X.Seg word
-- --     $ X.mkWMap
-- --     $ map (first tag)
-- --     $ M.toList
-- --     $ X.unWMap interps


-- | Convert a sentence to a sentence from the core library.
packSent :: Ord a => Sent a -> X.Sent Word a
packSent
  = DAG.mapN (const ())
  -- . fmap packSeg


-- | Convert a sentence to a sentence from the core library.
-- packSentO :: P.Tagset -> SentO Tag -> X.SentO Word P.Tag
packSentO :: Ord a => SentO a -> X.SentO Word a
packSentO s = X.SentO
    { segs = packSent (sent s)
    , orig = orig s }


---------------------------
-- From simple sentence
---------------------------


-- fromWord :: R.Word -> (Space, Word)
-- fromWord R.Word{..} = (space, Word {orth=orth, known=known})
--
--
-- fromSeg :: (Ord t) => R.Seg t -> (Space, Edge t)
-- fromSeg R.Seg{..} =
--   (space, edge)
--   where
--     edge = Edge {word = newWord, interps = updateInterps interps}
--     (space, newWord) = fromWord word
--     -- updateInterps = X.mkWMap . map (first fromInterp) . X.unWMap
--     updateInterps = X.mapWMap fromInterp
--
--
-- fromInterp :: R.Interp t -> Interp t
-- fromInterp R.Interp{..} = Interp
--   { base = base
--   , tag = tag
--   , commonness = Nothing
--   , qualifier = Nothing
--   , metaInfo = Nothing
--   , eos = False }
--
--
-- -- | Create a DAG-based sentence from a list-based sentence.
-- fromList :: Ord t => R.Sent t -> Sent t
-- fromList = DAG.fromList' None . map fromSeg