{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE DeriveDataTypeable #-}


module NLP.Concraft.Polish.DAG.Format.Base
(
-- * Printing
  ShowCfg (..)
, ProbType (..)
, showSent
, showData

-- * Parsing
, parseData
, parseSent
) where


import           Prelude hiding (Word)
import           Data.Monoid (mconcat, mappend)
import qualified Data.Map as M
import           Data.List (intersperse, groupBy)
-- import           Data.Maybe (listToMaybe)
import           Data.String (IsString)
import           Data.Data (Data)
import           Data.Typeable (Typeable)
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import qualified Data.Text.Lazy.Builder as L
import           Text.Printf (printf)
import           Text.Read (readMaybe)

import qualified Data.DAG as DAG
-- import qualified Data.CRF.Chain1.Constrained.DAG.Dataset.Internal as DAG

import qualified NLP.Concraft.DAG.Morphosyntax as X
-- import qualified NLP.Concraft.Polish.DAG2 as C
-- import           NLP.Concraft.Polish.DAG2 (AnnoSent(..))
-- import qualified NLP.Concraft.Polish.DAGSeg as C
import           NLP.Concraft.Polish.DAGSeg (AnnoSent(..))
import qualified NLP.Concraft.Polish.Morphosyntax as I

import           NLP.Concraft.Polish.DAG.Morphosyntax hiding (tag, Tag)
import qualified NLP.Concraft.Polish.DAG.Morphosyntax as PolX


-----------------------------
-- Base
-----------------------------


type Tag = PolX.Interp PolX.Tag


-----------------------------
-- Showing
-----------------------------


-- | Printing configuration.
data ShowCfg = ShowCfg
--   { suppressProbs :: Bool
--     -- ^ Do not show any probabilities
  { probType  :: ProbType
    -- ^ Which type of probabilities to show (unless suppressed)
  , numericDisamb :: Bool
    -- ^ Print disamb markers as numerical values instead of probability values
  }


-- | Type of probabilities.
data ProbType
  = Marginals
    -- ^ Marginals of the disambiguation model
  | MaxProbs
    -- ^ Max probabilities of the disambiguation model
  | GuessedMarginals
    -- ^ Marginals of the guessing model
  deriving (Show, Eq, Ord, Enum, Typeable, Data)
-- Above, deriving Typeable and Data so that it can be easily parsed
-- for the command-line tool.


-- mkProbType :: ProbType -> Disamb.ProbType
-- mkProbType Marginals = Disamb.Marginals
-- mkProbType MaxProbs = Disamb.MaxProbs


-- | Show entire data.
showData :: ShowCfg -> [[AnnoSent]] -> L.Text
showData cfg
  = flip L.append "\n"
  . L.toLazyText
  . mconcat
  . intersperse "\n"
  . map (buildSents cfg)

-- | Show the given sentence.
showSent :: ShowCfg -> [AnnoSent] -> L.Text
showSent cfg = L.toLazyText . buildSents cfg

buildSents :: ShowCfg -> [AnnoSent] -> L.Builder
buildSents cfg =
  finalize . map (buildSent cfg)
  where
    -- finalize = (`mappend` "\n") . mconcat . intersperse "\n"
    finalize = mconcat

buildSent :: ShowCfg -> AnnoSent -> L.Builder
buildSent showCfg AnnoSent{..} = finalize $ do
  let dag = guessSent
  edgeID <- DAG.dagEdges dag
  let tailNode = DAG.begsWith edgeID dag
      headNode = DAG.endsWith edgeID dag
      X.Seg{..} = DAG.edgeLabel edgeID dag
  interpWeight <- map Just (M.toList (X.unWMap tags)) ++
            if known word then [] else [Nothing]
  return $ case interpWeight of
    Just (interp@Interp{..}, weight) -> buildInterp
      showCfg tailNode headNode word interp
      (case probType showCfg of
          Marginals ->
            tagWeightIn edgeID interp marginals
          MaxProbs ->
            tagWeightIn edgeID interp maxProbs
          GuessedMarginals ->
            weight)
      (tagLabelIn False edgeID interp disambs)
    -- below, the case when the word is unknown
    Nothing ->
      let interp = Interp
            { base = "none"
            , tag = ign
            , commonness = Nothing
            , qualifier = Nothing
            , metaInfo = Nothing
            , eos = False }
      in  buildInterp showCfg tailNode headNode word interp 0 False
  where
    finalize = (`mappend` "\n") . mconcat . intersperse "\n"
    tagWeightIn = tagLabelIn 0
    tagLabelIn def i x anno
      = maybe def (tagLabel def x) (DAG.maybeEdgeLabel i anno)
    tagLabel def x = maybe def id . M.lookup x


buildInterp
  :: ShowCfg
  -> DAG.NodeID  -- ^ Tail node
  -> DAG.NodeID  -- ^ Head node
  -> Word        -- ^ Word
  -> Interp PolX.Tag
  -> Double
  -> Bool
  -> L.Builder
buildInterp ShowCfg{..} tailNode headNode word Interp{..} weight disamb =
  mconcat $ intersperse "\t" $
  [ buildNode tailNode
  , buildNode headNode
  , L.fromText $ orth word
  , L.fromText $ if known word then base else orth word
  , L.fromText tag
  , buildMayText commonness
  , buildMayText qualifier
  , if numericDisamb
    then buildDisamb disamb
    else buildWeight weight
  , buildMayText metaInfo
  , if eos then "eos" else ""
  ] ++
  if numericDisamb then [] else [buildDisamb disamb]
  where
    buildNode (DAG.NodeID i) = L.fromString (show i)
    buildWeight = L.fromString . printf "%.4f"
    buildDisamb True  = if numericDisamb then "1.0000" else "disamb"
    buildDisamb False = if numericDisamb then "0.0000" else ""
    -- buildDmb = between "\t" "\n" . L.fromString . printf "%.3f"
    -- between x y z = x <> z <> y
    buildMayText Nothing = ""
    buildMayText (Just x) = L.fromText x


-----------------------------
-- Parsing
-----------------------------


-- | Parse the text in the DAG format.
parseData :: L.Text -> [Sent Tag]
parseData =
  map parseSent . filter realSent . L.splitOn "\n\n"
  where
    realSent = not . L.null


-- | Parse sentence in the DAG format.
parseSent :: L.Text -> Sent Tag
parseSent = fromRows . parseRows


data Row = Row
  { tailNode   :: Int
  , headNode   :: Int
  , orthForm   :: T.Text
  , baseForm   :: T.Text
  , theTag     :: PolX.Tag
  , commonness :: Maybe T.Text
  , qualifier  :: Maybe T.Text
  , tagProb    :: Double
  , metaInfo   :: Maybe T.Text
  , eos        :: Bool
  }


fromRows :: [Row] -> Sent Tag
fromRows =
  -- DAG.fromList' I.None . zip (repeat I.None) . getEdges
  DAG.mapN (const I.None) . DAG.fromEdgesUnsafe . getEdges
  where
    getEdges = map mkEdge . groupBy theSameEdge
    theSameEdge r1 r2
      =  tailNode r1 == tailNode r2
      && headNode r1 == headNode r2
    mkEdge [] = error "Format.Base.fromRows: empty list"
    mkEdge rows@(row0:_) = DAG.Edge
      { DAG.tailNode = DAG.NodeID $ tailNode row0
      , DAG.headNode = DAG.NodeID $ headNode row0
      , DAG.edLabel = edge }
      where
        edge = X.Seg
          { word = newWord
          , tags = newTags }
        newWord = Word
          { orth = orthForm row0
          , known = not $ ign `elem` map theTag rows }
        newTags = X.mkWMap
          [ (interp, tagProb)
          | Row{..} <- rows
          , not $ theTag == ign
          , let interp = Interp
                  { base = baseForm
                  , tag = theTag
                  , commonness = commonness
                  , qualifier = qualifier
                  , metaInfo = metaInfo
                  , eos = eos }
          ]


parseRows :: L.Text -> [Row]
parseRows = map parseRow . L.splitOn "\n"


parseRow :: L.Text -> Row
parseRow =
  doit . L.splitOn "\t"
  where
    doit (tlNode : hdNode : otForm : bsForm : tag :
          comm : qual : prob : meta : eos : _) = Row
      { tailNode = readTyp "tail node" $ L.unpack tlNode
      , headNode = readTyp "head node" $ L.unpack hdNode
      , orthForm = L.toStrict otForm
      , baseForm = L.toStrict bsForm
      , theTag   = L.toStrict tag
      , commonness = nullIfEmpty comm
      , qualifier = nullIfEmpty qual
      , tagProb  = readTyp "probability value" $ L.unpack prob
      , metaInfo = nullIfEmpty meta
      , eos = case eos of
          "eos" -> True
          _ -> False
      }
    doit _ = error "parseRow: unexpected number of row cells"
    nullIfEmpty x = case x of
      "" -> Nothing
      _  -> Just (L.toStrict x)


-----------
-- Utils
-----------


-- -- | An infix synonym for 'mappend'.
-- (<>) :: Monoid m => m -> m -> m
-- (<>) = mappend
-- {-# INLINE (<>) #-}


readTyp :: (Read a) => String -> String -> a
readTyp typ x =
  case readMaybe x of
    Just y -> y
    Nothing -> error $
      "unable to parse \"" ++ x ++ "\" to a " ++ typ
--       "Unable to parse <" ++ typ ++ ">" ++
--       " (string=" ++ x ++ ")"


-- | Tag which indicates unknown words.
ign :: IsString a => a
ign = "ign"
{-# INLINE ign #-}