module NLP.HistPL.Analyse
(
Token (..)
, Other (..)
, tokenize
, rmHyphen
, anaWord
, mapL
, JConf (..)
, ShowCont (..)
, defaultJConf
, jsonAna
) where
import Control.Applicative ((<$>), (<*>), pure)
import Data.Maybe (fromJust)
import Data.Monoid (Monoid, mappend, mconcat)
import Data.Ord (comparing)
import Data.List (sortBy, intersperse)
import qualified Data.Map as M
import qualified Data.Char as C
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import qualified Data.Text.Lazy.Builder as L
import Data.Aeson
import qualified NLP.Morfeusz as R
import qualified NLP.HistPL.Lexicon as H
data Token = Token {
orth :: T.Text
, hist :: [(H.LexEntry, H.Code)]
, cont :: [[R.Interp]] }
deriving (Show)
data Other
= Pun T.Text
| Space T.Text
deriving (Show)
rmHyphen :: L.Text -> L.Text
rmHyphen = L.concat . L.splitOn "-\n"
tokenize :: T.Text -> [Either T.Text Other]
tokenize =
map mkElem . T.groupBy cmp
where
cmp x y
| C.isPunctuation x = False
| C.isPunctuation y = False
| otherwise = C.isSpace x == C.isSpace y
mkElem x
| T.any C.isSpace x = Right (Space x)
| T.any C.isPunctuation x = Right (Pun x)
| otherwise = Left x
mapL :: (Functor m, Monad m) => (a -> m a') -> [Either a b] -> m [Either a' b]
mapL f =
let g (Left x) = Left <$> f x
g (Right y) = return (Right y)
in mapM g
anaWord :: H.HistPL -> T.Text -> IO Token
anaWord hpl x = do
_hist <- H.lookupMany hpl [x, T.toLower x]
_cont <- return (anaCont x)
return $ Token x _hist _cont
anaCont :: T.Text -> [[R.Interp]]
anaCont = map R.interps . head . R.paths . R.analyse False
data ShowCont
= NoShowCont
| ShowCont
| ForceShowCont
deriving (Show, Eq, Ord)
data JConf = JConf
{ showCont :: ShowCont
, showDefs :: Bool
} deriving (Show, Eq, Ord)
defaultJConf :: JConf
defaultJConf = JConf
{ showCont = ShowCont
, showDefs = False }
jsonAna :: JConf -> [Either Token Other] -> Value
jsonAna jc = toJSON . map (jsonSent jc)
jsonSent :: JConf -> Either Token Other -> Value
jsonSent jc = either (jsonTok jc) (jsonOther jc)
jsonOther :: JConf -> Other -> Value
jsonOther _ (Space _) = toJSON ("space" :: T.Text)
jsonOther _ (Pun t) = toJSON $ "pun: " `T.append` t
jsonTok :: JConf -> Token -> Value
jsonTok jc tok = object $
[ "orth" .= orth tok
, "hist" .= map (jsonHist jc) (hist tok) ] ++
maybeCont
where
maybeCont
| null (hist tok) && showCont jc /= NoShowCont = [contElem]
| showCont jc == ForceShowCont = [contElem]
| otherwise = []
contElem = "cont" .= jsonCont jc (cont tok)
jsonHist :: JConf -> (H.LexEntry, H.Code) -> Value
jsonHist jc (entry, code) = object $
[ "id" .= jsonID (H.lexId entry, code)
, "pos" .= H.pos entry
, "base" .= H.text (H.lemma entry) ]
++ if showDefs jc then [defsElem] else []
where
jsonID (id', cd') = toJSON (toJSON id', jsonCode cd')
jsonCode code' = toJSON $ case code' of
H.Orig -> "orig" :: T.Text
H.Both -> "both"
H.Copy -> "copy"
defsElem = "defs" .= concatMap H.text (getDefs entry)
getDefs = concatMap H.defs . H.senses
jsonCont :: JConf -> [[R.Interp]] -> Value
jsonCont _ = toJSON . map (map jsonI) where
jsonI x = object
[ "base" .= R.base x
, "msd" .= R.msd x ]
(<>) :: Monoid m => m -> m -> m
(<>) = mappend