{-# LANGUAGE ApplicativeDo #-}
{-# LANGUAGE RecordWildCards #-}

module NLP.SentiwordnetParser
  ( parse
  , SentiWordNet(..)
  , Entry(..)
  , SynsetTerm(..)
  , POS(..)
  -- * internal stuff
  , parseSentiWordNet
  , parsePOS
  , parseDecimal
  , parseInt
  , parseEntry
  , parseSynsetTerm
  , parseComment
  , test
  ) where

import Control.Applicative
import Data.Decimal (Decimal)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.IO as T
import Safe
import Text.Trifecta

data SynsetTerm = SynsetTerm
  { name :: Text
  , num :: Int
  } deriving (Show, Eq)

data POS
  = Noun
  | Verb
  | Adjective
  | AdjectiveSatellite
  | Adverb
  deriving (Show, Eq)

data Entry = Entry
  { pos :: POS
  , id_ :: Text
  , posScore :: Decimal
  , negScore :: Decimal
  , synsetTerms :: [SynsetTerm]
  , gloss :: Text
  } deriving (Show, Eq)

data SentiWordNet = SentiWordNet
  { items :: [Entry]
  } deriving (Show, Eq)

parsePOS :: Parser POS
parsePOS =
  pure Noun <* char 'n' <|> pure Verb <* char 'v' <|> pure Adjective <* char 'a' <|>
  pure AdjectiveSatellite <* char 's' <|>
  pure Adverb <* char 'r'

parseInt :: Parser Int
parseInt = do
  s <- many alphaNum
  case readMay s of
    Nothing -> fail ("Failed to read an int: " ++ s)
    Just res -> return res

parseDecimal :: Parser Decimal
parseDecimal = do
  s <- many (alphaNum <|> char '.')
  let r = readMay s
  case r of
    Nothing -> fail ("Failed to read decimal: " ++ s)
    Just res -> return res

parseSynsetTerm :: Parser SynsetTerm
parseSynsetTerm = do
  name <- T.pack <$> many (noneOf ['#'])
  _ <- char '#'
  num <- parseInt
  return SynsetTerm {..}

parseEntry :: Parser Entry
parseEntry = do
  _ <- optional (many parseComment)
  pos <- parsePOS
  _ <- tab
  id_ <- T.pack <$> many (noneOf ['\t'])
  _ <- tab
  posScore <- parseDecimal
  _ <- tab
  negScore <- parseDecimal
  _ <- tab
  synsetTerms <- sepBy parseSynsetTerm (char ' ')
  _ <- tab
  gloss <- T.pack <$> many (noneOf ['\r', '\n'])
  _ <- optional newline
  return (Entry {..})

parseComment :: Parser String
parseComment = char '#' *> many (noneOf ['\n']) <* optional newline

parseSentiWordNet :: Parser SentiWordNet
parseSentiWordNet = SentiWordNet <$> some parseEntry

parse :: Text -> Result SentiWordNet
parse = parseString parseSentiWordNet mempty . T.unpack

test :: IO ()
test = do
  sentiWordNet <-
    T.readFile
      "/home/kb/Downloads/SentiWordNet_3.0.0/SentiWordNet_3.0.0_20130122.txt"
  let res = parse sentiWordNet
  case res of
    Success r -> print (length (items r))
    Failure e -> print e