module Language.Lojban.Jbovlaste
    (-- * Types
     JboDB
    ,JboValsi
    ,JboValsiType(..)
    -- * Generation of the database
    ,genDB
    ,genDBString
    ,readDB
    -- * Querying the database
    ,findValsi
    ,filterValsi
    ,valsi
    ,defSub
    ,defWildCard
    -- * Accessing parts of valsis
    ,valsiWord
    ,valsiGloss
    ,valsiDef
    ,valsiRafsis
    ,valsiNotes
    ,valsiSelma'o
    ,valsiSelrafsis
    ,valsiType
     -- * Showing valsi
    ,showValsi
    ,showType
    )
    where

import Utils
import qualified System.IO.Strict as StrictIO
import Text.XML.Light.Input
import Text.XML.Light.Types
import Text.XML.Light.Proc
import Data.Map (Map)
import qualified Data.Map as M
import Data.Maybe
import Text.Regex
import Data.List
import Data.Ord
import Language.Lojban.Lujvo
import Data.Map (Map)
import Control.Monad
import Data.Function
import WildCard
import Data.Char

-- | Find valsi(s) by word or gloss or rafsi.
valsi :: JboDB -> String -> [JboValsi]
valsi db s = sortBy (comparing valsiType) $ filterValsi db match where
    match e = valsiWord e == s || any (==s) (valsiGloss e)
              || (valsiType e == GismuType && 
                  (any (==s) (valsiRafsis e) 
                   || (length s == 4 && s `isPrefixOf` valsiWord e)))

-- | Find valsi(s) by definition substring.
defSub :: JboDB -> String -> [JboValsi]
defSub db s = filterValsi db match where
    match | any (not . isLetter) s = match1
          | otherwise              = match2
    match1 = isInfixOf (lower s) . lower . valsiDef
    match2 = any ((isPrefixOf $ lower s) . lower) . splitBy (not . isLetter) . valsiDef

-- | Find valsi(s) by definition wild card string.
defWildCard :: JboDB -> String -> [JboValsi]
defWildCard db s = filterValsi db match where
    match = isNothing . wildcard s . valsiDef

-- | Filter valsi(s) by a predicate.
filterValsi :: JboDB -> (JboValsi -> Bool) -> [JboValsi]
filterValsi db f = map snd $ filter (f . snd) $ M.assocs $ valsis $ db

-- | Find valsi(s) by a predicate.
findValsi :: JboDB -> (JboValsi -> Bool) -> Maybe JboValsi
findValsi db f = (fmap snd) $ find (f . snd) $ M.assocs $ valsis $ db

-- | Read in a database from file.
readDB :: FilePath -> IO JboDB
readDB f = (JboDB . genMapLujvo . valsis . read) `fmap` readFile f

-- | Generate a database from the Jbovlaste XML export file.
genDB :: FilePath -> IO JboDB
genDB f = genDBString `fmap` StrictIO.readFile f

-- | Generate a database from the Jbovlaste XML export.
genDBString :: String -> JboDB
genDBString = JboDB . genMap' . fromJust . parseXMLDoc

genMap' :: Element -> Map String JboValsi
genMap' xml = foldr update M.empty es where
    es = entries xml
    update e m | qname == name "valsi"
                 && any (==type') ["gismu","fu'ivla","cmavo","lujvo","cmavo cluster"]
                     = M.insert w (elemToValsi e es m) m
               | otherwise = m
             where qname = elName e
                   w = fromJust $ attr "word" e
                   type' = fromMaybe "" $ attr "type" e

genMapLujvo :: Map String JboValsi -> Map String JboValsi
genMapLujvo m = foldr resolve m $ M.toList m where
    resolve (w,v) = case valsiType v of
                      LujvoType -> let (JboLujvo rs _ e) = v
                                       ss = catMaybes $ map (flip selrafsi m) rs
                                   in M.insert w (JboLujvo rs ss e)
                      _         -> id

entries :: Element -> [Element]
entries = filterElements correct where
    correct e = qname == name "valsi" || qname == name "nlword"
        where qname = elName e

elemToValsi :: Element -> [Element] -> Map String JboValsi -> JboValsi
elemToValsi e es m | type' == "gismu"         = makeGismu e es
                   | type' == "fu'ivla"       = makeFu'ivla e es
                   | type' == "cmavo cluster" = makeCmavo True e es
                   | type' == "cmavo"         = makeCmavo False e es
                   | type' == "lujvo"         = makeLujvo e es m
              where type' = fromJust $ attr "type" e
                    word = fromJust $ attr "word" e

makeGismu :: Element -> [Element] -> JboValsi
makeGismu e es = JboGismu rafsis (JboEntry word gloss def notes) where
    (rafsis,word,gloss,def,notes) = entryGet e es

makeFu'ivla :: Element -> [Element] -> JboValsi
makeFu'ivla e es = JboFu'ivla (JboEntry word gloss def notes) where
    (_,word,gloss,def,notes) = entryGet e es

makeCmavo :: Bool -> Element -> [Element] -> JboValsi
makeCmavo c e es = JboCmavo c rafsis selma'o (JboEntry word gloss def notes) where
    (rafsis,word,gloss,def,notes) = entryGet e es
    selma'o = subStr e "selmaho"

makeLujvo :: Element -> [Element] -> Map String JboValsi -> JboValsi
makeLujvo e es m = JboLujvo rafsis' [] (JboEntry word gloss def notes) where
    (_,word,gloss,def,notes) = entryGet e es
    rafsis' = rafsis word

selrafsi :: String -> Map String JboValsi -> Maybe JboValsi
selrafsi s = fmap snd . find sel . M.assocs where
    sel (w,v) = case valsiType v of
                  GismuType -> (isPrefixOf s w && length s >= 4) || any (==s) rafsis
                  CmavoType -> any (==s) rafsis
                  _         -> False
       where rafsis = valsiRafsis v

entryGet :: Element -> [Element] -> ([String],String,[String],String,String)
entryGet e es = (rafsis,word,gloss,def,notes) where
    word = fromJust $ attr "word" e
    rafsis = map strContent $ filterElements ((==name "rafsi") . elName) e
    gloss = glosses word es
    def = subStr e "definition"
    notes = subStr e "notes"

subStr e n = format $ fromMaybe "" $ strContent `fmap` findElement (name n) e

glosses w es = map (fromMaybe "" . attr "word") $ sortBy (comparing $ attr "place") $ filter glossForThis es where
    glossForThis e = elName e == name "nlword"
                     && attr "valsi" e == Just w

-- Format descriptions, strip whitespace, change LateX parts to
-- simple text, e.g. $x_1$ to x1.
format :: String -> String
format = trim . third . second . first where
    first = flip (subRegex (mkRegex "\\$([a-z]+)_([0-9]+)\\$")) "\\1\\2"
    second = flip (subRegex (mkRegex "\\$([a-z]+)_\\{([0-9]+)\\}\\$")) "\\1\\2"
    third = flip (subRegex (mkRegex "\\$([a-z]+)_([0-9]+)=([a-z]+)_([0-9]+)\\$")) "\\1\\2=\\3\\4"

-- | Get the word of a valsi.
valsiWord :: JboValsi -> String
valsiWord = entry _entryWord

-- | Get the gloss(es) of a valsi.
valsiGloss :: JboValsi -> [String]
valsiGloss = entry _entryGloss

-- | Get the definition of a valsi.
valsiDef :: JboValsi -> String
valsiDef = entry _entryDef

-- | Get the notes of a valsi.
valsiNotes :: JboValsi -> String
valsiNotes = entry _entryNotes

-- | Get the selma'o of a cmavo (Nothing for non-cmavo).
valsiSelma'o :: JboValsi -> Maybe String
valsiSelma'o w = case valsiType w of
                   CmavoType -> Just (_cmavoSelma'o w)
                   _         -> Nothing

-- | Get the selrafsis of a lujvo (empty list for non-lujvo).
valsiSelrafsis :: JboValsi -> [JboValsi]
valsiSelrafsis w = case valsiType w of
                     LujvoType -> _lujvoSelrafsis w
                     _         -> []

entry = (. join valsiEntry)

valsiEntry w =
    case valsiType w of
      GismuType   -> _gismuEntry
      CmavoType   -> _cmavoEntry
      LujvoType   -> _lujvoEntry
      Fu'ivlaType -> _fu'ivlaEntry

-- | Get the type of a valsi.
valsiType :: JboValsi -> JboValsiType
valsiType (JboGismu _ _)   = GismuType
valsiType (JboCmavo _ _ _ _) = CmavoType
valsiType (JboLujvo _ _ _) = LujvoType
valsiType (JboFu'ivla _ )  = Fu'ivlaType

showType w = case valsiType w of
               CmavoType | _cmavoCluster w -> "cmavo cluster"
               _ -> map toLower . takeWhile (/='T') . show $ valsiType w

data JboDB = JboDB { valsis :: Map String JboValsi } 
           deriving (Read,Show)

data JboEntry = 
    JboEntry {
      _entryWord   :: String
    , _entryGloss  :: [String]
    , _entryDef    :: String
    , _entryNotes  :: String
    }
              deriving (Read,Show)

data JboValsiType = CmavoType | GismuType | LujvoType | Fu'ivlaType  
                deriving (Eq,Show,Read,Ord)

-- | An opaque data type on which accessors can be used.
data JboValsi =
    JboGismu { _gismuRafsis :: [String]
             , _gismuEntry  :: JboEntry }
  | JboCmavo { _cmavoCluster :: Bool
             , _cmavoRafsis :: [String]
             , _cmavoSelma'o :: String
             , _cmavoEntry   :: JboEntry }
  | JboLujvo { _lujvoRafsis    :: [String]
             , _lujvoSelrafsis :: [JboValsi]
             , _lujvoEntry     :: JboEntry }
  | JboFu'ivla { _fu'ivlaEntry :: JboEntry }
    deriving (Show,Read)

showValsi :: JboValsi -> String
showValsi w = showType w ++ " " ++ braces (valsiWord w)
              ++ selma'o (valsiSelma'o w)
              ++ rafsis (valsiRafsis w) 
              ++ selrafs (lujvoSelrafsis w) ++ selgloss (lujvoSelrafsis w)
              ++ glosses (valsiGloss w)
              ++ ": " ++ valsiDef w ++ notes (valsiNotes w)
    where rafsis = list "" ((", with rafsi "++) . braces . commas)
          selrafs = list "" ((", selrafsi "++) . braces . commas . map valsiWord)
          selgloss = list "" ((' ':) . parens . commas . map (slashes . valsiGloss))
          selma'o = maybe "" (", of selma'o "++)
          glosses = list "" ((", glossing to "++) . commas . map speech)
          notes = list "" (" Notes: "++)

instance Eq JboValsi where (==) = (==) `on` valsiWord
instance Ord JboValsi where compare = compare `on` valsiType

-- | Get the selrafsis of a lujvo.
lujvoSelrafsis :: JboValsi -> [JboValsi]
lujvoSelrafsis w = case valsiType w of
                     LujvoType -> _lujvoSelrafsis w
                     _         -> []

-- | Get any rafsis of a valsi.
valsiRafsis :: JboValsi -> [String]
valsiRafsis w = 
    case valsiType w of
      GismuType -> _gismuRafsis w
      CmavoType -> _cmavoRafsis w
      LujvoType -> _lujvoRafsis w
      _         -> []

-- Utilities

braces s = "{" ++ s ++ "}"
parens s = "(" ++ s ++ ")"
speech s = "\"" ++ s ++ "\""
commas = intercalate ", " . filter (not . null)
slashes = intercalate "/" . filter (not . null)
name n = QName n Nothing Nothing
attr = findAttr . name
lower = map toLower