-- | A module which provides an way to query XML exports of Jbovlaste.

{-# LANGUAGE PatternGuards #-}

module Lojban.Jbovlaste 
    (-- * Types
     JbovlasteDB
    ,JbovlasteEntry
    ,JbovlasteEntryType(..)
    -- * Opening
    ,openJbovlaste
    -- * Querying
    ,valsi
    ,selma'o
    ,selrafsi
    ,selrafsis
    ,findByDef
    ,filterEntries
    -- * Inspection
    ,entryWord
    ,entryType
    ,entryGloss)
    where

import Prelude hiding (readFile,elem)
import System.IO.Strict (readFile)
import Text.XML.Light.Input
import Text.XML.Light.Types
import Text.XML.Light.Proc
import Control.Applicative
import Data.Maybe
import Data.List hiding (elem)
import Data.Char
import Data.Function
import Lojban.Lujvo

-- | Open an XML export of Jbovlaste for querying (strictly).
openJbovlaste :: FilePath -> IO (Maybe JbovlasteDB)
openJbovlaste path = (JDB path <$>) <$> parseXMLDoc <$> readFile path

-- | Find the selrafsis of a lujvo.
selrafsis :: JbovlasteDB -> String -> [String]
selrafsis db = map fromJust . catMaybes . map ((entryWord <$>) . selrafsi db . show) . rafsis

-- | Filter entries according to a predicate.
filterEntries :: JbovlasteDB -> (JbovlasteEntry -> Bool) -> [JbovlasteEntry]
filterEntries db f = (entry db <$>) . filterElements (f . entry db) $ elem db

-- | Find (maybe) a valsi by rafsi.
selrafsi :: JbovlasteDB -> String -> Maybe JbovlasteEntry
selrafsi db t = entry db <$> filterElement match (elem db) where
    match e | any ((==t) . strContent) $ findChildren (name "rafsi") e = True
            | length t' >= 4 && isPrefixOf t (fromMaybe "" $ attr "word" e) 
              && (attr "type" e == Just "gismu" || attr "type" e == Just "cmavo") = True
            | otherwise = False
    t' = filter (/='\'') t

-- | Find valsi(s) by selma'o.
selma'o :: JbovlasteDB -> String -> [JbovlasteEntry]
selma'o db t = entry db <$> filterElements match (elem db) where
    match = any ((==lower t) . lower . strContent) . findChildren (name "selmaho")

-- | Find a valsi by searching for word or gloss, and
--   resolving gloss entries to valsi entries.
valsi :: JbovlasteDB -> String -> [JbovlasteEntry]
valsi db t = entry db <$> (resolve db $ filterElements match (elem db)) where
    match e | Just w <- attr "word" e = w == t
            | otherwise               = False

-- | Find valsis according to a predicate applied to the definition.
findByDef :: JbovlasteDB -> (String -> Bool) -> [JbovlasteEntry]
findByDef db t = entry db <$> filterElements match (elem db) where
    match = any (t . strContent) . findChildren (name "definition")

-- | Inspect an entry for the word.
entryWord :: JbovlasteEntry -> Maybe String
entryWord = attr "word" . entryElem

-- | Inspect an entry for the gloss.
entryGloss :: JbovlasteEntry -> Maybe String
entryGloss = _entryGloss

-- | What type of word is the entry?
entryType :: JbovlasteEntry -> JbovlasteEntryType
entryType e = case attr "type" $ entryElem e of
                Just "cmavo" -> Cmavo
                Just "lujvo" -> Lujvo
                Just "gismu" -> Gismu
                _ -> Other

-- Construct an entry.
entry db e = Entry e (gloss db e)

-- Find the gloss for a valsi.
gloss :: JbovlasteDB -> Element -> Maybe String
gloss db e = attr "word" =<< filterElement filter (elem db) where
    filter e' | elName e' == name "nlword"
              , attr "place" e' == Just "1" || attr "place" e' == Nothing
              , attr "valsi" e' == attr "word" e = True
              | otherwise                        = False

-- Resolve gloss words to their lojban valsi entries.
resolve :: JbovlasteDB -> [Element] -> [Element]
resolve db = map resolve where
    resolve e | elName e == name "nlword"
              , Just v <- attr "valsi" e = fromMaybe e $ findValsi db v
              | otherwise                = e

-- Find a valsi.
findValsi :: JbovlasteDB -> String -> Maybe Element
findValsi db w = filterElement match $ elem $ db where
    match e | elName e == name "valsi"
            , Just w' <- attr "word" e = w' == w
            | otherwise                = False

instance Show JbovlasteDB where
    show (JDB path _) = "JbovlasteDB: " ++ path

-- | Opaque data type to be operated on.
data JbovlasteDB = JDB { path :: FilePath, elem :: Element }

instance Show JbovlasteEntry where
    show (Entry e g) = thetype ++ word ++ selmaho ++ rafsi ++ gloss ++ def ++ notes where
      thetype = fromMaybe "" $ attr "type" e
      word = maybe "" (\w -> " {" ++ w ++ "}") $ attr "word" e
      selmaho = maybe "" selmaho' (strContent <$> findChild (name "selmaho") e)
      selmaho' e = ", of selma'o {" ++ e ++ "}"
      rafsi = rafsi' $ commas $ map strContent $ findChildren (name "rafsi") e
      rafsi' "" = ""
      rafsi' rs = " with rafsi {" ++ rs ++ "}"
      def = maybe "" (": "++) $ strContent <$> findChild (name "definition") e
      notes = maybe "" notes' $ strContent <$> findChild (name "notes") e
      notes' n = " (" ++ n ++ ")"
      gloss = maybe "" ((", glossing to {"++) . (++"}")) g

instance Eq JbovlasteEntry where
    x == y = ((==) `on` entryType) x y && ((==) `on` entryWord) x y

-- | Opaque data type for entries.
data JbovlasteEntry = Entry { entryElem :: Element, _entryGloss :: Maybe String }
data JbovlasteEntryType = Gismu | Cmavo | Lujvo | Other deriving (Eq,Show,Ord)

-- Utilities
-- Make a simple name.
name n = QName n Nothing Nothing
-- Find an attribute of a specific name.
attr = findAttr . name
-- Just separate a list by commas.
commas = concat . intersperse ", "
--
lower = map toLower