-- | A module which provides an way to query XML exports of Jbovlaste. {-# LANGUAGE PatternGuards #-} module Lojban.Jbovlaste (-- * Types JbovlasteDB ,JbovlasteEntry ,JbovlasteEntryType(..) -- * Opening ,openJbovlaste -- * Querying ,valsi ,selma'o ,selrafsi ,selrafsis ,findByDef ,filterEntries -- * Inspection ,entryWord ,entryType ,entryGloss) where import Prelude hiding (readFile,elem) import System.IO.Strict (readFile) import Text.XML.Light.Input import Text.XML.Light.Types import Text.XML.Light.Proc import Control.Applicative import Data.Maybe import Data.List hiding (elem) import Data.Char import Data.Function import Lojban.Lujvo -- | Open an XML export of Jbovlaste for querying (strictly). openJbovlaste :: FilePath -> IO (Maybe JbovlasteDB) openJbovlaste path = (JDB path <$>) <$> parseXMLDoc <$> readFile path -- | Find the selrafsis of a lujvo. selrafsis :: JbovlasteDB -> String -> [String] selrafsis db = map fromJust . catMaybes . map ((entryWord <$>) . selrafsi db . show) . rafsis -- | Filter entries according to a predicate. filterEntries :: JbovlasteDB -> (JbovlasteEntry -> Bool) -> [JbovlasteEntry] filterEntries db f = (entry db <$>) . filterElements (f . entry db) $ elem db -- | Find (maybe) a valsi by rafsi. selrafsi :: JbovlasteDB -> String -> Maybe JbovlasteEntry selrafsi db t = entry db <$> filterElement match (elem db) where match e | any ((==t) . strContent) $ findChildren (name "rafsi") e = True | length t' >= 4 && isPrefixOf t (fromMaybe "" $ attr "word" e) && (attr "type" e == Just "gismu" || attr "type" e == Just "cmavo") = True | otherwise = False t' = filter (/='\'') t -- | Find valsi(s) by selma'o. selma'o :: JbovlasteDB -> String -> [JbovlasteEntry] selma'o db t = entry db <$> filterElements match (elem db) where match = any ((==lower t) . lower . strContent) . findChildren (name "selmaho") -- | Find a valsi by searching for word or gloss, and -- resolving gloss entries to valsi entries. valsi :: JbovlasteDB -> String -> [JbovlasteEntry] valsi db t = entry db <$> (resolve db $ filterElements match (elem db)) where match e | Just w <- attr "word" e = w == t | otherwise = False -- | Find valsis according to a predicate applied to the definition. findByDef :: JbovlasteDB -> (String -> Bool) -> [JbovlasteEntry] findByDef db t = entry db <$> filterElements match (elem db) where match = any (t . strContent) . findChildren (name "definition") -- | Inspect an entry for the word. entryWord :: JbovlasteEntry -> Maybe String entryWord = attr "word" . entryElem -- | Inspect an entry for the gloss. entryGloss :: JbovlasteEntry -> Maybe String entryGloss = _entryGloss -- | What type of word is the entry? entryType :: JbovlasteEntry -> JbovlasteEntryType entryType e = case attr "type" $ entryElem e of Just "cmavo" -> Cmavo Just "lujvo" -> Lujvo Just "gismu" -> Gismu _ -> Other -- Construct an entry. entry db e = Entry e (gloss db e) -- Find the gloss for a valsi. gloss :: JbovlasteDB -> Element -> Maybe String gloss db e = attr "word" =<< filterElement filter (elem db) where filter e' | elName e' == name "nlword" , attr "place" e' == Just "1" || attr "place" e' == Nothing , attr "valsi" e' == attr "word" e = True | otherwise = False -- Resolve gloss words to their lojban valsi entries. resolve :: JbovlasteDB -> [Element] -> [Element] resolve db = map resolve where resolve e | elName e == name "nlword" , Just v <- attr "valsi" e = fromMaybe e $ findValsi db v | otherwise = e -- Find a valsi. findValsi :: JbovlasteDB -> String -> Maybe Element findValsi db w = filterElement match $ elem $ db where match e | elName e == name "valsi" , Just w' <- attr "word" e = w' == w | otherwise = False instance Show JbovlasteDB where show (JDB path _) = "JbovlasteDB: " ++ path -- | Opaque data type to be operated on. data JbovlasteDB = JDB { path :: FilePath, elem :: Element } instance Show JbovlasteEntry where show (Entry e g) = thetype ++ word ++ selmaho ++ rafsi ++ gloss ++ def ++ notes where thetype = fromMaybe "" $ attr "type" e word = maybe "" (\w -> " {" ++ w ++ "}") $ attr "word" e selmaho = maybe "" selmaho' (strContent <$> findChild (name "selmaho") e) selmaho' e = ", of selma'o {" ++ e ++ "}" rafsi = rafsi' $ commas $ map strContent $ findChildren (name "rafsi") e rafsi' "" = "" rafsi' rs = " with rafsi {" ++ rs ++ "}" def = maybe "" (": "++) $ strContent <$> findChild (name "definition") e notes = maybe "" notes' $ strContent <$> findChild (name "notes") e notes' n = " (" ++ n ++ ")" gloss = maybe "" ((", glossing to {"++) . (++"}")) g instance Eq JbovlasteEntry where x == y = ((==) `on` entryType) x y && ((==) `on` entryWord) x y -- | Opaque data type for entries. data JbovlasteEntry = Entry { entryElem :: Element, _entryGloss :: Maybe String } data JbovlasteEntryType = Gismu | Cmavo | Lujvo | Other deriving (Eq,Show,Ord) -- Utilities -- Make a simple name. name n = QName n Nothing Nothing -- Find an attribute of a specific name. attr = findAttr . name -- Just separate a list by commas. commas = concat . intersperse ", " -- lower = map toLower