module Language.Lojban.Jbovlaste (-- * Types JboDB ,JboValsi ,JboValsiType(..) -- * Generation of the database ,genDB ,genDBString ,readDB -- * Querying the database ,findValsi ,filterValsi ,valsi ,defSub ,defWildCard -- * Accessing parts of valsis ,valsiWord ,valsiGloss ,valsiDef ,valsiRafsis ,valsiNotes ,valsiSelma'o ,valsiSelrafsis ,valsiType -- * Showing valsi ,showValsi ,showType ) where import Utils import qualified System.IO.Strict as StrictIO import Text.XML.Light.Input import Text.XML.Light.Types import Text.XML.Light.Proc import Data.Map (Map) import qualified Data.Map as M import Data.Maybe import Text.Regex import Data.List import Data.Ord import Language.Lojban.Lujvo import Data.Map (Map) import Control.Monad import Data.Function import WildCard import Data.Char -- | Find valsi(s) by word or gloss or rafsi. valsi :: JboDB -> String -> [JboValsi] valsi db s = sortBy (comparing valsiType) $ filterValsi db match where match e = valsiWord e == s || any (==s) (valsiGloss e) || (valsiType e == GismuType && (any (==s) (valsiRafsis e) || (length s == 4 && s `isPrefixOf` valsiWord e))) -- | Find valsi(s) by definition substring. defSub :: JboDB -> String -> [JboValsi] defSub db s = filterValsi db match where match | any (not . isLetter) s = match1 | otherwise = match2 match1 = isInfixOf (lower s) . lower . valsiDef match2 = any ((isPrefixOf $ lower s) . lower) . splitBy (not . isLetter) . valsiDef -- | Find valsi(s) by definition wild card string. defWildCard :: JboDB -> String -> [JboValsi] defWildCard db s = filterValsi db match where match = isNothing . wildcard s . valsiDef -- | Filter valsi(s) by a predicate. filterValsi :: JboDB -> (JboValsi -> Bool) -> [JboValsi] filterValsi db f = map snd $ filter (f . snd) $ M.assocs $ valsis $ db -- | Find valsi(s) by a predicate. findValsi :: JboDB -> (JboValsi -> Bool) -> Maybe JboValsi findValsi db f = (fmap snd) $ find (f . snd) $ M.assocs $ valsis $ db -- | Read in a database from file. readDB :: FilePath -> IO JboDB readDB f = (JboDB . genMapLujvo . valsis . read) `fmap` readFile f -- | Generate a database from the Jbovlaste XML export file. genDB :: FilePath -> IO JboDB genDB f = genDBString `fmap` StrictIO.readFile f -- | Generate a database from the Jbovlaste XML export. genDBString :: String -> JboDB genDBString = JboDB . genMap' . fromJust . parseXMLDoc genMap' :: Element -> Map String JboValsi genMap' xml = foldr update M.empty es where es = entries xml update e m | qname == name "valsi" && any (==type') ["gismu","fu'ivla","cmavo","lujvo","cmavo cluster"] = M.insert w (elemToValsi e es m) m | otherwise = m where qname = elName e w = fromJust $ attr "word" e type' = fromMaybe "" $ attr "type" e genMapLujvo :: Map String JboValsi -> Map String JboValsi genMapLujvo m = foldr resolve m $ M.toList m where resolve (w,v) = case valsiType v of LujvoType -> let (JboLujvo rs _ e) = v ss = catMaybes $ map (flip selrafsi m) rs in M.insert w (JboLujvo rs ss e) _ -> id entries :: Element -> [Element] entries = filterElements correct where correct e = qname == name "valsi" || qname == name "nlword" where qname = elName e elemToValsi :: Element -> [Element] -> Map String JboValsi -> JboValsi elemToValsi e es m | type' == "gismu" = makeGismu e es | type' == "fu'ivla" = makeFu'ivla e es | type' == "cmavo cluster" = makeCmavo True e es | type' == "cmavo" = makeCmavo False e es | type' == "lujvo" = makeLujvo e es m where type' = fromJust $ attr "type" e word = fromJust $ attr "word" e makeGismu :: Element -> [Element] -> JboValsi makeGismu e es = JboGismu rafsis (JboEntry word gloss def notes) where (rafsis,word,gloss,def,notes) = entryGet e es makeFu'ivla :: Element -> [Element] -> JboValsi makeFu'ivla e es = JboFu'ivla (JboEntry word gloss def notes) where (_,word,gloss,def,notes) = entryGet e es makeCmavo :: Bool -> Element -> [Element] -> JboValsi makeCmavo c e es = JboCmavo c rafsis selma'o (JboEntry word gloss def notes) where (rafsis,word,gloss,def,notes) = entryGet e es selma'o = subStr e "selmaho" makeLujvo :: Element -> [Element] -> Map String JboValsi -> JboValsi makeLujvo e es m = JboLujvo rafsis' [] (JboEntry word gloss def notes) where (_,word,gloss,def,notes) = entryGet e es rafsis' = rafsis word selrafsi :: String -> Map String JboValsi -> Maybe JboValsi selrafsi s = fmap snd . find sel . M.assocs where sel (w,v) = case valsiType v of GismuType -> (isPrefixOf s w && length s >= 4) || any (==s) rafsis CmavoType -> any (==s) rafsis _ -> False where rafsis = valsiRafsis v entryGet :: Element -> [Element] -> ([String],String,[String],String,String) entryGet e es = (rafsis,word,gloss,def,notes) where word = fromJust $ attr "word" e rafsis = map strContent $ filterElements ((==name "rafsi") . elName) e gloss = glosses word es def = subStr e "definition" notes = subStr e "notes" subStr e n = format $ fromMaybe "" $ strContent `fmap` findElement (name n) e glosses w es = map (fromMaybe "" . attr "word") $ sortBy (comparing $ attr "place") $ filter glossForThis es where glossForThis e = elName e == name "nlword" && attr "valsi" e == Just w -- Format descriptions, strip whitespace, change LateX parts to -- simple text, e.g. $x_1$ to x1. format :: String -> String format = trim . third . second . first where first = flip (subRegex (mkRegex "\\$([a-z]+)_([0-9]+)\\$")) "\\1\\2" second = flip (subRegex (mkRegex "\\$([a-z]+)_\\{([0-9]+)\\}\\$")) "\\1\\2" third = flip (subRegex (mkRegex "\\$([a-z]+)_([0-9]+)=([a-z]+)_([0-9]+)\\$")) "\\1\\2=\\3\\4" -- | Get the word of a valsi. valsiWord :: JboValsi -> String valsiWord = entry _entryWord -- | Get the gloss(es) of a valsi. valsiGloss :: JboValsi -> [String] valsiGloss = entry _entryGloss -- | Get the definition of a valsi. valsiDef :: JboValsi -> String valsiDef = entry _entryDef -- | Get the notes of a valsi. valsiNotes :: JboValsi -> String valsiNotes = entry _entryNotes -- | Get the selma'o of a cmavo (Nothing for non-cmavo). valsiSelma'o :: JboValsi -> Maybe String valsiSelma'o w = case valsiType w of CmavoType -> Just (_cmavoSelma'o w) _ -> Nothing -- | Get the selrafsis of a lujvo (empty list for non-lujvo). valsiSelrafsis :: JboValsi -> [JboValsi] valsiSelrafsis w = case valsiType w of LujvoType -> _lujvoSelrafsis w _ -> [] entry = (. join valsiEntry) valsiEntry w = case valsiType w of GismuType -> _gismuEntry CmavoType -> _cmavoEntry LujvoType -> _lujvoEntry Fu'ivlaType -> _fu'ivlaEntry -- | Get the type of a valsi. valsiType :: JboValsi -> JboValsiType valsiType (JboGismu _ _) = GismuType valsiType (JboCmavo _ _ _ _) = CmavoType valsiType (JboLujvo _ _ _) = LujvoType valsiType (JboFu'ivla _ ) = Fu'ivlaType showType w = case valsiType w of CmavoType | _cmavoCluster w -> "cmavo cluster" _ -> map toLower . takeWhile (/='T') . show $ valsiType w data JboDB = JboDB { valsis :: Map String JboValsi } deriving (Read,Show) data JboEntry = JboEntry { _entryWord :: String , _entryGloss :: [String] , _entryDef :: String , _entryNotes :: String } deriving (Read,Show) data JboValsiType = CmavoType | GismuType | LujvoType | Fu'ivlaType deriving (Eq,Show,Read,Ord) -- | An opaque data type on which accessors can be used. data JboValsi = JboGismu { _gismuRafsis :: [String] , _gismuEntry :: JboEntry } | JboCmavo { _cmavoCluster :: Bool , _cmavoRafsis :: [String] , _cmavoSelma'o :: String , _cmavoEntry :: JboEntry } | JboLujvo { _lujvoRafsis :: [String] , _lujvoSelrafsis :: [JboValsi] , _lujvoEntry :: JboEntry } | JboFu'ivla { _fu'ivlaEntry :: JboEntry } deriving (Show,Read) showValsi :: JboValsi -> String showValsi w = showType w ++ " " ++ braces (valsiWord w) ++ selma'o (valsiSelma'o w) ++ rafsis (valsiRafsis w) ++ selrafs (lujvoSelrafsis w) ++ selgloss (lujvoSelrafsis w) ++ glosses (valsiGloss w) ++ ": " ++ valsiDef w ++ notes (valsiNotes w) where rafsis = list "" ((", with rafsi "++) . braces . commas) selrafs = list "" ((", selrafsi "++) . braces . commas . map valsiWord) selgloss = list "" ((' ':) . parens . commas . map (slashes . valsiGloss)) selma'o = maybe "" (", of selma'o "++) glosses = list "" ((", glossing to "++) . commas . map speech) notes = list "" (" Notes: "++) instance Eq JboValsi where (==) = (==) `on` valsiWord instance Ord JboValsi where compare = compare `on` valsiType -- | Get the selrafsis of a lujvo. lujvoSelrafsis :: JboValsi -> [JboValsi] lujvoSelrafsis w = case valsiType w of LujvoType -> _lujvoSelrafsis w _ -> [] -- | Get any rafsis of a valsi. valsiRafsis :: JboValsi -> [String] valsiRafsis w = case valsiType w of GismuType -> _gismuRafsis w CmavoType -> _cmavoRafsis w LujvoType -> _lujvoRafsis w _ -> [] -- Utilities braces s = "{" ++ s ++ "}" parens s = "(" ++ s ++ ")" speech s = "\"" ++ s ++ "\"" commas = intercalate ", " . filter (not . null) slashes = intercalate "/" . filter (not . null) name n = QName n Nothing Nothing attr = findAttr . name lower = map toLower