module Data.PoliMorf
(
Form
, Base
, Tag
, Entry (..)
, readPoliMorf
, parsePoliMorf
, BaseMap
, mkBaseMap
, RelCode (..)
, merge
) where
import Control.Applicative ((<$>), (<*>))
import Data.Monoid (Monoid, mappend)
import Data.List (foldl')
import Data.Binary (Binary, get, put)
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import qualified Data.Text.Lazy.IO as L
type Form = T.Text
type Base = T.Text
type Tag = T.Text
data Entry = Entry
{ form :: !Form
, base :: !Base
, tag :: !Tag }
deriving (Eq, Ord, Show, Read)
readPoliMorf :: FilePath -> IO [Entry]
readPoliMorf path = parsePoliMorf <$> L.readFile path
parsePoliMorf :: L.Text -> [Entry]
parsePoliMorf = map parsePoliRow . L.lines
parsePoliRow :: L.Text -> Entry
parsePoliRow row = case map L.toStrict (L.split (=='\t') row) of
[_form, _base, _tag] -> Entry _form _base _tag
_ -> error $ "parsePoliRow: invalid row \"" ++ L.unpack row ++ "\""
type BaseMap = M.Map Form (S.Set Base)
mkBaseMap :: [Entry] -> BaseMap
mkBaseMap = M.fromListWith S.union . map ((,) <$> form <*> S.singleton . base)
data RelCode
= Exact
| ByBase
| ByForm
deriving (Eq, Ord, Show, Read)
instance Binary RelCode where
put Exact = put '1'
put ByBase = put '2'
put ByForm = put '3'
get = get >>= \x -> return $ case x of
'1' -> Exact
'2' -> ByBase
'3' -> ByForm
c -> error $ "get: invalid RelCode code '" ++ [c] ++ "'"
merge :: Monoid m => BaseMap -> M.Map Form m -> M.Map Form (Maybe (m, RelCode))
merge poli dict0 =
M.fromList [(x, combine x) | x <- keys]
where
keys = S.toList (M.keysSet poli `S.union` M.keysSet dict0)
combine x
| Just y <- M.lookup x dict0 = Just (y, Exact)
| Just y <- M.lookup x dict1 = Just (y, ByBase)
| Just y <- M.lookup x dict2 = Just (y, ByForm)
| otherwise = Nothing
dict1 = fromListWith mappend
[ (lemma, x)
| (_form, x) <- M.assocs dict0
, lemma <- elemsOn poli _form ]
dict2 = fromListWith mappend
[ (form', x)
| (_form, x) <- M.assocs dict0
, lemma <- elemsOn poli _form
, form' <- elemsOn ilop lemma ]
ilop = fromListWith mappend
[ (lemma, S.singleton _form)
| (_form, lemmas) <- M.assocs poli
, lemma <- S.toList lemmas ]
elemsOn :: (Ord a, Ord b) => M.Map a (S.Set b) -> a -> [b]
elemsOn m x = case x `M.lookup` m of
Just s -> S.toList s
Nothing -> []
fromListWith :: Ord k => (a -> a -> a) -> [(k, a)] -> M.Map k a
fromListWith f xs =
let update m (!k, !x) = M.insertWith' f k x m
in foldl' update M.empty xs