module Folgerhs.Parse ( parseCorpus , corpus , parse ) where import Data.Maybe import Data.List import Text.XML.Light.Input (parseXML) import Text.XML.Light.Proc (onlyElems, elChildren) import Text.XML.Light.Types (QName (..), Element (..), Content, Attr (..) ) import Folgerhs.Stage isTag :: String -> Element -> Bool isTag n = (==) n . qName . elName drillTagPath :: [String] -> [Element] -> [Element] drillTagPath [] = id drillTagPath (n:ns) = drillTagPath ns . concatMap elChildren . filter (isTag n) attr :: String -> Element -> Maybe String attr n = listToMaybe . map attrVal . filter ((==) n . qName . attrKey) . elAttribs descendants :: Element -> [Element] descendants e = e : concatMap descendants (elChildren e) corpus :: [Content] -> [Element] corpus = concatMap descendants . drillTagPath ["TEI", "text", "body"] . onlyElems charName :: String -> Character charName c = let n = fromMaybe c (stripPrefix "#" c) in case span (/= '_') $ reverse n of ("", p) -> p (s, "") -> s (s, p) -> reverse $ tail p parseElement :: Element -> Maybe StageEvent parseElement el | isTag "milestone" el = case (attr "unit" el, attr "n" el) of (Just "ftln", Just n) -> Just $ Milestone n _ -> Nothing | isTag "sp" el = case attr "who" el of Just s -> Just $ Speech (charName s) _ -> Nothing | isTag "stage" el = case (attr "type" el, attr "who" el) of (Just "entrance", Just cs) -> Just $ Entrance (map charName $ words cs) (Just "exit", Just cs) -> Just $ Exit (map charName $ words cs) _ -> Nothing | otherwise = Nothing parseCorpus :: [Element] -> [StageEvent] parseCorpus [] = [] parseCorpus (e:es) = case parseElement e of Just se -> se : parseCorpus es Nothing -> parseCorpus es parse :: String -> [StageEvent] parse input = let content = parseXML input in parseCorpus (corpus content)