{-# LANGUAGE PatternGuards, CPP #-} {-# OPTIONS_GHC -fno-warn-orphans #-} ----------------------------------------------------------------------------- -- | -- Module : Text.CSL.Input.MODS -- Copyright : (c) Andrea Rossato -- License : BSD-style (see LICENSE) -- -- Maintainer : Andrea Rossato -- Stability : unstable -- Portability : unportable -- -- An ugly MODS parser -- ----------------------------------------------------------------------------- module Text.CSL.Input.MODS where import Text.CSL.Output.Plain ( (<+>) ) import Text.CSL.Reference import Text.CSL.Pickle import Text.CSL.Style ( betterThen ) import Data.Char ( isDigit, isLower ) #ifdef USE_HXT import Text.XML.HXT.Arrow.Pickle.Xml #endif -- | Read a file with a single MODS record. readModsFile :: FilePath -> IO Reference readModsFile = readXmlFile xpMods -- | Read a file with a collection of MODS records. readModsCollectionFile :: FilePath -> IO [Reference] readModsCollectionFile = readXmlFile xpModsCollection xpModsCollection :: PU [Reference] xpModsCollection = xpIElem "modsCollection" $ xpList xpMods xpMods :: PU Reference xpMods = xpIElem "mods" xpReference xpReference :: PU Reference xpReference = xpWrap ( \ ( ref , (ck,ty,ti) ,((au,ed,tr),(re,it,pu'),(co,ce)) ,((di',pg,vl,is),(nu,sc,ch)) , (di,pu,pp) , (ac,uri) ) -> ref { refId = ck `betterThen` take 10 (concat $ words ti) , refType = if refType ref /= NoType then refType ref else ty , title = ti , author = au , editor = ed `betterThen` editor ref , translator = tr `betterThen` translator ref , recipient = re `betterThen` recipient ref , interviewer = it `betterThen` interviewer ref , composer = co `betterThen` composer ref , collectionEditor = ce `betterThen` collectionEditor ref , publisherPlace = pp `betterThen` publisherPlace ref , containerAuthor = containerAuthor ref , url = uri , accessed = ac , issued = issued ref `betterThen` di `betterThen` di' , page = page ref `betterThen` pg , volume = volume ref `betterThen` vl , issue = issue ref `betterThen` is , number = number ref `betterThen` nu , section = section ref `betterThen` sc , chapterNumber = chapterNumber ref `betterThen` ch , publisher = (foldr (<+>) [] . map show $ pu) `betterThen` publisher ref `betterThen` (foldr (<+>) [] . map show $ pu') } , \r -> ( r , (refId r, refType r, title r) ,((author r, editor r, translator r) ,(recipient r, interviewer r, emptyAgents ) ,(composer r, collectionEditor r)) ,((issued r, page r, volume r, issue r) ,(number r, section r, chapterNumber r)) , (issued r, emptyAgents, publisherPlace r) , (accessed r, url r) )) $ xp6Tuple (xpDefault emptyReference xpRelatedItem) (xpTriple xpCiteKey xpRefType xpTitle ) xpAgents xpPart xpOrigin xpUrl xpCiteKey :: PU String xpCiteKey = xpDefault [] $ xpChoice (xpAttr "ID" xpText) (xpIElem "identifier" xpText) xpLift xpOrigin :: PU ([RefDate],[Agent],String) xpOrigin = xpDefault ([],[],[]) . xpIElem "originInfo" $ xpTriple (xpDefault [] $ xpWrap (readDate,show) $ xpIElem "dateIssued" xpText0) (xpDefault [] $ xpList $ xpWrap (\s -> Agent [] [] [] s [] [] False, show) $ xpIElem "publisher" xpText0) (xpDefault [] $ xpIElem "place" $ xpIElem "placeTerm" xpText0) xpRefType :: PU RefType xpRefType = xpDefault NoType $ xpWrap (readType, const []) xpGenre where readType [] = NoType readType (t:_) | "conference publication" <- t = PaperConference | "periodical" <- t = ArticleJournal | otherwise = Book xpRefType' :: PU RefType xpRefType' = xpDefault NoType $ xpWrap (readTypeIn, const []) xpGenre where readTypeIn [] = NoType readTypeIn t | "book" `elem` t = Chapter | "conference publication" `elem` t = PaperConference | "academic journal" `elem` t = ArticleJournal | "collection" `elem` t = Chapter | otherwise = ArticleJournal xpGenre :: PU [String] xpGenre = xpList $ xpIElem "genre" $ xpChoice xpZero (xpPair (xpDefault [] $ xpAttr "authority" xpText) xpText) $ xpLift . snd xpRelatedItem :: PU Reference xpRelatedItem = xpIElem "relatedItem" . xpAddFixedAttr "type" "host" $ xpWrap ( \( (ty,ct) ,((ca,ed,tr),(re,it,pu'),(co,ce)) ,((di,pg,vl,is),(nu,sc,ch)) , (di',pu,pp) ) -> emptyReference { refType = ty , containerAuthor = ca , containerTitle = ct , editor = ed , translator = tr , recipient = re , interviewer = it , publisher = foldr (<+>) [] . map show $ pu `betterThen` pu' , publisherPlace = pp , composer = co , collectionEditor = ce , issued = di `betterThen` di' , page = pg , volume = vl , issue = is , number = nu , section = sc , chapterNumber = ch } , \r -> ( (refType r, containerTitle r) ,((containerAuthor r, editor r, translator r) ,(recipient r, interviewer r, emptyAgents ) ,(composer r, collectionEditor r)) ,((issued r, page r, volume r, issue r) ,(number r, section r, chapterNumber r)) , (issued r, emptyAgents, publisherPlace r) )) $ xp4Tuple (xpPair xpRefType' xpTitle) xpAgents xpPart xpOrigin -- FIXME: join title and subtitle correctly: usare Title per shortTitle. xpTitle :: PU String xpTitle = xpWrap (uncurry (<+>), \s -> (s,[])) $ xpIElem "titleInfo" $ xpPair (xpIElem "title" xpText0) (xpDefault [] $ xpIElem "subTitle" xpText0) xpAgents :: PU (([Agent],[Agent],[Agent]) ,([Agent],[Agent],[Agent]) ,([Agent],[Agent])) xpAgents = xpTriple (xpTriple (xpAgent "author" "aut") (xpAgent "editor" "edt") (xpAgent "translator" "trl")) (xpTriple (xpAgent "recipient" "rcp") (xpAgent "interviewer" "ivr") (xpAgent "publisher" "pbl")) (xpPair (xpAgent "composer" "cmp") (xpAgent "collector" "xol")) xpAgent :: String -> String -> PU [Agent] xpAgent sa sb = xpDefault [] $ xpList $ xpIElem "name" $ xpChoice xpZero (xpIElem "role" $ xpIElem "roleTerm" xpText0) (\x -> if x == sa || x == sb then xpickle else xpZero) instance XmlPickler Agent where xpickle = xpWrap ( uncurry parseName , \(Agent gn _ _ fn _ _ _) -> (gn,fn)) $ xpAddFixedAttr "type" "personal" xpNameData -- | "von Hicks,! Jr., Michael" or "la Martine,! III, Martin B. de" or -- "Rossato, Jr., Andrea G. B." or "Paul, III, Juan". parseName :: [String] -> String -> Agent parseName gn fn | ("!":sf:",":xs) <- gn = parse xs (sf ++ ".") True | ("!":sf :xs) <- gn , sf /= [] , last sf == ',' = parse xs sf True | (sf:",":xs) <- gn = parse xs (sf ++ ".") False | (sf :xs) <- gn , sf /= [], last sf == ',' = parse xs sf False | otherwise = parse gn "" False where parse g s b = Agent (getGiven g) (getDrop g) (getNonDrop fn) (getFamily fn) s [] b getDrop = unwords . filter (and . map isLower) getGiven = filter (not . and . map isLower) getNonDrop = getDrop . words getFamily = unwords . getGiven . words xpNameData :: PU ([String],String) xpNameData = xpWrap (readName,const []) $ xpList $ xpElem "namePart" $ xpPair (xpAttr "type" xpText) xpText0 where readName x = (readg x, readf x) readf = foldr (\(k,v) xs -> if k == "family" then v else xs) [] readg = foldr (\(k,v) xs -> if k == "given" then v:xs else xs) [] xpPart :: PU (([RefDate],String,String,String) ,(String,String,String)) xpPart = xpDefault none . xpIElem "part" . xpWrap (readIt none,const []) $ xpList xpDetail where none = (([],"","",""),("","","")) readIt r [] = r readIt acc@((d,p,v,i),(n,s,c)) (x:xs) | Date y <- x = readIt ((y,p,v,i),(n,s,c)) xs | Page y <- x = readIt ((d,y,v,i),(n,s,c)) xs | Volume y <- x = readIt ((d,p,y,i),(n,s,c)) xs | Issue y <- x = readIt ((d,p,v,y),(n,s,c)) xs | Number y <- x = readIt ((d,p,v,i),(y,s,c)) xs | ChapterNr y <- x = readIt ((d,p,v,i),(n,s,y)) xs | Section y <- x = readIt ((d,p,v,i),(n,y,c)) xs | otherwise = acc data Detail = Date [RefDate] | Page String | Volume String | Issue String | Number String | ChapterNr String | Section String deriving ( Eq, Show ) xpDetail :: PU Detail xpDetail = xpAlt tag ps where tag _ = 0 ps = [ xpWrap (Date, const []) $ xpDate , xpWrap (Page, show) $ xpPage , xpWrap (Volume, show) $ xp "volume" , xpWrap (Issue, show) $ xp "issue" , xpWrap (Number, show) $ xp "number" , xpWrap (Section, show) $ xp "section" , xpWrap (ChapterNr,show) $ xp "chapter" ] xpDate = xpWrap (readDate,show) (xpElem "date" xpText0) xp s = xpElemWithAttrValue "detail" "type" s $ xpElem "number" xpText xpPage :: PU String xpPage = xpChoice (xpElemWithAttrValue "detail" "type" "page" $ xpIElem "number" xpText) (xpElemWithAttrValue "extent" "unit" "page" $ xpPair (xpElem "start" xpText) (xpElem "end" xpText)) (\(s,e) -> xpLift (s ++ "-" ++ e)) xpUrl :: PU ([RefDate],String) xpUrl = xpDefault ([],[]) . xpIElem "location" $ xpPair (xpWrap (readDate,show) $ xpDefault [] $ xpAttr "dateLastAccessed" xpText) (xpDefault [] $ xpElem "url" xpText) readDate :: String -> [RefDate] readDate s = if takeWhile isDigit s /= [] then return $ RefDate (takeWhile isDigit s) [] [] [] [] [] else [] emptyAgents :: [Agent] emptyAgents = []