{-# LANGUAGE PatternGuards, CPP #-} {-# OPTIONS_GHC -fno-warn-orphans #-} ----------------------------------------------------------------------------- -- | -- Module : Text.CSL.Input.MODS -- Copyright : (c) Andrea Rossato -- License : BSD-style (see LICENSE) -- -- Maintainer : Andrea Rossato -- Stability : unstable -- Portability : unportable -- -- An ugly MODS parser -- ----------------------------------------------------------------------------- module Text.CSL.Input.MODS where import Text.CSL.Eval ( split ) import Text.CSL.Output.Plain ( (<+>), tail' ) import Text.CSL.Pickle import Text.CSL.Reference import Text.CSL.Style ( betterThen ) import Data.Char ( isDigit, isLower ) import qualified Data.Map as M -- | Read a file with a single MODS record. readModsFile :: FilePath -> IO Reference readModsFile = readXmlFile xpMods -- | Read a file with a collection of MODS records. readModsCollectionFile :: FilePath -> IO [Reference] readModsCollectionFile = readXmlFile xpModsCollection xpModsCollection :: PU [Reference] xpModsCollection = xpIElem "modsCollection" $ xpList xpMods xpMods :: PU Reference xpMods = xpIElem "mods" xpReference xpReference :: PU Reference xpReference = xpWrap ( \ ( ref , (ck,ty,ti,i,d) ,((au,ed,tr),(re,it,pu'),(co,ce)) ,((di',pg,vl,is),(nu,sc,ch)) , (di,ac,pu,pp) , ((ac',uri),no) ) -> ref { refId = ck `betterThen` take 10 (concat $ words ti) , refType = if ty == NoType then refType ref else ty , title = ti , author = au , editor = ed `betterThen` editor ref , translator = tr `betterThen` translator ref , recipient = re `betterThen` recipient ref , interviewer = it `betterThen` interviewer ref , composer = co `betterThen` composer ref , collectionEditor = ce `betterThen` collectionEditor ref , publisherPlace = pp `betterThen` publisherPlace ref , containerAuthor = containerAuthor ref , url = uri , note = no , isbn = i , doi = d , issued = issued ref `betterThen` di `betterThen` di' , accessed = accessed ref `betterThen` ac `betterThen` ac' , page = page ref `betterThen` pg , volume = volume ref `betterThen` vl , issue = issue ref `betterThen` is , number = number ref `betterThen` nu , section = section ref `betterThen` sc , chapterNumber = chapterNumber ref `betterThen` ch , publisher = (foldr (<+>) [] . map show $ pu) `betterThen` publisher ref `betterThen` (foldr (<+>) [] . map show $ pu') } , \r -> ( r , (refId r, refType r, title r, isbn r, doi r) ,((author r, editor r, translator r) ,(recipient r, interviewer r, emptyAgents ) ,(composer r, collectionEditor r)) ,((issued r, page r, volume r, issue r) ,(number r, section r, chapterNumber r)) , (issued r, accessed r, emptyAgents, publisherPlace r) ,((accessed r, url r), note r) )) $ xp6Tuple (xpDefault emptyReference xpRelatedItem) (xp5Tuple xpCiteKey xpRefType xpTitle xpIsbn xpDoi) xpAgents xpPart xpOrigin (xpPair xpUrl xpNote) xpCiteKey :: PU String xpCiteKey = xpDefault [] $ xpChoice (xpAttr "ID" xpText) (xpElemWithAttrValue "identifier" "type" "citekey" xpText) xpLift xpOrigin :: PU ([RefDate],[RefDate],[Agent],String) xpOrigin = xpDefault ([],[],[],[]) . xpIElem "originInfo" $ xp4Tuple (xpDefault [] $ xpWrap (readDate,show) $ xpIElem "dateIssued" xpText0) (xpDefault [] $ xpWrap (readDate,show) $ xpIElem "dateCaptured" xpText0) (xpDefault [] $ xpList $ xpWrap (\s -> Agent [] [] [] s [] [] False, show) $ xpIElem "publisher" xpText0) (xpDefault [] $ xpIElem "place" $ xpIElem "placeTerm" xpText0) xpRefType :: PU RefType xpRefType = xpDefault NoType $ xpWrap (readRefType, const []) xpGenre xpGenre :: PU [String] xpGenre = xpList $ xpIElem "genre" $ xpChoice xpZero (xpPair (xpDefault [] $ xpAttr "authority" xpText) xpText) $ xpLift . snd xpRelatedItem :: PU Reference xpRelatedItem = xpIElem "relatedItem" . xpAddFixedAttr "type" "host" $ xpWrap ( \( (ty,ct) ,((ca,ed,tr),(re,it,pu'),(co,ce)) ,((di,pg,vl,is),(nu,sc,ch)) , (di',ac,pu,pp) ) -> emptyReference { refType = ty , containerAuthor = ca , containerTitle = ct , editor = ed , translator = tr , recipient = re , interviewer = it , publisher = foldr (<+>) [] . map show $ pu `betterThen` pu' , publisherPlace = pp , composer = co , collectionEditor = ce , issued = di `betterThen` di' , accessed = ac , page = pg , volume = vl , issue = is , number = nu , section = sc , chapterNumber = ch } , \r -> ( (refType r, containerTitle r) ,((containerAuthor r, editor r, translator r) ,(recipient r, interviewer r, emptyAgents ) ,(composer r, collectionEditor r)) ,((issued r, page r, volume r, issue r) ,(number r, section r, chapterNumber r)) , (issued r, accessed r,emptyAgents, publisherPlace r) )) $ xp4Tuple (xpPair xpRefType xpTitle) xpAgents xpPart xpOrigin -- FIXME: join title and subtitle correctly: usare Title per shortTitle. xpTitle :: PU String xpTitle = xpWrap (uncurry (<+>), \s -> (s,[])) $ xpIElem "titleInfo" $ xpPair (xpIElem "title" xpText0) (xpDefault [] $ xpIElem "subTitle" xpText0) xpAgents :: PU (([Agent],[Agent],[Agent]) ,([Agent],[Agent],[Agent]) ,([Agent],[Agent])) xpAgents = xpTriple (xpTriple (xpAgent "author" "aut") (xpAgent "editor" "edt") (xpAgent "translator" "trl")) (xpTriple (xpAgent "recipient" "rcp") (xpAgent "interviewer" "ivr") (xpAgent "publisher" "pbl")) (xpPair (xpAgent "composer" "cmp") (xpAgent "collector" "xol")) xpAgent :: String -> String -> PU [Agent] xpAgent sa sb = xpDefault [] $ xpList $ xpIElem "name" $ xpChoice xpZero (xpIElem "role" $ xpIElem "roleTerm" xpText0) (\x -> if x == sa || x == sb then xpickle else xpZero) instance XmlPickler Agent where xpickle = xpWrap ( uncurry parseName , \(Agent gn _ _ fn _ _ _) -> (gn,fn)) $ xpAddFixedAttr "type" "personal" xpNameData -- | "von Hicks,! Jr., Michael" or "la Martine,! III, Martin B. de" or -- "Rossato, Jr., Andrea G. B." or "Paul, III, Juan". parseName :: [String] -> String -> Agent parseName gn fn | ("!":sf:",":xs) <- gn = parse xs (sf ++ ".") True | ("!":sf :xs) <- gn , sf /= [] , last sf == ',' = parse xs sf True | (sf:",":xs) <- gn = parse xs (sf ++ ".") False | (sf :xs) <- gn , sf /= [], last sf == ',' = parse xs sf False | otherwise = parse gn "" False where parse g s b = Agent (getGiven g) (getDrop g) (getNonDrop fn) (getFamily fn) s [] b setInit s = if length s == 1 then s ++ "." else s getDrop = unwords . reverse . takeWhile (and . map isLower) . reverse getGiven = map setInit . reverse . dropWhile (and . map isLower) . reverse getNonDrop = unwords . takeWhile (and . map isLower) . words getFamily = unwords . dropWhile (and . map isLower) . words xpNameData :: PU ([String],String) xpNameData = xpWrap (readName,const []) $ xpList $ xpElem "namePart" $ xpPair (xpAttr "type" xpText) xpText0 where readName x = (readg x, readf x) readf = foldr (\(k,v) xs -> if k == "family" then v else xs) [] readg = foldr (\(k,v) xs -> if k == "given" then v:xs else xs) [] xpPart :: PU (([RefDate],String,String,String) ,(String,String,String)) xpPart = xpDefault none . xpIElem "part" . xpWrap (readIt none,const []) $ xpList xpDetail where none = (([],"","",""),("","","")) readIt r [] = r readIt acc@((d,p,v,i),(n,s,c)) (x:xs) | Date y <- x = readIt ((y,p,v,i),(n,s,c)) xs | Page y <- x = readIt ((d,y,v,i),(n,s,c)) xs | Volume y <- x = readIt ((d,p,y,i),(n,s,c)) xs | Issue y <- x = readIt ((d,p,v,y),(n,s,c)) xs | Number y <- x = readIt ((d,p,v,i),(y,s,c)) xs | ChapterNr y <- x = readIt ((d,p,v,i),(n,s,y)) xs | Section y <- x = readIt ((d,p,v,i),(n,y,c)) xs | otherwise = acc data Detail = Date [RefDate] | Page String | Volume String | Issue String | Number String | ChapterNr String | Section String deriving ( Eq, Show ) xpDetail :: PU Detail xpDetail = xpAlt tag ps where tag _ = 0 ps = [ xpWrap (Date, const []) $ xpDate , xpWrap (Page, show) $ xpPage , xpWrap (Volume, show) $ xp "volume" , xpWrap (Issue, show) $ xp "issue" , xpWrap (Number, show) $ xp "number" , xpWrap (Section, show) $ xp "section" , xpWrap (ChapterNr,show) $ xp "chapter" ] xpDate = xpWrap (readDate,show) (xpElem "date" xpText0) xp s = xpElemWithAttrValue "detail" "type" s $ xpElem "number" xpText xpPage :: PU String xpPage = xpChoice (xpElemWithAttrValue "detail" "type" "page" $ xpIElem "number" xpText) (xpElemWithAttrValue "extent" "unit" "page" $ xpPair (xpElem "start" xpText) (xpElem "end" xpText)) (\(s,e) -> xpLift (s ++ "-" ++ e)) xpUrl :: PU ([RefDate],String) xpUrl = xpDefault ([],[]) . xpIElem "location" $ xpPair (xpWrap (readDate,show) $ xpDefault [] $ xpAttr "dateLastAccessed" xpText) (xpDefault [] $ xpElem "url" xpText) xpIsbn :: PU String xpIsbn = xpDefault [] $ xpIdentifier "isbn" xpDoi :: PU String xpDoi = xpDefault [] $ xpIdentifier "doi" xpIdentifier :: String -> PU String xpIdentifier i = xpIElem "identifier" $ xpAddFixedAttr "type" i xpText xpNote :: PU (String) xpNote = xpDefault [] $ xpIElem "note" xpText readDate :: String -> [RefDate] readDate s = (parseDate $ takeWhile (/= '/') s) ++ (parseDate . tail' $ dropWhile (/= '/') s) -- | Possible formats: "YYYY", "YYYY-MM", "YYYY-MM-DD". parseDate :: String -> [RefDate] parseDate s = case split (== '-') (unwords $ words s) of [y,m,d] -> [RefDate y m [] d [] []] [y,m] -> [RefDate y m [] [] [] []] [y] -> if and (map isDigit y) then [RefDate y [] [] [] [] []] else [RefDate [] [] [] [] y []] _ -> [] emptyAgents :: [Agent] emptyAgents = [] readRefType :: [String] -> RefType readRefType [] = NoType readRefType (t:_) = case M.lookup t genreTypeMapping of Just x -> x Nothing -> ArticleJournal -- Reasonable default (?) -- The string constants come from http://www.loc.gov/standards/valuelist/marcgt.html, which are used in the -- "" element (http://www.loc.gov/standards/mods/userguide/genre.html) genreTypeMapping :: M.Map String RefType genreTypeMapping = M.fromList [ ( "book", Book ) , ( "periodical", ArticleJournal ) , ( "newspaper", ArticleNewspaper ) , ( "encyclopedia", EntryEncyclopedia ) , ( "conference publication", PaperConference ) , ( "academic journal", ArticleJournal ) , ( "collection", Chapter ) , ( "legal case and case notes", LegalCase ) , ( "legislation", Legislation ) , ( "motion picutre", MotionPicture ) , ( "patent", Patent ) , ( "review", Review ) , ( "thesis", Thesis ) , ( "web page", Webpage ) , ( "webpage", Webpage ) , ( "web site", Webpage ) ]