{-# LANGUAGE PatternGuards, CPP #-} {-# OPTIONS_GHC -fno-warn-orphans #-} ----------------------------------------------------------------------------- -- | -- Module : Text.CSL.Input.MODS -- Copyright : (c) Andrea Rossato -- License : BSD-style (see LICENSE) -- -- Maintainer : Andrea Rossato -- Stability : unstable -- Portability : unportable -- -- An ugly MODS parser -- ----------------------------------------------------------------------------- module Text.CSL.Input.MODS where import Text.CSL.Eval ( split ) import Text.CSL.Output.Plain ( (<+>), tail' ) import Text.CSL.Pickle import Text.CSL.Reference import Text.CSL.Style ( betterThen ) import Data.Char ( isDigit, isLower ) import qualified Data.Map as M -- | Read a file with a single MODS record. readModsFile :: FilePath -> IO Reference readModsFile = readXmlFile xpMods -- | Read a file with a collection of MODS records. readModsCollectionFile :: FilePath -> IO [Reference] readModsCollectionFile = readXmlFile xpModsCollection xpModsCollection :: PU [Reference] xpModsCollection = xpIElem "modsCollection" $ xpList xpMods xpMods :: PU Reference xpMods = xpIElem "mods" xpReference xpReference :: PU Reference xpReference = xpWrap ( \ ( ref , (ck,ty,ti,i,d) ,((au,ed,tr),(re,it,pu'),(co,ce,dg,om)) ,((di',pg,vl,is),(nu,sc,ch)) , (di,ac,pu,pp,et) , ((ac',uri),no) ) -> ref { refId = ck `betterThen` take 10 (concat $ words ti) , refType = if ty /= NoType then ty else if refType ref == Book then Chapter else refType ref , title = ti , author = au , editor = ed `betterThen` editor ref , edition = et `betterThen` edition ref , translator = tr `betterThen` translator ref , recipient = re `betterThen` recipient ref , interviewer = it `betterThen` interviewer ref , composer = co `betterThen` composer ref , collectionEditor = ce `betterThen` collectionEditor ref , publisherPlace = pp `betterThen` publisherPlace ref , containerAuthor = containerAuthor ref , url = uri , note = no , isbn = i , doi = d , issued = issued ref `betterThen` di `betterThen` di' , accessed = accessed ref `betterThen` ac `betterThen` ac' , page = page ref `betterThen` pg , volume = volume ref `betterThen` vl , issue = issue ref `betterThen` is `betterThen` number ref `betterThen` nu , number = number ref `betterThen` nu , section = section ref `betterThen` sc , chapterNumber = chapterNumber ref `betterThen` ch , publisher = fromAgent pu `betterThen` publisher ref `betterThen` fromAgent pu' `betterThen` fromAgent dg `betterThen` fromAgent om } , \r -> ( r , (refId r, refType r, title r, isbn r, doi r) ,((author r, editor r, translator r) ,(recipient r, interviewer r, emptyAgents ) ,(composer r, collectionEditor r, emptyAgents, emptyAgents)) ,((issued r, page r, volume r, issue r) ,(number r, section r, chapterNumber r)) , (issued r, accessed r, emptyAgents, publisherPlace r, edition r) ,((accessed r, url r), note r) )) $ xp6Tuple (xpDefault emptyReference xpRelatedItem) (xp5Tuple xpCiteKey xpRefType xpTitle xpIsbn xpDoi) xpAgents xpPart xpOrigin (xpPair xpUrl xpNote) xpCiteKey :: PU String xpCiteKey = xpDefault [] $ xpChoice (xpAttr "ID" xpText) (xpElemWithAttrValue "identifier" "type" "citekey" xpText) xpLift xpOrigin :: PU ([RefDate],[RefDate],[Agent],String,String) xpOrigin = xpDefault ([],[],[],[],[]) . xpIElem "originInfo" $ xp5Tuple (xpDefault [] $ xpWrap (readDate,show) $ xpIElem "dateIssued" xpText0) (xpDefault [] $ xpWrap (readDate,show) $ xpIElem "dateCaptured" xpText0) (xpDefault [] $ xpList $ xpWrap (\s -> Agent [] [] [] s [] [] False, show) $ xpIElem "publisher" xpText0) (xpDefault [] $ xpIElem "place" $ xpIElem "placeTerm" xpText0) (xpDefault [] $ xpIElem "edition" $ xpText0) xpRefType :: PU RefType xpRefType = xpDefault NoType $ xpWrap (readRefType, const []) xpGenre xpGenre :: PU [String] xpGenre = xpList $ xpIElem "genre" $ xpChoice xpZero (xpPair (xpDefault [] $ xpAttr "authority" xpText) xpText) $ xpLift . snd xpRelatedItem :: PU Reference xpRelatedItem = xpIElem "relatedItem" . xpAddFixedAttr "type" "host" $ xpWrap ( \( (ty,ct) ,((ca,ed,tr),(re,it,pu'),(co,ce,dg,om)) ,((di,pg,vl,is),(nu,sc,ch)) , (di',ac,pu,pp,et) ) -> emptyReference { refType = ty , containerAuthor = ca , containerTitle = ct , editor = ed , edition = et , translator = tr , recipient = re , interviewer = it , publisherPlace = pp , composer = co , collectionEditor = ce , issued = di `betterThen` di' , accessed = ac , page = pg , volume = vl , issue = is `betterThen` nu , number = nu , section = sc , chapterNumber = ch , publisher = fromAgent $ pu `betterThen` pu' `betterThen` dg `betterThen` om } , \r -> ( (refType r, containerTitle r) ,((containerAuthor r, editor r, translator r) ,(recipient r, interviewer r, emptyAgents ) ,(composer r, collectionEditor r, emptyAgents , emptyAgents)) ,((issued r, page r, volume r, issue r) ,(number r, section r, chapterNumber r)) , (issued r, accessed r,emptyAgents, publisherPlace r, edition r) )) $ xp4Tuple (xpPair xpRefType xpTitle) xpAgents xpPart xpOrigin -- FIXME: join title and subtitle correctly: usare Title per shortTitle. xpTitle :: PU String xpTitle = xpWrap (uncurry (<+>), \s -> (s,[])) $ xpIElem "titleInfo" $ xpPair (xpIElem "title" xpText0) (xpDefault [] $ xpIElem "subTitle" xpText0) xpAgents :: PU (([Agent],[Agent],[Agent]) ,([Agent],[Agent],[Agent]) ,([Agent],[Agent],[Agent],[Agent])) xpAgents = xpTriple (xpTriple (xpAgent "author" "aut") (xpAgent "editor" "edt") (xpAgent "translator" "trl")) (xpTriple (xpAgent "recipient" "rcp") (xpAgent "interviewer" "ivr") (xpAgent "publisher" "pbl")) (xp4Tuple (xpAgent "composer" "cmp") (xpAgent "collector" "xol") (xpAgent "degree grantor" "dgg") (xpAgent "organizer of meeting" "orm")) xpAgent :: String -> String -> PU [Agent] xpAgent sa sb = xpDefault [] $ xpList $ xpIElem "name" $ xpChoice xpZero (xpIElem "role" $ xpIElem "roleTerm" xpText0) (\x -> if x == sa || x == sb then xpickle else xpZero) instance XmlPickler Agent where xpickle = xpAlt tag ps where tag _ = 0 ps = [ personal, others ] personal = xpWrap ( uncurry parseName , \(Agent gn _ _ fn _ _ _) -> (gn,fn)) $ xpAddFixedAttr "type" "personal" xpNameData others =xpWrap (\s -> Agent [] [] [] [] [] s False, undefined) $ xpElem "namePart" xpText0 -- | "von Hicks,! Jr., Michael" or "la Martine,! III, Martin B. de" or -- "Rossato, Jr., Andrea G. B." or "Paul, III, Juan". parseName :: [String] -> String -> Agent parseName gn fn | ("!":sf:",":xs) <- gn = parse xs (sf ++ ".") True | ("!":sf :xs) <- gn , sf /= [] , last sf == ',' = parse xs sf True | (sf:",":xs) <- gn = parse xs (sf ++ ".") False | (sf :xs) <- gn , sf /= [], last sf == ',' = parse xs sf False | otherwise = parse gn "" False where parse g s b = Agent (getGiven g) (getDrop g) (getNonDrop fn) (getFamily fn) s [] b setInit s = if length s == 1 then s ++ "." else s getDrop = unwords . reverse . takeWhile (and . map isLower) . reverse getGiven = map setInit . reverse . dropWhile (and . map isLower) . reverse getNonDrop = unwords . takeWhile (and . map isLower) . words getFamily = unwords . dropWhile (and . map isLower) . words xpNameData :: PU ([String],String) xpNameData = xpWrap (readName,const []) $ xpList $ xpElem "namePart" $ xpPair (xpAttr "type" xpText) xpText0 where readName x = (readg x, readf x) readf = foldr (\(k,v) xs -> if k == "family" then v else xs) [] readg = foldr (\(k,v) xs -> if k == "given" then v:xs else xs) [] xpPart :: PU (([RefDate],String,String,String) ,(String,String,String)) xpPart = xpDefault none . xpIElem "part" . xpWrap (readIt none,const []) $ xpList xpDetail where none = (([],"","",""),("","","")) readIt r [] = r readIt acc@((d,p,v,i),(n,s,c)) (x:xs) | Date y <- x = readIt ((y,p,v,i),(n,s,c)) xs | Page y <- x = readIt ((d,y,v,i),(n,s,c)) xs | Volume y <- x = readIt ((d,p,y,i),(n,s,c)) xs | Issue y <- x = readIt ((d,p,v,y),(n,s,c)) xs | Number y <- x = readIt ((d,p,v,i),(y,s,c)) xs | ChapterNr y <- x = readIt ((d,p,v,i),(n,s,y)) xs | Section y <- x = readIt ((d,p,v,i),(n,y,c)) xs | otherwise = acc data Detail = Date [RefDate] | Page String | Volume String | Issue String | Number String | ChapterNr String | Section String deriving ( Eq, Show ) xpDetail :: PU Detail xpDetail = xpAlt tag ps where tag _ = 0 ps = [ xpWrap (Date, const []) $ xpDate , xpWrap (Page, show) $ xpPage , xpWrap (Volume, show) $ xp "volume" , xpWrap (Issue, show) $ xp "issue" , xpWrap (Number, show) $ xp "number" , xpWrap (Section, show) $ xp "section" , xpWrap (ChapterNr,show) $ xp "chapter" ] xpDate = xpWrap (readDate,show) (xpElem "date" xpText0) xp s = xpElemWithAttrValue "detail" "type" s $ xpElem "number" xpText xpPage :: PU String xpPage = xpChoice (xpElemWithAttrValue "detail" "type" "page" $ xpIElem "number" xpText) (xpElemWithAttrValue "extent" "unit" "page" $ xpPair (xpElem "start" xpText) (xpElem "end" xpText)) (\(s,e) -> xpLift (s ++ "-" ++ e)) xpUrl :: PU ([RefDate],String) xpUrl = xpDefault ([],[]) . xpIElem "location" $ xpPair (xpWrap (readDate,show) $ xpDefault [] $ xpAttr "dateLastAccessed" xpText) (xpDefault [] $ xpElem "url" xpText) xpIsbn :: PU String xpIsbn = xpDefault [] $ xpIdentifier "isbn" xpDoi :: PU String xpDoi = xpDefault [] $ xpIdentifier "doi" xpIdentifier :: String -> PU String xpIdentifier i = xpIElem "identifier" $ xpAddFixedAttr "type" i xpText xpNote :: PU (String) xpNote = xpDefault [] $ xpIElem "note" xpText readDate :: String -> [RefDate] readDate s = (parseDate $ takeWhile (/= '/') s) ++ (parseDate . tail' $ dropWhile (/= '/') s) -- | Possible formats: "YYYY", "YYYY-MM", "YYYY-MM-DD". parseDate :: String -> [RefDate] parseDate s = case split (== '-') (unwords $ words s) of [y,m,d] -> [RefDate y m [] d [] []] [y,m] -> [RefDate y m [] [] [] []] [y] -> if and (map isDigit y) then [RefDate y [] [] [] [] []] else [RefDate [] [] [] [] y []] _ -> [] emptyAgents :: [Agent] emptyAgents = [] fromAgent :: [Agent] -> String fromAgent = foldr (<+>) [] . map show readRefType :: [String] -> RefType readRefType [] = NoType readRefType (t:_) = case M.lookup t genreTypeMapping of Just x -> x Nothing -> ArticleJournal -- Reasonable default (?) -- The string constants come from http://www.loc.gov/standards/valuelist/marcgt.html, which are used in the -- "" element (http://www.loc.gov/standards/mods/userguide/genre.html) genreTypeMapping :: M.Map String RefType genreTypeMapping = M.fromList [ ( "book", Book ) , ( "book chapter", Chapter ) , ( "periodical", ArticleJournal ) , ( "newspaper", ArticleNewspaper ) , ( "encyclopedia", EntryEncyclopedia) , ( "conference publication", PaperConference ) , ( "academic journal", ArticleJournal ) , ( "collection", Chapter ) , ( "legal case and case notes", LegalCase ) , ( "legislation", Legislation ) , ( "instruction", Book ) , ( "motion picutre", MotionPicture ) , ( "patent", Patent ) , ( "Ph.D. thesis", Thesis ) , ( "Masters thesis", Thesis ) , ( "report", Report ) , ( "review", Review ) , ( "thesis", Thesis ) , ( "unpublished", NoType ) , ( "web page", Webpage ) , ( "webpage", Webpage ) , ( "web site", Webpage ) ]