{-# LANGUAGE PatternGuards, CPP #-} {-# OPTIONS_GHC -fno-warn-orphans #-} ----------------------------------------------------------------------------- -- | -- Module : Text.CSL.Input.MODS -- Copyright : (c) Andrea Rossato -- License : BSD-style (see LICENSE) -- -- Maintainer : Andrea Rossato -- Stability : unstable -- Portability : unportable -- -- An ugly MODS parser -- ----------------------------------------------------------------------------- module Text.CSL.Input.MODS where import Text.CSL.Eval ( split ) import Text.CSL.Output.Plain ( (<+>), tail' ) import Text.CSL.Pickle import Text.CSL.Reference import Text.CSL.Style ( betterThen ) import Data.Char ( isDigit, isLower ) import qualified Data.Map as M -- | Read a file with a single MODS record. readModsFile :: FilePath -> IO Reference readModsFile = readXmlFile xpMods -- | Read a file with a collection of MODS records. readModsCollectionFile :: FilePath -> IO [Reference] readModsCollectionFile = readXmlFile xpModsCollection xpModsCollection :: PU [Reference] xpModsCollection = xpIElem "modsCollection" $ xpList xpMods xpMods :: PU Reference xpMods = xpIElem "mods" xpReference xpReference :: PU Reference xpReference = xpWrap ( \ ((ref,oref) , (ck,(ty,gn),ti,i,d) ,((au,ed,tr,sp),(re,it,pu',dr),(co,ce,dg,om)) ,((di',pg,vl,is),(nu,sc,ch,vs)) , (di,ac,pu,pp,et) , ((ac',uri),ln,st,no) ) -> ref { refId = ck `betterThen` take 10 (concat . words $ fst ti) , refType = if ty /= NoType then ty else if refType ref == Book then Chapter else refType ref , title = fst ti , titleShort = snd ti , author = au , editor = ed `betterThen` editor ref , edition = et `betterThen` edition ref , translator = tr `betterThen` translator ref , recipient = re `betterThen` recipient ref , interviewer = it `betterThen` interviewer ref , composer = co `betterThen` composer ref , director = dr `betterThen` director ref , collectionEditor = ce `betterThen` collectionEditor ref , publisherPlace = pp `betterThen` publisherPlace ref , numberOfVolumes = vs `betterThen` numberOfVolumes ref , containerAuthor = containerAuthor ref , url = uri , note = no , isbn = i , doi = d , genre = genre ref `betterThen` gn , issued = issued ref `betterThen` di `betterThen` di' , accessed = accessed ref `betterThen` ac `betterThen` ac' , page = page ref `betterThen` pg , volume = volume ref `betterThen` vl , issue = issue ref `betterThen` is `betterThen` number ref `betterThen` nu , number = number ref `betterThen` nu , section = section ref `betterThen` sc , chapterNumber = chapterNumber ref `betterThen` ch , language = language ref `betterThen` ln , status = status ref `betterThen` st , publisher = fromAgent pu `betterThen` publisher ref `betterThen` fromAgent pu' `betterThen` fromAgent dg `betterThen` fromAgent om `betterThen` fromAgent sp , originalDate = issued oref , originalTitle = title oref , originalPublisher = publisher oref , originalPublisherPlace = publisherPlace oref } , \r -> ( (emptyReference,emptyReference) , (refId r,(refType r,genre r), (title r, titleShort r), isbn r, doi r) ,((author r, editor r, translator r, director r) ,(recipient r, interviewer r, emptyAgents, director r) ,(composer r, collectionEditor r, emptyAgents, emptyAgents)) ,((issued r, page r, volume r, issue r) ,(number r, section r, chapterNumber r, numberOfVolumes r)) , (issued r, accessed r, emptyAgents, publisherPlace r, edition r) ,((accessed r, url r), status r, language r, note r) )) $ xp6Tuple (xpPair (xpDefault emptyReference $ xpRelatedItem "host") (xpDefault emptyReference $ xpRelatedItem "original")) (xp5Tuple xpCiteKey xpRefType xpTitle xpIsbn xpDoi) xpAgents xpPart xpOrigin (xp4Tuple xpUrl xpLang xpStatus xpNote) xpCiteKey :: PU String xpCiteKey = xpDefault [] $ xpChoice (xpAttr "ID" xpText) (xpElemWithAttrValue "identifier" "type" "citekey" xpText) xpLift xpOrigin :: PU ([RefDate],[RefDate],[Agent],String,String) xpOrigin = xpDefault ([],[],[],[],[]) . xpIElem "originInfo" $ xp5Tuple (xpDefault [] $ xpWrap (readDate,show) $ xpIElem "dateIssued" xpText0) (xpDefault [] $ xpWrap (readDate,show) $ xpIElem "dateCaptured" xpText0) (xpDefault [] $ xpList $ xpWrap (\s -> Agent [] [] [] s [] [] False, show) $ xpIElem "publisher" xpText0) (xpDefault [] $ xpIElem "place" $ xpIElem "placeTerm" xpText0) (xpDefault [] $ xpIElem "edition" $ xpText0) xpRefType :: PU (RefType, String) xpRefType = xpDefault (NoType,[]) $ xpWrap (readRefType, const []) xpGenre xpGenre :: PU [String] xpGenre = xpList $ xpIElem "genre" $ xpChoice xpZero (xpPair (xpDefault [] $ xpAttr "authority" xpText) xpText) $ xpLift . snd xpRelatedItem :: String -> PU Reference xpRelatedItem t = xpIElem "relatedItem" . xpAddFixedAttr "type" t $ xpWrap ( \(((t3l,t3s),(t4l,_)) ,((ty,gn),ct) ,((ca,ed,tr,sp),(re,it,pu',dr),(co,ce,dg,om)) ,((di,pg,vl,is),(nu,sc,ch,vs)) , (di',ac,pu,pp,et) , (ln, st) ) -> emptyReference { refType = ty , title = fst ct , containerAuthor = ca , containerTitle = if t3l /= [] then t3l else fst ct , containerTitleShort = if t3s /= [] then t3s else snd ct , collectionTitle = t4l , volumeTitle = if t3l /= [] then fst ct else [] , editor = ed , edition = et , translator = tr , recipient = re , interviewer = it , publisherPlace = pp , composer = co , director = dr , collectionEditor = ce , issued = di `betterThen` di' , accessed = ac , page = pg , volume = vl , issue = is `betterThen` nu , number = nu , section = sc , chapterNumber = ch , genre = gn , numberOfVolumes = vs , language = ln , status = st , publisher = fromAgent $ pu `betterThen` pu' `betterThen` dg `betterThen` om `betterThen` sp } , \r -> (((volumeTitle r,[]),(collectionTitle r,[])) ,((refType r,genre r), (containerTitle r, containerTitleShort r)) ,((containerAuthor r, editor r, translator r, director r) ,(recipient r, interviewer r, emptyAgents, director r) ,(composer r, collectionEditor r, emptyAgents, emptyAgents)) ,((issued r, page r, volume r, issue r) ,(number r, section r, chapterNumber r, numberOfVolumes r)) , (issued r, accessed r,emptyAgents, publisherPlace r, edition r) , (language r, status r) )) $ xp6Tuple xpNestedTitles (xpPair xpRefType xpTitle) xpAgents xpPart xpOrigin (xpPair xpLang xpStatus) xpNestedTitles :: PU ((String, String), (String, String)) xpNestedTitles = xpDefault (([],[]),([],[])) . getRelated $ xpPair xpTitle (getRelated xpTitle) where getRelated = xpIElem "relatedItem" . xpAddFixedAttr "type" "host" xpTitle :: PU (String,String) xpTitle = xpWrap (\((a,b),c) -> createTitle a b c , \s -> (s,[])) $ xpPair (xpIElem "titleInfo" $ xpPair (xpIElem "title" xpText0) (xpDefault [] $ xpIElem "subTitle" xpText0)) (xpDefault [] $ xpIElem "titleInfo" $ xpAddFixedAttr "type" "abbreviated" $ xpElem "title" xpText0) where createTitle [] [] [] = ([],[]) createTitle s [] [] = breakLong s createTitle s [] ab = (s ,ab) createTitle s sub [] = (s ++ colon s ++ sub, s) createTitle s sub ab = (s ++ colon s ++ sub, ab) colon s = if last s == '!' || last s == '?' then " " else ": " breakLong s = let (a,b) = break (== ':') s in if b /= [] then (s,a) else (s, []) xpAgents :: PU (([Agent],[Agent],[Agent],[Agent]) ,([Agent],[Agent],[Agent],[Agent]) ,([Agent],[Agent],[Agent],[Agent])) xpAgents = xpTriple (xp4Tuple (xpAgent "author" "aut") (xpAgent "editor" "edt") (xpAgent "translator" "trl") (xpAgent "sponsor" "spn")) (xp4Tuple (xpAgent "recipient" "rcp") (xpAgent "interviewer" "ivr") (xpAgent "publisher" "pbl") (xpAgent "director" "drt")) (xp4Tuple (xpAgent "composer" "cmp") (xpAgent "collector" "xol") (xpAgent "degree grantor" "dgg") (xpAgent "organizer of meeting" "orm")) xpAgent :: String -> String -> PU [Agent] xpAgent sa sb = xpDefault [] $ xpList $ xpIElem "name" $ xpChoice xpZero (xpIElem "role" $ xpIElem "roleTerm" xpText0) (\x -> if x == sa || x == sb then xpickle else xpZero) instance XmlPickler Agent where xpickle = xpAlt tag ps where tag _ = 0 ps = [ personal, others ] personal = xpWrap ( uncurry parseName , \(Agent gn _ _ fn _ _ _) -> (gn,fn)) $ xpAddFixedAttr "type" "personal" xpNameData others = xpWrap (\s -> Agent [] [] [] [] [] s False, undefined) $ xpElem "namePart" xpText0 -- | "von Hicks,! Jr., Michael" or "la Martine,! III, Martin B. de" or -- "Rossato, Jr., Andrea G. B." or "Paul, III, Juan". parseName :: [String] -> String -> Agent parseName gn fn | ("!":sf:",":xs) <- gn = parse xs (sf ++ ".") True | ("!":sf :xs) <- gn , sf /= [] , last sf == ',' = parse xs sf True | (sf:",":xs) <- gn = parse xs (sf ++ ".") False | (sf :xs) <- gn , sf /= [], last sf == ',' = parse xs sf False | otherwise = parse gn "" False where parse g s b = Agent (getGiven g) (getDrop g) (getNonDrop fn) (getFamily fn) s [] b setInit s = if length s == 1 then s ++ "." else s getDrop = unwords . reverse . takeWhile (and . map isLower) . reverse getGiven = map setInit . reverse . dropWhile (and . map isLower) . reverse getNonDrop = unwords . takeWhile (and . map isLower) . words getFamily = unwords . dropWhile (and . map isLower) . words xpNameData :: PU ([String],String) xpNameData = xpWrap (readName,const []) $ xpList $ xpElem "namePart" $ xpPair (xpAttr "type" xpText) xpText0 where readName x = (readg x, readf x) readf = foldr (\(k,v) xs -> if k == "family" then v else xs) [] readg = foldr (\(k,v) xs -> if k == "given" then v:xs else xs) [] xpPart :: PU (([RefDate],String,String,String) ,(String,String,String,String)) xpPart = xpDefault none . xpIElem "part" . xpWrap (readIt none,const []) $ xpList xpDetail where none = (([],"","",""),("","","","")) readIt r [] = r readIt acc@((d,p,v,i),(n,s,c,vs)) (x:xs) | Date y <- x = readIt ((y,p,v,i),(n,s,c,vs)) xs | Page y <- x = readIt ((d,y,v,i),(n,s,c,vs)) xs | Volume y <- x = readIt ((d,p,y,i),(n,s,c,vs)) xs | Issue y <- x = readIt ((d,p,v,y),(n,s,c,vs)) xs | Number y <- x = readIt ((d,p,v,i),(y,s,c,vs)) xs | ChapterNr y <- x = readIt ((d,p,v,i),(n,s,y,vs)) xs | Section y <- x = readIt ((d,p,v,i),(n,y,c,vs)) xs | NrVols y <- x = readIt ((d,p,v,i),(n,s,c, y)) xs | otherwise = acc data Detail = Date [RefDate] | Page String | Volume String | Issue String | Number String | ChapterNr String | Section String | NrVols String deriving ( Eq, Show ) xpDetail :: PU Detail xpDetail = xpAlt tag ps where tag _ = 0 ps = [ xpWrap (Date, const []) $ xpDate , xpWrap (Page, show) $ xpPage , xpWrap (NrVols, show) $ xpVolumes , xpWrap (Volume, show) $ xp "volume" , xpWrap (Issue, show) $ xp "issue" , xpWrap (Number, show) $ xp "number" , xpWrap (Number, show) $ xp "report number" , xpWrap (Section, show) $ xp "section" , xpWrap (ChapterNr,show) $ xp "chapter" ] xpDate = xpWrap (readDate,show) (xpElem "date" xpText0) xp s = xpElemWithAttrValue "detail" "type" s $ xpElem "number" xpText xpPage :: PU String xpPage = xpChoice (xpElemWithAttrValue "detail" "type" "page" $ xpIElem "number" xpText) (xpElemWithAttrValue "extent" "unit" "page" $ xpPair (xpElem "start" xpText) (xpElem "end" xpText)) (\(s,e) -> xpLift (s ++ "-" ++ e)) xpVolumes :: PU String xpVolumes = xpElemWithAttrValue "extent" "unit" "volumes" $ xpElem "total" xpText xpUrl :: PU ([RefDate],String) xpUrl = xpDefault ([],[]) . xpIElem "location" $ xpPair (xpWrap (readDate,show) $ xpDefault [] $ xpAttr "dateLastAccessed" xpText) (xpDefault [] $ xpElem "url" xpText) xpIsbn :: PU String xpIsbn = xpDefault [] $ xpIdentifier "isbn" xpDoi :: PU String xpDoi = xpDefault [] $ xpIdentifier "doi" xpIdentifier :: String -> PU String xpIdentifier i = xpIElem "identifier" $ xpAddFixedAttr "type" i xpText xpNote :: PU (String) xpNote = xpDefault [] $ xpIElem "note" xpText xpLang :: PU String xpLang = xpDefault [] $ xpChoice (xpIElem "recordInfo" $ xpIElem "languageOfCataloging" $ xpIElem "language" $ xpIElem "languageTerm" xpText) (xpIElem "recordInfo" $ xpIElem "languageOfCataloging" $ xpIElem "languageTerm" xpText) xpLift xpStatus :: PU String xpStatus = xpDefault [] $ --xpElemWithAttrValue "note" "type" "publication status" xpText xpIElem "note" $ xpAddFixedAttr "type" "publication status" xpText readDate :: String -> [RefDate] readDate s = (parseDate $ takeWhile (/= '/') s) ++ (parseDate . tail' $ dropWhile (/= '/') s) -- | Possible formats: "YYYY", "YYYY-MM", "YYYY-MM-DD". parseDate :: String -> [RefDate] parseDate s = case split (== '-') (unwords $ words s) of [y,m,d] -> [RefDate y m [] d [] []] [y,m] -> [RefDate y m [] [] [] []] [y] -> if and (map isDigit y) then [RefDate y [] [] [] [] []] else [RefDate [] [] [] [] y []] _ -> [] emptyAgents :: [Agent] emptyAgents = [] fromAgent :: [Agent] -> String fromAgent = foldr (<+>) [] . map show readRefType :: [String] -> (RefType, String) readRefType [] = (NoType,[]) readRefType (t:ts) = case M.lookup t genreTypeMapping of Just x -> (x, if ts /= [] then head ts else []) Nothing -> if ts /= [] then case M.lookup (head ts) genreTypeMapping of Just x -> (x, t) Nothing -> (ArticleJournal, t) else (ArticleJournal, []) -- The string constants come from http://www.loc.gov/standards/valuelist/marcgt.html, which are used in the -- "" element (http://www.loc.gov/standards/mods/userguide/genre.html) genreTypeMapping :: M.Map String RefType genreTypeMapping = M.fromList [ ( "book", Book ) , ( "book chapter", Chapter ) , ( "periodical", ArticleJournal ) , ( "newspaper", ArticleNewspaper ) , ( "magazine", ArticleNewspaper ) , ( "magazine article", ArticleNewspaper ) , ( "encyclopedia", EntryEncyclopedia) , ( "conference publication", Book ) , ( "academic journal", ArticleJournal ) , ( "collection", Chapter ) , ( "legal case and case notes", LegalCase ) , ( "legislation", Legislation ) , ( "instruction", Book ) , ( "motion picture", MotionPicture ) , ( "film", MotionPicture ) , ( "tvBroadcast", MotionPicture ) , ( "videoRecording", MotionPicture ) , ( "videorecording", MotionPicture ) , ( "patent", Patent ) , ( "Ph.D. thesis", Thesis ) , ( "Masters thesis", Thesis ) , ( "report", Report ) , ( "technical report", Report ) , ( "review", Review ) , ( "thesis", Thesis ) , ( "unpublished", NoType ) , ( "web page", Webpage ) , ( "webpage", Webpage ) , ( "web site", Webpage ) ]