{-# LANGUAGE PatternGuards, CPP #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Text.CSL.Input.MODS
-- Copyright   :  (c) Andrea Rossato
-- License     :  BSD-style (see LICENSE)
--
-- Maintainer  :  Andrea Rossato <andrea.rossato@unitn.it>
-- Stability   :  unstable
-- Portability :  unportable
--
-- An ugly MODS parser
--
-----------------------------------------------------------------------------

module Text.CSL.Input.MODS where

import Text.CSL.Eval ( split )
import Text.CSL.Output.Plain ( (<+>), tail' )
import Text.CSL.Pickle
import Text.CSL.Reference
import Text.CSL.Style ( betterThen )

import Data.Char ( isDigit, isLower )
import qualified Data.Map as M

-- | Read a file with a single MODS record.
readModsFile :: FilePath -> IO Reference
readModsFile = readXmlFile xpMods

-- | Read a file with a collection of MODS records.
readModsCollectionFile :: FilePath -> IO [Reference]
readModsCollectionFile = readXmlFile xpModsCollection

xpModsCollection :: PU [Reference]
xpModsCollection = xpIElem "modsCollection" $ xpList xpMods

xpMods :: PU Reference
xpMods = xpIElem "mods" xpReference

xpReference :: PU Reference
xpReference
    = xpWrap ( \ ( ref
                , (ck,ty,ti,i,d)
                ,((au,ed,tr),(re,it,pu'),(co,ce))
                ,((di',pg,vl,is),(nu,sc,ch))
                , (di,ac,pu,pp)
                , ((ac',uri),no)
                 ) ->
               ref { refId            = ck `betterThen` take 10 (concat $ words ti)
                   , refType          = if ty == NoType then refType ref else ty
                   , title            = ti
                   , author           = au
                   , editor           = ed `betterThen` editor           ref
                   , translator       = tr `betterThen` translator       ref
                   , recipient        = re `betterThen` recipient        ref
                   , interviewer      = it `betterThen` interviewer      ref
                   , composer         = co `betterThen` composer         ref
                   , collectionEditor = ce `betterThen` collectionEditor ref
                   , publisherPlace   = pp `betterThen` publisherPlace   ref
                   , containerAuthor  = containerAuthor  ref
                   , url              = uri
                   , note             = no
                   , isbn             = i
                   , doi              = d
                   , issued           = issued        ref `betterThen` di `betterThen` di'
                   , accessed         = accessed      ref `betterThen` ac `betterThen` ac'
                   , page             = page          ref `betterThen` pg
                   , volume           = volume        ref `betterThen` vl
                   , issue            = issue         ref `betterThen` is
                   , number           = number        ref `betterThen` nu
                   , section          = section       ref `betterThen` sc
                   , chapterNumber    = chapterNumber ref `betterThen` ch
                   , publisher        = (foldr (<+>) [] . map show $ pu)
                                           `betterThen` publisher ref
                                           `betterThen` (foldr (<+>) [] . map show $ pu')
                   }
             , \r -> (  r
                     , (refId     r, refType          r, title          r, isbn r, doi r)
                     ,((author    r, editor           r, translator     r)
                      ,(recipient r, interviewer      r, emptyAgents     )
                      ,(composer  r, collectionEditor r))
                     ,((issued    r, page  r, volume r, issue r)
                      ,(number    r, section   r, chapterNumber r))
                     , (issued    r, accessed r, emptyAgents, publisherPlace r)
                     ,((accessed  r, url r), note r)
                     )) $
      xp6Tuple (xpDefault emptyReference xpRelatedItem)
               (xp5Tuple  xpCiteKey xpRefType xpTitle xpIsbn xpDoi)
                xpAgents xpPart xpOrigin (xpPair xpUrl xpNote)

xpCiteKey :: PU String
xpCiteKey
    = xpDefault [] $
      xpChoice (xpAttr "ID" xpText)
               (xpElemWithAttrValue "identifier" "type" "citekey" xpText)
                xpLift

xpOrigin :: PU ([RefDate],[RefDate],[Agent],String)
xpOrigin
    = xpDefault ([],[],[],[]) . xpIElem "originInfo" $
      xp4Tuple (xpDefault [] $ xpWrap (readDate,show) $
                xpIElem "dateIssued" xpText0)
               (xpDefault [] $ xpWrap (readDate,show) $
                xpIElem "dateCaptured" xpText0)
               (xpDefault [] $ xpList $ xpWrap (\s -> Agent [] [] [] s [] [] False, show) $
                xpIElem "publisher" xpText0)
               (xpDefault [] $ xpIElem "place" $ xpIElem "placeTerm" xpText0)

xpRefType :: PU RefType
xpRefType
    = xpDefault NoType $
      xpWrap (readRefType, const []) xpGenre

xpGenre :: PU [String]
xpGenre
    = xpList $ xpIElem "genre" $
      xpChoice xpZero
              (xpPair (xpDefault [] $ xpAttr "authority" xpText) xpText)
              $ xpLift . snd

xpRelatedItem :: PU Reference
xpRelatedItem
    = xpIElem "relatedItem" . xpAddFixedAttr "type" "host" $
      xpWrap ( \( (ty,ct)
                ,((ca,ed,tr),(re,it,pu'),(co,ce))
                ,((di,pg,vl,is),(nu,sc,ch))
                , (di',ac,pu,pp)
                ) ->
               emptyReference { refType          = ty
                              , containerAuthor  = ca
                              , containerTitle   = ct
                              , editor           = ed
                              , translator       = tr
                              , recipient        = re
                              , interviewer      = it
                              , publisher        = foldr (<+>) [] . map show $ pu `betterThen` pu'
                              , publisherPlace   = pp
                              , composer         = co
                              , collectionEditor = ce
                              , issued           = di `betterThen` di'
                              , accessed         = ac
                              , page             = pg
                              , volume           = vl
                              , issue            = is
                              , number           = nu
                              , section          = sc
                              , chapterNumber    = ch
                              }
             , \r -> ( (refType         r, containerTitle   r)
                     ,((containerAuthor r, editor           r, translator r)
                      ,(recipient       r, interviewer      r, emptyAgents )
                      ,(composer        r, collectionEditor r))
                     ,((issued  r, page  r, volume r, issue r)
                      ,(number  r, section   r, chapterNumber r))
                     , (issued  r, accessed  r,emptyAgents, publisherPlace r)
                     )) $
      xp4Tuple (xpPair  xpRefType xpTitle)
                xpAgents xpPart xpOrigin

-- FIXME: join title and subtitle correctly: usare Title per shortTitle.
xpTitle :: PU String
xpTitle
    = xpWrap (uncurry (<+>), \s -> (s,[])) $
      xpIElem "titleInfo" $
      xpPair (xpIElem "title" xpText0)
             (xpDefault [] $ xpIElem "subTitle" xpText0)

xpAgents :: PU (([Agent],[Agent],[Agent])
               ,([Agent],[Agent],[Agent])
               ,([Agent],[Agent]))
xpAgents
    = xpTriple (xpTriple (xpAgent "author"      "aut")
                         (xpAgent "editor"      "edt")
                         (xpAgent "translator"  "trl"))
               (xpTriple (xpAgent "recipient"   "rcp")
                         (xpAgent "interviewer" "ivr")
                         (xpAgent "publisher"   "pbl"))
               (xpPair   (xpAgent "composer"    "cmp")
                         (xpAgent "collector"   "xol"))

xpAgent :: String -> String -> PU [Agent]
xpAgent sa sb
    = xpDefault [] $ xpList $ xpIElem "name" $
      xpChoice  xpZero
               (xpIElem "role" $ xpIElem "roleTerm" xpText0)
               (\x -> if x == sa || x == sb then xpickle else xpZero)

instance XmlPickler Agent where
    xpickle = xpWrap ( uncurry parseName
                     , \(Agent gn _ _ fn _ _ _) -> (gn,fn)) $
                 xpAddFixedAttr "type" "personal" xpNameData

-- | "von Hicks,! Jr., Michael" or "la Martine,! III, Martin B. de" or
-- "Rossato, Jr., Andrea G. B." or "Paul, III, Juan".
parseName :: [String] -> String -> Agent
parseName gn fn
    | ("!":sf:",":xs) <- gn     = parse xs (sf ++ ".") True
    | ("!":sf    :xs) <- gn
    , sf /= [] , last sf == ',' = parse xs  sf         True

    | (sf:",":xs)     <- gn     = parse xs (sf ++ ".") False
    | (sf    :xs)     <- gn
    , sf /= [], last sf == ','  = parse xs  sf         False
    | otherwise                 = parse gn  ""         False
    where
      parse g s b = Agent (getGiven g) (getDrop g) (getNonDrop fn) (getFamily fn) s [] b
      setInit   s = if length s == 1 then s ++ "." else s
      getDrop     = unwords     . reverse . takeWhile (and . map isLower) . reverse
      getGiven    = map setInit . reverse . dropWhile (and . map isLower) . reverse
      getNonDrop  = unwords . takeWhile (and . map isLower) . words
      getFamily   = unwords . dropWhile (and . map isLower) . words

xpNameData :: PU ([String],String)
xpNameData
    = xpWrap (readName,const []) $
      xpList $ xpElem "namePart" $ xpPair (xpAttr "type" xpText) xpText0
    where
      readName x = (readg x, readf x)
      readf = foldr (\(k,v) xs -> if k == "family" then v    else xs) []
      readg = foldr (\(k,v) xs -> if k == "given"  then v:xs else xs) []

xpPart :: PU (([RefDate],String,String,String)
             ,(String,String,String))
xpPart
    = xpDefault none . xpIElem "part" .
      xpWrap (readIt none,const []) $ xpList xpDetail
    where
      none = (([],"","",""),("","",""))
      readIt r [] = r
      readIt acc@((d,p,v,i),(n,s,c)) (x:xs)
          | Date      y <- x = readIt ((y,p,v,i),(n,s,c)) xs
          | Page      y <- x = readIt ((d,y,v,i),(n,s,c)) xs
          | Volume    y <- x = readIt ((d,p,y,i),(n,s,c)) xs
          | Issue     y <- x = readIt ((d,p,v,y),(n,s,c)) xs
          | Number    y <- x = readIt ((d,p,v,i),(y,s,c)) xs
          | ChapterNr y <- x = readIt ((d,p,v,i),(n,s,y)) xs
          | Section   y <- x = readIt ((d,p,v,i),(n,y,c)) xs
          | otherwise        = acc

data Detail
    = Date     [RefDate]
    | Page      String
    | Volume    String
    | Issue     String
    | Number    String
    | ChapterNr String
    | Section   String
      deriving ( Eq, Show )

xpDetail :: PU Detail
xpDetail
    = xpAlt tag ps
    where
      tag _ = 0
      ps = [ xpWrap (Date, const []) $ xpDate
           , xpWrap (Page,     show) $ xpPage
           , xpWrap (Volume,   show) $ xp "volume"
           , xpWrap (Issue,    show) $ xp "issue"
           , xpWrap (Number,   show) $ xp "number"
           , xpWrap (Section,  show) $ xp "section"
           , xpWrap (ChapterNr,show) $ xp "chapter"
           ]
      xpDate = xpWrap (readDate,show) (xpElem "date" xpText0)
      xp   s = xpElemWithAttrValue "detail" "type" s $
               xpElem "number" xpText

xpPage :: PU String
xpPage
    = xpChoice (xpElemWithAttrValue "detail" "type" "page" $ xpIElem "number" xpText)
               (xpElemWithAttrValue "extent" "unit" "page" $
                xpPair (xpElem "start" xpText)
                       (xpElem "end"   xpText))
               (\(s,e) -> xpLift (s ++ "-" ++ e))

xpUrl :: PU ([RefDate],String)
xpUrl
    = xpDefault ([],[]) . xpIElem "location" $
      xpPair (xpWrap (readDate,show) $
              xpDefault [] $ xpAttr "dateLastAccessed" xpText)
             (xpDefault [] $ xpElem "url"              xpText)

xpIsbn :: PU String
xpIsbn = xpDefault [] $ xpIdentifier "isbn"

xpDoi :: PU String
xpDoi = xpDefault [] $ xpIdentifier "doi"

xpIdentifier :: String -> PU String
xpIdentifier i
    = xpIElem "identifier" $ xpAddFixedAttr "type" i xpText

xpNote :: PU (String)
xpNote = xpDefault [] $ xpIElem "note" xpText

readDate :: String -> [RefDate]
readDate s = (parseDate         $ takeWhile (/= '/') s) ++
             (parseDate . tail' $ dropWhile (/= '/') s)

-- | Possible formats: "YYYY", "YYYY-MM", "YYYY-MM-DD".
parseDate :: String -> [RefDate]
parseDate s = case split (== '-') (unwords $ words s) of
                [y,m,d] -> [RefDate y m [] d  [] []]
                [y,m]   -> [RefDate y m [] [] [] []]
                [y]     -> if and (map isDigit y)
                           then [RefDate y  [] [] [] [] []]
                           else [RefDate [] [] [] [] y  []]
                _       -> []

emptyAgents :: [Agent]
emptyAgents  = []

readRefType :: [String] -> RefType
readRefType [] = NoType
readRefType (t:_) =
  case M.lookup t genreTypeMapping of
    Just x  -> x
    Nothing -> ArticleJournal       -- Reasonable default (?)

-- The string constants come from http://www.loc.gov/standards/valuelist/marcgt.html, which are used in the
-- "<genre></genre>" element (http://www.loc.gov/standards/mods/userguide/genre.html)
genreTypeMapping ::  M.Map String RefType
genreTypeMapping = M.fromList
  [ ( "book",                       Book )
  , ( "periodical",                 ArticleJournal )
  , ( "newspaper",                  ArticleNewspaper )
  , ( "encyclopedia",               EntryEncyclopedia )
  , ( "conference publication",     PaperConference )
  , ( "academic journal",           ArticleJournal )
  , ( "collection",                 Chapter )
  , ( "legal case and case notes",  LegalCase )
  , ( "legislation",                Legislation )
  , ( "motion picutre",             MotionPicture )
  , ( "patent",                     Patent )
  , ( "review",                     Review )
  , ( "thesis",                     Thesis )
  , ( "web page",                   Webpage )
  , ( "webpage",                    Webpage )
  , ( "web site",                   Webpage )
  ]