{-# LANGUAGE PatternGuards, CPP #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Text.CSL.Input.MODS
-- Copyright   :  (c) Andrea Rossato
-- License     :  BSD-style (see LICENSE)
--
-- Maintainer  :  Andrea Rossato <andrea.rossato@unitn.it>
-- Stability   :  unstable
-- Portability :  unportable
--
-- An ugly MODS parser
--
-----------------------------------------------------------------------------

module Text.CSL.Input.MODS where

import Text.CSL.Eval ( split )
import Text.CSL.Output.Plain ( (<+>), tail' )
import Text.CSL.Pickle
import Text.CSL.Reference
import Text.CSL.Style ( betterThen )

import Data.Char ( isDigit, isLower )
import qualified Data.Map as M

-- | Read a file with a single MODS record.
readModsFile :: FilePath -> IO Reference
readModsFile = readXmlFile xpMods

-- | Read a file with a collection of MODS records.
readModsCollectionFile :: FilePath -> IO [Reference]
readModsCollectionFile = readXmlFile xpModsCollection

xpModsCollection :: PU [Reference]
xpModsCollection = xpIElem "modsCollection" $ xpList xpMods

xpMods :: PU Reference
xpMods = xpIElem "mods" xpReference

xpReference :: PU Reference
xpReference
    = xpWrap ( \ ( ref
                , (ck,ty,ti,i,d)
                ,((au,ed,tr),(re,it,pu'),(co,ce,dg,om))
                ,((di',pg,vl,is),(nu,sc,ch))
                , (di,ac,pu,pp,et)
                , ((ac',uri),no)
                 ) ->
               ref { refId            = ck `betterThen` take 10 (concat $ words ti)
                   , refType          = if ty /= NoType then ty else
                                        if refType ref == Book then Chapter else refType ref
                   , title            = ti
                   , author           = au
                   , editor           = ed `betterThen` editor           ref
                   , edition          = et `betterThen` edition          ref
                   , translator       = tr `betterThen` translator       ref
                   , recipient        = re `betterThen` recipient        ref
                   , interviewer      = it `betterThen` interviewer      ref
                   , composer         = co `betterThen` composer         ref
                   , collectionEditor = ce `betterThen` collectionEditor ref
                   , publisherPlace   = pp `betterThen` publisherPlace   ref
                   , containerAuthor  = containerAuthor  ref
                   , url              = uri
                   , note             = no
                   , isbn             = i
                   , doi              = d
                   , issued           = issued        ref `betterThen` di `betterThen` di'
                   , accessed         = accessed      ref `betterThen` ac `betterThen` ac'
                   , page             = page          ref `betterThen` pg
                   , volume           = volume        ref `betterThen` vl
                   , issue            = issue         ref `betterThen` is  `betterThen`
                                        number        ref `betterThen` nu
                   , number           = number        ref `betterThen` nu
                   , section          = section       ref `betterThen` sc
                   , chapterNumber    = chapterNumber ref `betterThen` ch
                   , publisher        = fromAgent pu
                                           `betterThen` publisher ref
                                           `betterThen` fromAgent pu'
                                           `betterThen` fromAgent dg
                                           `betterThen` fromAgent om
                   }
             , \r -> (  r
                     , (refId     r, refType          r, title          r, isbn r, doi r)
                     ,((author    r, editor           r, translator     r)
                      ,(recipient r, interviewer      r, emptyAgents     )
                      ,(composer  r, collectionEditor r, emptyAgents, emptyAgents))
                     ,((issued    r, page  r, volume r, issue r)
                      ,(number    r, section   r, chapterNumber r))
                     , (issued    r, accessed r, emptyAgents, publisherPlace r, edition r)
                     ,((accessed  r, url r), note r)
                     )) $
      xp6Tuple (xpDefault emptyReference xpRelatedItem)
               (xp5Tuple  xpCiteKey xpRefType xpTitle xpIsbn xpDoi)
                xpAgents xpPart xpOrigin (xpPair xpUrl xpNote)

xpCiteKey :: PU String
xpCiteKey
    = xpDefault [] $
      xpChoice (xpAttr "ID" xpText)
               (xpElemWithAttrValue "identifier" "type" "citekey" xpText)
                xpLift

xpOrigin :: PU ([RefDate],[RefDate],[Agent],String,String)
xpOrigin
    = xpDefault ([],[],[],[],[]) . xpIElem "originInfo" $
      xp5Tuple (xpDefault [] $ xpWrap (readDate,show) $
                xpIElem "dateIssued" xpText0)
               (xpDefault [] $ xpWrap (readDate,show) $
                xpIElem "dateCaptured" xpText0)
               (xpDefault [] $ xpList $ xpWrap (\s -> Agent [] [] [] s [] [] False, show) $
                xpIElem "publisher" xpText0)
               (xpDefault [] $ xpIElem "place"   $ xpIElem "placeTerm" xpText0)
               (xpDefault [] $ xpIElem "edition" $                     xpText0)

xpRefType :: PU RefType
xpRefType
    = xpDefault NoType $
      xpWrap (readRefType, const []) xpGenre

xpGenre :: PU [String]
xpGenre
    = xpList $ xpIElem "genre" $
      xpChoice xpZero
              (xpPair (xpDefault [] $ xpAttr "authority" xpText) xpText)
              $ xpLift . snd

xpRelatedItem :: PU Reference
xpRelatedItem
    = xpIElem "relatedItem" . xpAddFixedAttr "type" "host" $
      xpWrap ( \( (ty,ct)
                ,((ca,ed,tr),(re,it,pu'),(co,ce,dg,om))
                ,((di,pg,vl,is),(nu,sc,ch))
                , (di',ac,pu,pp,et)
                ) ->
               emptyReference { refType          = ty
                              , containerAuthor  = ca
                              , containerTitle   = ct
                              , editor           = ed
                              , edition          = et
                              , translator       = tr
                              , recipient        = re
                              , interviewer      = it
                              , publisherPlace   = pp
                              , composer         = co
                              , collectionEditor = ce
                              , issued           = di `betterThen` di'
                              , accessed         = ac
                              , page             = pg
                              , volume           = vl
                              , issue            = is `betterThen` nu
                              , number           = nu
                              , section          = sc
                              , chapterNumber    = ch
                              , publisher        = fromAgent $ pu `betterThen` pu' `betterThen`
                                                               dg `betterThen` om

                              }
             , \r -> ( (refType         r, containerTitle   r)
                     ,((containerAuthor r, editor           r, translator r)
                      ,(recipient       r, interviewer      r, emptyAgents )
                      ,(composer        r, collectionEditor r, emptyAgents , emptyAgents))
                     ,((issued  r, page  r, volume r, issue r)
                      ,(number  r, section   r, chapterNumber r))
                     , (issued  r, accessed  r,emptyAgents, publisherPlace r, edition r)
                     )) $
      xp4Tuple (xpPair  xpRefType xpTitle)
                xpAgents xpPart xpOrigin

-- FIXME: join title and subtitle correctly: usare Title per shortTitle.
xpTitle :: PU String
xpTitle
    = xpWrap (uncurry (<+>), \s -> (s,[])) $
      xpIElem "titleInfo" $
      xpPair (xpIElem "title" xpText0)
             (xpDefault [] $ xpIElem "subTitle" xpText0)

xpAgents :: PU (([Agent],[Agent],[Agent])
               ,([Agent],[Agent],[Agent])
               ,([Agent],[Agent],[Agent],[Agent]))
xpAgents
    = xpTriple (xpTriple (xpAgent "author"         "aut")
                         (xpAgent "editor"         "edt")
                         (xpAgent "translator"     "trl"))
               (xpTriple (xpAgent "recipient"      "rcp")
                         (xpAgent "interviewer"    "ivr")
                         (xpAgent "publisher"      "pbl"))
               (xp4Tuple (xpAgent "composer"       "cmp")
                         (xpAgent "collector"      "xol")
                         (xpAgent "degree grantor" "dgg")
                         (xpAgent "organizer of meeting" "orm"))

xpAgent :: String -> String -> PU [Agent]
xpAgent sa sb
    = xpDefault [] $ xpList $ xpIElem "name" $
      xpChoice  xpZero
               (xpIElem "role" $ xpIElem "roleTerm" xpText0)
               (\x -> if x == sa || x == sb then xpickle else xpZero)

instance XmlPickler Agent where
    xpickle = xpAlt tag ps
        where
          tag _ = 0
          ps    = [ personal, others ]
          personal = xpWrap ( uncurry parseName
                            , \(Agent gn _ _ fn _ _ _) -> (gn,fn)) $
                     xpAddFixedAttr "type" "personal" xpNameData
          others   =xpWrap (\s -> Agent [] [] [] [] [] s False, undefined) $
                     xpElem "namePart" xpText0

-- | "von Hicks,! Jr., Michael" or "la Martine,! III, Martin B. de" or
-- "Rossato, Jr., Andrea G. B." or "Paul, III, Juan".
parseName :: [String] -> String -> Agent
parseName gn fn
    | ("!":sf:",":xs) <- gn     = parse xs (sf ++ ".") True
    | ("!":sf    :xs) <- gn
    , sf /= [] , last sf == ',' = parse xs  sf         True

    | (sf:",":xs)     <- gn     = parse xs (sf ++ ".") False
    | (sf    :xs)     <- gn
    , sf /= [], last sf == ','  = parse xs  sf         False
    | otherwise                 = parse gn  ""         False
    where
      parse g s b = Agent (getGiven g) (getDrop g) (getNonDrop fn) (getFamily fn) s [] b
      setInit   s = if length s == 1 then s ++ "." else s
      getDrop     = unwords     . reverse . takeWhile (and . map isLower) . reverse
      getGiven    = map setInit . reverse . dropWhile (and . map isLower) . reverse
      getNonDrop  = unwords . takeWhile (and . map isLower) . words
      getFamily   = unwords . dropWhile (and . map isLower) . words

xpNameData :: PU ([String],String)
xpNameData
    = xpWrap (readName,const []) $
      xpList $ xpElem "namePart" $ xpPair (xpAttr "type" xpText) xpText0
    where
      readName x = (readg x, readf x)
      readf = foldr (\(k,v) xs -> if k == "family" then v    else xs) []
      readg = foldr (\(k,v) xs -> if k == "given"  then v:xs else xs) []

xpPart :: PU (([RefDate],String,String,String)
             ,(String,String,String))
xpPart
    = xpDefault none . xpIElem "part" .
      xpWrap (readIt none,const []) $ xpList xpDetail
    where
      none = (([],"","",""),("","",""))
      readIt r [] = r
      readIt acc@((d,p,v,i),(n,s,c)) (x:xs)
          | Date      y <- x = readIt ((y,p,v,i),(n,s,c)) xs
          | Page      y <- x = readIt ((d,y,v,i),(n,s,c)) xs
          | Volume    y <- x = readIt ((d,p,y,i),(n,s,c)) xs
          | Issue     y <- x = readIt ((d,p,v,y),(n,s,c)) xs
          | Number    y <- x = readIt ((d,p,v,i),(y,s,c)) xs
          | ChapterNr y <- x = readIt ((d,p,v,i),(n,s,y)) xs
          | Section   y <- x = readIt ((d,p,v,i),(n,y,c)) xs
          | otherwise        = acc

data Detail
    = Date     [RefDate]
    | Page      String
    | Volume    String
    | Issue     String
    | Number    String
    | ChapterNr String
    | Section   String
      deriving ( Eq, Show )

xpDetail :: PU Detail
xpDetail
    = xpAlt tag ps
    where
      tag _ = 0
      ps = [ xpWrap (Date, const []) $ xpDate
           , xpWrap (Page,     show) $ xpPage
           , xpWrap (Volume,   show) $ xp "volume"
           , xpWrap (Issue,    show) $ xp "issue"
           , xpWrap (Number,   show) $ xp "number"
           , xpWrap (Section,  show) $ xp "section"
           , xpWrap (ChapterNr,show) $ xp "chapter"
           ]
      xpDate = xpWrap (readDate,show) (xpElem "date" xpText0)
      xp   s = xpElemWithAttrValue "detail" "type" s $
               xpElem "number" xpText

xpPage :: PU String
xpPage
    = xpChoice (xpElemWithAttrValue "detail" "type" "page" $ xpIElem "number" xpText)
               (xpElemWithAttrValue "extent" "unit" "page" $
                xpPair (xpElem "start" xpText)
                       (xpElem "end"   xpText))
               (\(s,e) -> xpLift (s ++ "-" ++ e))

xpUrl :: PU ([RefDate],String)
xpUrl
    = xpDefault ([],[]) . xpIElem "location" $
      xpPair (xpWrap (readDate,show) $
              xpDefault [] $ xpAttr "dateLastAccessed" xpText)
             (xpDefault [] $ xpElem "url"              xpText)

xpIsbn :: PU String
xpIsbn = xpDefault [] $ xpIdentifier "isbn"

xpDoi :: PU String
xpDoi = xpDefault [] $ xpIdentifier "doi"

xpIdentifier :: String -> PU String
xpIdentifier i
    = xpIElem "identifier" $ xpAddFixedAttr "type" i xpText

xpNote :: PU (String)
xpNote = xpDefault [] $ xpIElem "note" xpText

readDate :: String -> [RefDate]
readDate s = (parseDate         $ takeWhile (/= '/') s) ++
             (parseDate . tail' $ dropWhile (/= '/') s)

-- | Possible formats: "YYYY", "YYYY-MM", "YYYY-MM-DD".
parseDate :: String -> [RefDate]
parseDate s = case split (== '-') (unwords $ words s) of
                [y,m,d] -> [RefDate y m [] d  [] []]
                [y,m]   -> [RefDate y m [] [] [] []]
                [y]     -> if and (map isDigit y)
                           then [RefDate y  [] [] [] [] []]
                           else [RefDate [] [] [] [] y  []]
                _       -> []

emptyAgents :: [Agent]
emptyAgents  = []

fromAgent :: [Agent] -> String
fromAgent = foldr (<+>) [] . map show

readRefType :: [String] -> RefType
readRefType [] = NoType
readRefType (t:_) =
  case M.lookup t genreTypeMapping of
    Just x  -> x
    Nothing -> ArticleJournal       -- Reasonable default (?)

-- The string constants come from http://www.loc.gov/standards/valuelist/marcgt.html, which are used in the
-- "<genre></genre>" element (http://www.loc.gov/standards/mods/userguide/genre.html)
genreTypeMapping ::  M.Map String RefType
genreTypeMapping = M.fromList
  [ ( "book",                       Book             )
  , ( "book chapter",               Chapter          )
  , ( "periodical",                 ArticleJournal   )
  , ( "newspaper",                  ArticleNewspaper )
  , ( "encyclopedia",               EntryEncyclopedia)
  , ( "conference publication",     PaperConference  )
  , ( "academic journal",           ArticleJournal   )
  , ( "collection",                 Chapter          )
  , ( "legal case and case notes",  LegalCase        )
  , ( "legislation",                Legislation      )
  , ( "instruction",                Book             )
  , ( "motion picutre",             MotionPicture    )
  , ( "patent",                     Patent           )
  , ( "Ph.D. thesis",               Thesis           )
  , ( "Masters thesis",             Thesis           )
  , ( "report",                     Report           )
  , ( "review",                     Review           )
  , ( "thesis",                     Thesis           )
  , ( "unpublished",                NoType           )
  , ( "web page",                   Webpage          )
  , ( "webpage",                    Webpage          )
  , ( "web site",                   Webpage          )
  ]