{-# LANGUAGE PatternGuards, OverloadedStrings, FlexibleInstances,
    ScopedTypeVariables, CPP #-}
module Text.CSL.Pandoc (processCites, processCites') where

import Text.Pandoc
import Text.Pandoc.Walk
import Text.Pandoc.Builder (setMeta, deleteMeta)
import Text.Pandoc.Shared (stringify)
import Text.HTML.TagSoup.Entity (lookupEntity)
import qualified Data.ByteString.Lazy as L
import System.SetEnv (setEnv)
import System.Environment (getEnv)
import Control.Applicative ((<|>))
import Data.Aeson
import Data.Char ( isDigit, isPunctuation, toLower, isSpace )
import qualified Data.Map as M
import Text.CSL.Reference hiding (processCites, Value)
import Text.CSL.Input.Bibutils (readBiblioFile, convertRefs)
import Text.CSL.Style hiding (Cite(..), Citation(..))
import Text.CSL.Proc
import Text.CSL.Output.Pandoc (renderPandoc, renderPandoc')
import qualified Text.CSL.Style as CSL
import Text.CSL.Parser
import Text.CSL.Output.Pandoc ( headInline, tailInline, initInline,
                                toCapital )
import Text.CSL.Data (getDefaultCSL)
import Text.Parsec hiding (State, (<|>))
import Control.Monad
import qualified Control.Exception as E
import Control.Monad.State
import System.FilePath
import System.Directory (doesFileExist, getAppUserDataDirectory)
import Text.CSL.Util (findFile, splitStrWhen, tr', parseRomanNumeral, trim)
import System.IO.Error (isDoesNotExistError)
import Data.Maybe (fromMaybe)
import Text.XML (XMLException(..))

-- | Process a 'Pandoc' document by adding citations formatted
-- according to a CSL style.  Add a bibliography (if one is called
-- for) at the end of the document.
processCites :: Style -> [Reference] -> Pandoc -> Pandoc
processCites style refs (Pandoc m1 b1) =
  let metanocites   = lookupMeta "nocite" m1
      nocites       = mkNociteWildcards refs . query getCitation <$> metanocites
      Pandoc m2 b2  = evalState (walkM setHashes $ Pandoc (deleteMeta "nocite" m1) b1) 1
      grps          = query getCitation (Pandoc m2 b2) ++ fromMaybe [] nocites
      locMap        = locatorMap style
      result        = citeproc procOpts{ linkCitations = isLinkCitations m2}
                        style refs (setNearNote style $
                        map (map (toCslCite locMap)) grps)
      cits_map      = tr' "cits_map" $ M.fromList $ zip grps (citations result)
      biblioList    = map (renderPandoc' style) $ zip (bibliography result) (citationIds result)
      moveNotes     = case lookupMeta "notes-after-punctuation" m1 of
                           Just (MetaBool False) -> False
                           _                     -> True
      Pandoc m3 bs  = bottomUp (mvPunct moveNotes style) . deNote .
                        topDown (processCite style cits_map) $ Pandoc m2 b2
      m             = case metanocites of
                            Nothing -> m3
                            Just x  -> setMeta "nocite" x m3
  in  Pandoc m $ bottomUp (concatMap removeNocaseSpans)
               $ insertRefs m biblioList bs

-- if document contains a Div with id="refs", insert
-- references as its contents.  Otherwise, insert references
-- at the end of the document in a Div with id="refs"
insertRefs :: Meta -> [Block] -> [Block] -> [Block]
insertRefs _    []   bs = bs
insertRefs meta refs bs =
  if isRefRemove meta
     then bs
     else case runState (walkM go bs) False of
               (bs', True) -> bs'
               (_, False)  ->
                  case reverse bs of
                        (Header lev (id',classes,kvs) ys) : xs ->
                          reverse xs ++
                            [Header lev (id',addUnNumbered classes,kvs) ys,
                             Div ("refs",["references"],[]) refs]
                        _   -> bs ++ refHeader ++
                                [Div ("refs",["references"],[]) refs]
  where go :: Block -> State Bool Block
        go (Div attr@("refs",_,_) xs) = do
          put True
          -- refHeader isn't used if you have an explicit references div
          return $ Div attr (xs ++ refs)
        go x = return x
        addUnNumbered cs = "unnumbered" : [c | c <- cs, c /= "unnumbered"]
        refHeader = case refTitle meta of
                     Just ils ->
                       [Header 1 ("bibliography", ["unnumbered"], []) ils]
                     _        -> []

refTitle :: Meta -> Maybe [Inline]
refTitle meta =
  case lookupMeta "reference-section-title" meta of
    Just (MetaString s)           -> Just [Str s]
    Just (MetaInlines ils)        -> Just ils
    Just (MetaBlocks [Plain ils]) -> Just ils
    Just (MetaBlocks [Para ils])  -> Just ils
    _                             -> Nothing

isRefRemove :: Meta -> Bool
isRefRemove meta =
  case lookupMeta "suppress-bibliography" meta of
    Just (MetaBool True) -> True
    _                    -> False

isLinkCitations :: Meta -> Bool
isLinkCitations meta =
  case lookupMeta "link-citations" meta of
    Just (MetaBool True)   -> True
    Just (MetaString s)    -> map toLower s `elem` yesValues
    Just (MetaInlines ils) -> map toLower (stringify ils) `elem` yesValues
    _                      -> False
  where yesValues = ["true", "yes", "on"]

-- if the 'nocite' Meta field contains a citation with id = '*',
-- create a cite with to all the references.
mkNociteWildcards :: [Reference] -> [[Citation]] -> [[Citation]]
mkNociteWildcards refs nocites =
  map (\citgrp -> expandStar citgrp) nocites
  where expandStar cs =
         case [c | c <- cs
                 , citationId c == "*"] of
              [] -> cs
              _  -> allcites
        allcites = map (\ref -> Citation{
                                  citationId = unLiteral (refId ref),
                                  citationPrefix = [],
                                  citationSuffix = [],
                                  citationMode = NormalCitation,
                                  citationNoteNum = 0,
                                  citationHash = 0 }) refs

removeNocaseSpans :: Inline -> [Inline]
removeNocaseSpans (Span ("",["nocase"],[]) xs) = xs
removeNocaseSpans x = [x]

-- | Process a 'Pandoc' document by adding citations formatted
-- according to a CSL style.  The style filename is derived from
-- the `csl` field of the metadata, and the references are taken
-- from the `references` field or read from a file in the `bibliography`
-- field.
processCites' :: Pandoc -> IO Pandoc
processCites' (Pandoc meta blocks) = do
  mbcsldir <- E.catch (Just <$> getAppUserDataDirectory "csl") $ \e ->
                 if isDoesNotExistError e
                    then return Nothing
                    else E.throwIO e
  mbpandocdir <- E.catch (Just <$> getAppUserDataDirectory "pandoc") $ \e ->
                 if isDoesNotExistError e
                    then return Nothing
                    else E.throwIO e
  let inlineRefError s = error $ "Error parsing references: " ++ s
  let inlineRefs = either inlineRefError id
                   $ convertRefs $ lookupMeta "references" meta
  let cslfile = (lookupMeta "csl" meta <|> lookupMeta "citation-style" meta)
                >>= toPath
  let mbLocale = (lookupMeta "lang" meta `mplus` lookupMeta "locale" meta)
                   >>= toPath
  let getDefaultCSL' = case mbcsldir of
                            Just csldir -> do
                              let f = csldir </> "chicago-author-date.csl"
                              exists <- doesFileExist f
                              if exists
                                 then L.readFile f
                                 else getDefaultCSL
                            Nothing -> getDefaultCSL
  csl <- case cslfile of
               Just f | not (null f) -> E.catch
                   (readCSLFile mbLocale f) $ \e ->
                        E.throwIO (InvalidXMLFile f e)
               _ -> do
                 -- get default CSL: look first in ~/.csl, and take
                 -- from distribution if not found
                 raw <- case mbpandocdir of
                          Just pandocdir -> do
                            let f = pandocdir </> "default.csl"
                            exists <- doesFileExist f
                            if exists
                               then L.readFile f
                               else getDefaultCSL'
                          Nothing -> getDefaultCSL'
                 localizeCSL mbLocale $ parseCSL' raw
  -- set LANG environment from locale; this affects unicode collation
  -- if pandoc-citeproc compiled with unicode_collation flag
  case styleLocale csl of
       (l:_) -> do
         setEnv "LC_ALL" (localeLang l)
         setEnv "LANG"   (localeLang l)
       []    -> do
         envlang <- getEnv "LANG"
         if null envlang
            then do
              -- Note that LANG needs to be set for bibtex conversion:
              setEnv "LANG" "en-US.UTF-8"
              setEnv "LC_ALL" "en-US.UTF-8"
            else do
              setEnv "LC_ALL" envlang
  bibRefs <- getBibRefs $ maybe (MetaList []) id
                        $ lookupMeta "bibliography" meta
  let refs = inlineRefs ++ bibRefs
  let cslAbbrevFile = lookupMeta "citation-abbreviations" meta >>= toPath
  let skipLeadingSpace = L.dropWhile (\s -> s == 32 || (s >= 9 && s <= 13))
  abbrevs <- maybe (return (Abbreviations M.empty))
             (\f -> findFile (maybe ["."] (\g -> [".", g]) mbcsldir) f >>=
                    maybe (error $ "Could not find " ++ f) return >>=
               L.readFile >>=
               either error return . eitherDecode . skipLeadingSpace)
             cslAbbrevFile
  let csl' = csl{ styleAbbrevs = abbrevs }
  return $ processCites csl' refs $ Pandoc meta blocks

toPath :: MetaValue -> Maybe String
toPath (MetaString s) = Just s
toPath (MetaInlines ils) = Just $ stringify ils
toPath _ = Nothing

getBibRefs :: MetaValue -> IO [Reference]
getBibRefs (MetaList xs) = concat `fmap` mapM getBibRefs xs
getBibRefs (MetaInlines xs) = getBibRefs (MetaString $ stringify xs)
getBibRefs (MetaString s) = do
  path <- findFile ["."] s >>= maybe (error $ "Could not find " ++ s) return
  map unescapeRefId `fmap` readBiblioFile path
getBibRefs _ = return []

-- unescape reference ids, which may contain XML entities, so
-- that we can do lookups with regular string equality
unescapeRefId :: Reference -> Reference
unescapeRefId ref = ref{ refId = Literal $ decodeEntities (unLiteral $ refId ref) }

decodeEntities :: String -> String
decodeEntities [] = []
decodeEntities ('&':xs) =
  let (ys,zs) = break (==';') xs
  in  case zs of
           ';':ws -> case lookupEntity ('&':ys ++ ";") of
#if MIN_VERSION_tagsoup(0,13,0)
                                       Just s  -> s ++ decodeEntities ws
#else
                                       Just c  -> [c] ++ decodeEntities ws
#endif
                                       Nothing -> '&' : decodeEntities xs
           _      -> '&' : decodeEntities xs
decodeEntities (x:xs) = x : decodeEntities xs

-- | Substitute 'Cite' elements with formatted citations.
processCite :: Style -> M.Map [Citation] Formatted -> Inline -> Inline
processCite s cs (Cite t _) =
   case M.lookup t cs of
        Just (Formatted (x:xs)) -> Cite t (renderPandoc s (Formatted (x:xs)))
        _             -> Strong [Str "???"]  -- TODO raise error instead?
processCite _ _ x = x

isNote :: Inline -> Bool
isNote (Note _) = True
isNote (Cite _ [Note _]) = True
isNote _ = False

mvPunctInsideQuote :: Inline -> Inline -> [Inline]
mvPunctInsideQuote (Quoted qt ils) (Str s) | s `elem` [".", ","] =
  [Quoted qt (init ils ++ (mvPunctInsideQuote (last ils) (Str s)))]
mvPunctInsideQuote il il' = [il, il']

isSpacy :: Inline -> Bool
isSpacy Space = True
isSpacy SoftBreak = True
isSpacy _ = False

mvPunct :: Bool -> Style -> [Inline] -> [Inline]
mvPunct _ _ (x : Space : xs) | isSpacy x = x : xs
mvPunct moveNotes _ (s : x : ys) | isSpacy s, isNote x, startWithPunct ys =
  if moveNotes
     then Str (headInline ys) : x : tailInline ys
     else x : ys
mvPunct moveNotes _ (Cite cs ils : ys) |
     length ils > 1
   , isNote (last ils)
   , startWithPunct ys
   , moveNotes
   = Cite cs (init ils ++ [Str (headInline ys) | not (endWithPunct False (init ils))]
     ++ [last ils]) : tailInline ys
mvPunct moveNotes sty (q@(Quoted _ _) : w@(Str _) : x : ys)
  | isNote x, isPunctuationInQuote sty, moveNotes  =
    mvPunctInsideQuote q w ++ (x : ys)
mvPunct _ _ (s : x : ys) | isSpacy s, isNote x = x : ys
mvPunct _ _ (s : x@(Cite _ (Superscript _ : _)) : ys) | isSpacy s = x : ys
mvPunct _ _ xs = xs

endWithPunct :: Bool -> [Inline] -> Bool
endWithPunct _ [] = True
endWithPunct onlyFinal xs@(_:_) =
  case reverse (stringify xs) of
       []                       -> True
       -- covers .), .", etc.:
       (d:c:_) | isPunctuation d && not onlyFinal
                && isEndPunct c -> True
       (c:_) | isEndPunct c     -> True
             | otherwise        -> False
  where isEndPunct c = c `elem` (".,;:!?" :: String)

startWithPunct :: [Inline] -> Bool
startWithPunct = and . map (`elem` (".,;:!?" :: String)) . headInline

deNote :: Pandoc -> Pandoc
deNote = topDown go
  where go (Cite (c:cs) [Note [Para xs]]) =
            Cite (c:cs) [Note [Para $ toCapital xs]]
        go (Note xs) = Note $ topDown go' xs
        go x = x
        go' (x : Cite cs [Note [Para xs]] : ys) | not (isSpacy x) =
             x : Str "," : Space : comb (\zs -> [Cite cs zs]) xs ys
        go' (x : Note [Para xs] : ys) | not (isSpacy x) =
             x : Str "," : Space : comb id xs ys
        go' (Cite cs [Note [Para xs]] : ys) = comb (\zs -> [Cite cs zs]) xs ys
        go' (Note [Para xs] : ys) = comb id xs ys
        go' xs = xs

comb :: ([Inline] -> [Inline]) -> [Inline] -> [Inline] -> [Inline]
comb f xs ys =
  let xs' = if startWithPunct ys && endWithPunct True xs
               then initInline $ removeLeadingPunct xs
               else removeLeadingPunct xs
      removeLeadingPunct (Str [c] : s : zs)
          | isSpacy s && (c == ',' || c == '.' || c == ':') = zs
      removeLeadingPunct zs = zs
  in  f xs' ++ ys

-- | Retrieve all citations from a 'Pandoc' docuument. To be used with
-- 'query'.
getCitation :: Inline -> [[Citation]]
getCitation i | Cite t _ <- i = [t]
              | otherwise     = []

setHashes :: Inline -> State Int Inline
setHashes i | Cite t ils <- i = do t' <- mapM setHash t
                                   return $ Cite t' ils
            | otherwise       = return i

setHash :: Citation -> State Int Citation
setHash c = do
  ident <- get
  put $ ident + 1
  return c{ citationHash = ident }

toCslCite :: LocatorMap -> Citation -> CSL.Cite
toCslCite locMap c
    = let (la, lo, s)  = locatorWords locMap $ citationSuffix c
          s'      = case (la,lo,s) of
                         -- treat a bare locator as if it begins with space
                         -- so @item1 [blah] is like [@item1, blah]
                         ("","",(x:_))
                           | not (isPunct x) -> Space : s
                         _                   -> s
          isPunct (Str (x:_)) = isPunctuation x
          isPunct _           = False
      in   emptyCite { CSL.citeId         = citationId c
                     , CSL.citePrefix     = Formatted $ citationPrefix c
                     , CSL.citeSuffix     = Formatted s'
                     , CSL.citeLabel      = la
                     , CSL.citeLocator    = lo
                     , CSL.citeNoteNumber = show $ citationNoteNum c
                     , CSL.authorInText   = citationMode c == AuthorInText
                     , CSL.suppressAuthor = citationMode c == SuppressAuthor
                     , CSL.citeHash       = citationHash c
                     }

locatorWords :: LocatorMap -> [Inline] -> (String, String, [Inline])
locatorWords locMap inp =
  case parse (pLocatorWords locMap) "suffix" $
         splitStrWhen (\c -> isLocatorPunct c || isSpace c) inp of
       Right r   -> r
       Left _    -> ("","",inp)

pLocatorWords :: LocatorMap -> Parsec [Inline] st (String, String, [Inline])
pLocatorWords locMap = do
  (la,lo) <- pLocator locMap
  s <- getInput -- rest is suffix
  return (la, lo, s)

pMatch :: (Inline -> Bool) -> Parsec [Inline] st Inline
pMatch condition = try $ do
  t <- anyToken
  guard $ condition t
  return t

pSpace :: Parsec [Inline] st Inline
pSpace = pMatch (\t -> isSpacy t || t == Str "\160")

pLocator :: LocatorMap -> Parsec [Inline] st (String, String)
pLocator locMap = try $ do
  optional $ pMatch (== Str ",")
  optional pSpace
  la <- try (do ts <- many1 (notFollowedBy (pWordWithDigits True) >> anyToken)
                case M.lookup (trim (stringify ts)) locMap of
                       Just l  -> return l
                       Nothing -> mzero)
      <|> (lookAhead pDigit >> return "page")
  g <- pWordWithDigits True
  gs <- many (pWordWithDigits False)
  let lo = concat (g:gs)
  return (la, lo)

pRoman :: Parsec [Inline] st String
pRoman = try $ do
  t <- anyToken
  case t of
       Str xs -> case parseRomanNumeral xs of
                      Nothing -> mzero
                      Just _  -> return xs
       _      -> mzero

-- we want to capture:  123, 123A, C22, XVII, 33-44, 22-33; 22-11
pWordWithDigits :: Bool -> Parsec [Inline] st String
pWordWithDigits isfirst = try $ do
  punct <- if isfirst
              then return ""
              else stringify `fmap` pLocatorPunct
  sp <- option "" (pSpace >> return " ")
  s <-  pRoman <|>
        try (do ts <- many1 (notFollowedBy pSpace >>
                             notFollowedBy pLocatorPunct >>
                             anyToken)
                let ts' = stringify ts
                guard (any isDigit ts')
                return ts')
  return $ punct ++ sp ++ s

pDigit :: Parsec [Inline] st ()
pDigit = do
  t <- anyToken
  case t of
      Str (d:_) | isDigit d -> return ()
      _ -> mzero

pLocatorPunct :: Parsec [Inline] st Inline
pLocatorPunct = pMatch isLocatorPunct'
  where isLocatorPunct' (Str [c]) = isLocatorPunct c
        isLocatorPunct' _ = False

isLocatorPunct :: Char -> Bool
isLocatorPunct ':' = False
isLocatorPunct c   = isPunctuation c

type LocatorMap = M.Map String String

locatorMap :: Style -> LocatorMap
locatorMap sty =
  foldr (\term -> M.insert (termSingular term) (cslTerm term)
                . M.insert (termPlural term) (cslTerm term))
    M.empty
    (concatMap localeTerms $ styleLocale sty)