{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE CPP #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} module Text.CSL.Pandoc (processCites, processCites') where import Prelude import Control.Applicative ((<|>)) import qualified Control.Exception as E import Control.Monad import Control.Monad.State import Data.Aeson import qualified Data.ByteString.Lazy as L import Data.Char (isDigit, isPunctuation, isSpace, toLower) import qualified Data.Map as M import qualified Data.Set as Set import Data.Maybe (fromMaybe) import System.Directory (getAppUserDataDirectory) import System.Environment (getEnv) import System.FilePath import System.IO.Error (isDoesNotExistError) import System.SetEnv (setEnv) import Text.CSL.Data (getDefaultCSL) import Text.CSL.Exception import Text.CSL.Input.Bibutils (convertRefs, readBiblioFile) import Text.CSL.Output.Pandoc (renderPandoc, renderPandoc', headInline, initInline, tailInline, toCapital) import Text.CSL.Parser import Text.CSL.Proc import Text.CSL.Reference hiding (Value, processCites) import Text.CSL.Style hiding (Citation (..), Cite (..)) import qualified Text.CSL.Style as CSL import Text.CSL.Util (findFile, lastInline, parseRomanNumeral, splitStrWhen, tr', trim) import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc import Text.Pandoc.Builder (deleteMeta, setMeta) import Text.Pandoc.Shared (stringify) import Text.Pandoc.Walk import Text.Parsec hiding (State, (<|>)) -- | Process a 'Pandoc' document by adding citations formatted -- according to a CSL style. Add a bibliography (if one is called -- for) at the end of the document. processCites :: Style -> [Reference] -> Pandoc -> Pandoc processCites style refs (Pandoc m1 b1) = let metanocites = lookupMeta "nocite" m1 nocites = mkNociteWildcards refs . query getCitation <$> metanocites Pandoc m2 b2 = evalState (walkM setHashes $ Pandoc (deleteMeta "nocite" m1) b1) 1 grps = query getCitation (Pandoc m2 b2) ++ fromMaybe [] nocites locMap = locatorMap style result = citeproc procOpts{ linkCitations = isLinkCitations m2} style refs (setNearNote style $ map (map (toCslCite locMap)) grps) cits_map = tr' "cits_map" $ M.fromList $ zip grps (citations result) biblioList = map (renderPandoc' style) $ zip (bibliography result) (citationIds result) moveNotes = maybe True truish $ lookupMeta "notes-after-punctuation" m1 Pandoc m3 bs = walk (mvPunct moveNotes style) . deNote . walk (processCite style cits_map) $ Pandoc m2 b2 m = case metanocites of Nothing -> m3 Just x -> setMeta "nocite" x m3 in Pandoc m $ walk (concatMap removeNocaseSpans) $ insertRefs m biblioList bs -- if document contains a Div with id="refs", insert -- references as its contents. Otherwise, insert references -- at the end of the document in a Div with id="refs" insertRefs :: Meta -> [Block] -> [Block] -> [Block] insertRefs _ [] bs = bs insertRefs meta refs bs = if isRefRemove meta then bs else case runState (walkM go bs) False of (bs', True) -> bs' (_, False) -> case reverse bs of Header lev (id',classes,kvs) ys : xs -> reverse xs ++ [Header lev (id',addUnNumbered classes,kvs) ys, Div ("refs",["references"],[]) refs] _ -> bs ++ refHeader ++ [Div ("refs",["references"],[]) refs] where go :: Block -> State Bool Block go (Div attr@("refs",_,_) xs) = do put True -- refHeader isn't used if you have an explicit references div return $ Div attr (xs ++ refs) go x = return x addUnNumbered cs = "unnumbered" : [c | c <- cs, c /= "unnumbered"] refHeader = case refTitle meta of Just ils -> [Header 1 ("bibliography", ["unnumbered"], []) ils] _ -> [] refTitle :: Meta -> Maybe [Inline] refTitle meta = case lookupMeta "reference-section-title" meta of Just (MetaString s) -> Just [Str s] Just (MetaInlines ils) -> Just ils Just (MetaBlocks [Plain ils]) -> Just ils Just (MetaBlocks [Para ils]) -> Just ils _ -> Nothing isRefRemove :: Meta -> Bool isRefRemove meta = maybe False truish $ lookupMeta "suppress-bibliography" meta isLinkCitations :: Meta -> Bool isLinkCitations meta = maybe False truish $ lookupMeta "link-citations" meta truish :: MetaValue -> Bool truish (MetaBool t) = t truish (MetaString s) = isYesValue (map toLower s) truish (MetaInlines ils) = isYesValue (map toLower (stringify ils)) truish (MetaBlocks [Plain ils]) = isYesValue (map toLower (stringify ils)) truish _ = False isYesValue :: String -> Bool isYesValue "t" = True isYesValue "true" = True isYesValue "yes" = True isYesValue "on" = True isYesValue _ = False -- if the 'nocite' Meta field contains a citation with id = '*', -- create a cite with to all the references. mkNociteWildcards :: [Reference] -> [[Citation]] -> [[Citation]] mkNociteWildcards refs nocites = map expandStar nocites where expandStar cs = case [c | c <- cs , citationId c == "*"] of [] -> cs _ -> allcites allcites = map (\ref -> Citation{ citationId = unLiteral (refId ref), citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0 }) refs removeNocaseSpans :: Inline -> [Inline] removeNocaseSpans (Span ("",["nocase"],[]) xs) = xs removeNocaseSpans x = [x] -- | Process a 'Pandoc' document by adding citations formatted -- according to a CSL style. The style filename is derived from -- the `csl` field of the metadata, and the references are taken -- from the `references` field or read from a file in the `bibliography` -- field. processCites' :: Pandoc -> IO Pandoc processCites' (Pandoc meta blocks) = do mbcsldir <- E.catch (Just <$> getAppUserDataDirectory "csl") $ \e -> if isDoesNotExistError e then return Nothing else E.throwIO e mbpandocdir <- E.catch (Just <$> getAppUserDataDirectory "pandoc") $ \e -> if isDoesNotExistError e then return Nothing else E.throwIO e let inlineRefError s = E.throw $ ErrorParsingReferences s let inlineRefs = either inlineRefError id $ convertRefs $ lookupMeta "references" meta let cslfile = (lookupMeta "csl" meta <|> lookupMeta "citation-style" meta) >>= toPath let mbLocale = (lookupMeta "lang" meta `mplus` lookupMeta "locale" meta) >>= toPath let tryReadCSLFile Nothing _ = mzero tryReadCSLFile (Just d) f = E.catch (readCSLFile mbLocale (d f)) (\(_ :: E.SomeException) -> mzero) csl <- case cslfile of Just f | not (null f) -> readCSLFile mbLocale f _ -> tryReadCSLFile mbpandocdir "default.csl" `mplus` tryReadCSLFile mbcsldir "chicago-author-date.csl" `mplus` (getDefaultCSL >>= localizeCSL mbLocale . parseCSL') -- set LANG environment from locale; this affects unicode collation -- if pandoc-citeproc compiled with unicode_collation flag case styleLocale csl of (l:_) -> do setEnv "LC_ALL" (localeLang l) setEnv "LANG" (localeLang l) [] -> do envlang <- getEnv "LANG" if null envlang then do -- Note that LANG needs to be set for bibtex conversion: setEnv "LANG" "en-US.UTF-8" setEnv "LC_ALL" "en-US.UTF-8" else setEnv "LC_ALL" envlang let citids = query getCitationIds (Pandoc meta blocks) let idpred = if "*" `Set.member` citids then const True else (`Set.member` citids) bibRefs <- getBibRefs idpred $ fromMaybe (MetaList []) $ lookupMeta "bibliography" meta let refs = inlineRefs ++ bibRefs let cslAbbrevFile = lookupMeta "citation-abbreviations" meta >>= toPath let skipLeadingSpace = L.dropWhile (\s -> s == 32 || (s >= 9 && s <= 13)) abbrevs <- maybe (return (Abbreviations M.empty)) (\f -> findFile (maybe ["."] (\g -> [".", g]) mbcsldir) f >>= maybe (E.throwIO $ CouldNotFindAbbrevFile f) return >>= L.readFile >>= either error return . eitherDecode . skipLeadingSpace) cslAbbrevFile let csl' = csl{ styleAbbrevs = abbrevs } return $ processCites csl' refs $ Pandoc meta blocks toPath :: MetaValue -> Maybe String toPath (MetaString s) = Just s -- take last in a list toPath (MetaList xs) = case reverse xs of [] -> Nothing (x:_) -> toPath x toPath (MetaInlines ils) = Just $ stringify ils toPath _ = Nothing getBibRefs :: (String -> Bool) -> MetaValue -> IO [Reference] getBibRefs idpred (MetaList xs) = concat `fmap` mapM (getBibRefs idpred) xs getBibRefs idpred (MetaInlines xs) = getBibRefs idpred (MetaString $ stringify xs) getBibRefs idpred (MetaString s) = do path <- findFile ["."] s >>= maybe (E.throwIO $ CouldNotFindBibFile s) return map unescapeRefId `fmap` readBiblioFile idpred path getBibRefs _ _ = return [] -- unescape reference ids, which may contain XML entities, so -- that we can do lookups with regular string equality unescapeRefId :: Reference -> Reference unescapeRefId ref = ref{ refId = Literal $ decodeEntities (unLiteral $ refId ref) } decodeEntities :: String -> String decodeEntities [] = [] decodeEntities ('&':xs) = let (ys,zs) = break (==';') xs in case zs of ';':ws -> case lookupEntity ('&':ys ++ ";") of #if MIN_VERSION_tagsoup(0,13,0) Just s -> s ++ decodeEntities ws #else Just c -> [c] ++ decodeEntities ws #endif Nothing -> '&' : decodeEntities xs _ -> '&' : decodeEntities xs decodeEntities (x:xs) = x : decodeEntities xs -- | Substitute 'Cite' elements with formatted citations. processCite :: Style -> M.Map [Citation] Formatted -> Inline -> Inline processCite s cs (Cite t _) = case M.lookup t cs of Just (Formatted xs) | not (null xs) || all isSuppressAuthor t -> Cite t (renderPandoc s (Formatted xs)) _ -> Strong [Str "???"] -- TODO raise error instead? where isSuppressAuthor c = citationMode c == SuppressAuthor processCite _ _ x = x isNote :: Inline -> Bool isNote (Note _) = True isNote (Cite _ [Note _]) = True isNote _ = False mvPunctInsideQuote :: Inline -> Inline -> [Inline] mvPunctInsideQuote (Quoted qt ils) (Str s) | s `elem` [".", ","] = [Quoted qt (init ils ++ mvPunctInsideQuote (last ils) (Str s))] mvPunctInsideQuote il il' = [il, il'] isSpacy :: Inline -> Bool isSpacy Space = True isSpacy SoftBreak = True isSpacy _ = False mvPunct :: Bool -> Style -> [Inline] -> [Inline] mvPunct moveNotes sty (x : Space : xs) | isSpacy x = x : mvPunct moveNotes sty xs mvPunct moveNotes sty (q : s : x : ys) | isSpacy s , isNote x , startWithPunct ys = if moveNotes then mvPunct moveNotes sty $ q : Str (headInline ys) : x : tailInline ys else q : x : mvPunct moveNotes sty ys mvPunct moveNotes sty (Cite cs ils : ys) | length ils > 1 , isNote (last ils) , startWithPunct ys , moveNotes = Cite cs (init ils ++ [Str (headInline ys) | not (endWithPunct False (init ils))] ++ [last ils]) : mvPunct moveNotes sty (tailInline ys) mvPunct moveNotes sty (q@(Quoted _ _) : w@(Str _) : x : ys) | isNote x , isPunctuationInQuote sty , moveNotes = mvPunctInsideQuote q w ++ (x : mvPunct moveNotes sty ys) mvPunct moveNotes sty (s : x : ys) | isSpacy s, isNote x = x : mvPunct moveNotes sty ys mvPunct moveNotes sty (s : x@(Cite _ (Superscript _ : _)) : ys) | isSpacy s = x : mvPunct moveNotes sty ys mvPunct moveNotes sty (Cite cs ils : Str "." : ys) | lastInline ils == "." = Cite cs ils : mvPunct moveNotes sty ys mvPunct moveNotes sty (x:xs) = x : mvPunct moveNotes sty xs mvPunct _ _ [] = [] endWithPunct :: Bool -> [Inline] -> Bool endWithPunct _ [] = True endWithPunct onlyFinal xs@(_:_) = case reverse (stringify xs) of [] -> True -- covers .), .", etc.: (d:c:_) | isPunctuation d && not onlyFinal && isEndPunct c -> True (c:_) | isEndPunct c -> True | otherwise -> False where isEndPunct c = c `elem` (".,;:!?" :: String) startWithPunct :: [Inline] -> Bool startWithPunct = all (`elem` (".,;:!?" :: String)) . headInline deNote :: Pandoc -> Pandoc deNote = topDown go where go (Cite (c:cs) [Note [Para xs]]) = Cite (c:cs) [Note [Para $ toCapital xs]] go (Note xs) = Note $ topDown go' xs go x = x go' (x : Cite cs [Note [Para xs]] : ys) | not (isSpacy x) = x : Str "," : Space : comb (\zs -> [Cite cs zs]) xs ys go' (x : Note [Para xs] : ys) | not (isSpacy x) = x : Str "," : Space : comb id xs ys go' (Cite cs [Note [Para xs]] : ys) = comb (\zs -> [Cite cs zs]) xs ys go' (Note [Para xs] : ys) = comb id xs ys go' xs = xs comb :: ([Inline] -> [Inline]) -> [Inline] -> [Inline] -> [Inline] comb f xs ys = let xs' = if startWithPunct ys && endWithPunct True xs then initInline $ removeLeadingPunct xs else removeLeadingPunct xs removeLeadingPunct (Str [c] : s : zs) | isSpacy s && (c == ',' || c == '.' || c == ':') = zs removeLeadingPunct zs = zs in f xs' ++ ys -- | Retrieve all citations from a 'Pandoc' docuument. To be used with -- 'query'. getCitation :: Inline -> [[Citation]] getCitation i | Cite t _ <- i = [t] | otherwise = [] getCitationIds :: Inline -> Set.Set String getCitationIds (Cite cs _) = Set.fromList (map citationId cs) getCitationIds _ = mempty setHashes :: Inline -> State Int Inline setHashes i | Cite t ils <- i = do t' <- mapM setHash t return $ Cite t' ils | otherwise = return i setHash :: Citation -> State Int Citation setHash c = do ident <- get put $ ident + 1 return c{ citationHash = ident } toCslCite :: LocatorMap -> Citation -> CSL.Cite toCslCite locMap c = let (la, lo, s) = locatorWords locMap $ citationSuffix c s' = case (la,lo,s) of -- treat a bare locator as if it begins with space -- so @item1 [blah] is like [@item1, blah] ("","",x:_) | not (isPunct x) -> Space : s _ -> s isPunct (Str (x:_)) = isPunctuation x isPunct _ = False in emptyCite { CSL.citeId = citationId c , CSL.citePrefix = Formatted $ citationPrefix c , CSL.citeSuffix = Formatted s' , CSL.citeLabel = la , CSL.citeLocator = lo , CSL.citeNoteNumber = show $ citationNoteNum c , CSL.authorInText = citationMode c == AuthorInText , CSL.suppressAuthor = citationMode c == SuppressAuthor , CSL.citeHash = citationHash c } locatorWords :: LocatorMap -> [Inline] -> (String, String, [Inline]) locatorWords locMap inp = case parse (pLocatorWords locMap) "suffix" $ splitStrWhen (\c -> isLocatorPunct c || isSpace c) inp of Right r -> r Left _ -> ("","",inp) pLocatorWords :: LocatorMap -> Parsec [Inline] st (String, String, [Inline]) pLocatorWords locMap = do (la,lo) <- pLocator locMap s <- getInput -- rest is suffix return (la, lo, s) pMatch :: (Inline -> Bool) -> Parsec [Inline] st Inline pMatch condition = try $ do t <- anyToken guard $ condition t return t pSpace :: Parsec [Inline] st Inline pSpace = pMatch (\t -> isSpacy t || t == Str "\160") pLocator :: LocatorMap -> Parsec [Inline] st (String, String) pLocator locMap = try $ do optional $ pMatch (== Str ",") optional pSpace la <- try (do ts <- many1 (notFollowedBy (pWordWithDigits True) >> anyToken) case M.lookup (trim (stringify ts)) locMap of Just l -> return l Nothing -> mzero) <|> (lookAhead pDigit >> return "page") g <- pWordWithDigits True gs <- many (pWordWithDigits False) let lo = concat (g:gs) return (la, lo) pRoman :: Parsec [Inline] st String pRoman = try $ do t <- anyToken case t of Str xs -> case parseRomanNumeral xs of Nothing -> mzero Just _ -> return xs _ -> mzero -- we want to capture: 123, 123A, C22, XVII, 33-44, 22-33; 22-11 pWordWithDigits :: Bool -> Parsec [Inline] st String pWordWithDigits isfirst = try $ do punct <- if isfirst then return "" else stringify `fmap` pLocatorPunct sp <- option "" (pSpace >> return " ") s <- pRoman <|> try (do ts <- many1 (notFollowedBy pSpace >> notFollowedBy pLocatorPunct >> anyToken) let ts' = stringify ts guard (any isDigit ts') return ts') return $ punct ++ sp ++ s pDigit :: Parsec [Inline] st () pDigit = do t <- anyToken case t of Str (d:_) | isDigit d -> return () _ -> mzero pLocatorPunct :: Parsec [Inline] st Inline pLocatorPunct = pMatch isLocatorPunct' where isLocatorPunct' (Str [c]) = isLocatorPunct c isLocatorPunct' _ = False isLocatorPunct :: Char -> Bool isLocatorPunct ':' = False isLocatorPunct c = isPunctuation c type LocatorMap = M.Map String String locatorMap :: Style -> LocatorMap locatorMap sty = foldr (\term -> M.insert (termSingular term) (cslTerm term) . M.insert (termPlural term) (cslTerm term)) M.empty (concatMap localeTerms $ styleLocale sty)