{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} module Text.CSL.Pandoc (processCites, processCites') where import Prelude import Control.Applicative ((<|>)) import qualified Control.Exception as E import Control.Monad import Control.Monad.State import Data.Aeson import qualified Data.ByteString.Lazy as L import Data.Char (isDigit, isPunctuation, isSpace) import qualified Data.Map as M import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import Data.Maybe (fromMaybe) import System.Directory (getAppUserDataDirectory) import System.Environment (getEnv) import System.FilePath import System.IO.Error (isDoesNotExistError) import System.SetEnv (setEnv) import Text.CSL.Data (getDefaultCSL) import Text.CSL.Exception import Text.CSL.Input.Bibutils (convertRefs, readBiblioFile) import Text.CSL.Output.Pandoc (renderPandoc, renderPandoc', headInline, initInline, tailInline, toCapital) import Text.CSL.Parser import Text.CSL.Proc import Text.CSL.Reference hiding (Value, processCites) import Text.CSL.Style hiding (Citation (..), Cite (..)) import qualified Text.CSL.Style as CSL import Text.CSL.Util (findFile, lastInline, parseRomanNumeral, splitStrWhen, tr', trim) import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc import Text.Pandoc.Builder (deleteMeta, setMeta) import Text.Pandoc.Shared (stringify, ordNub) import Text.Pandoc.Walk import Text.Parsec hiding (State, (<|>)) -- | Process a 'Pandoc' document by adding citations formatted -- according to a CSL style. Add a bibliography (if one is called -- for) at the end of the document. processCites :: Style -> [Reference] -> Pandoc -> Pandoc processCites style refs (Pandoc m1 b1) = let metanocites = lookupMeta "nocite" m1 nocites = mkNociteWildcards refs . query getCitation <$> metanocites Pandoc m2 b2 = evalState (walkM setHashes $ Pandoc (deleteMeta "nocite" m1) b1) 1 grps = query getCitation (Pandoc m2 b2) ++ fromMaybe [] nocites locMap = locatorMap style result = citeproc procOpts{ linkCitations = isLinkCitations m2} style refs (setNearNote style $ map (map (toCslCite locMap)) grps) cits_map = tr' "cits_map" $ M.fromList $ zip grps (citations result) biblioList = map (renderPandoc' style) $ zip (bibliography result) (citationIds result) moveNotes = maybe True truish $ lookupMeta "notes-after-punctuation" m1 Pandoc m3 bs = walk (mvPunct moveNotes style) . deNote . walk (processCite style cits_map) $ Pandoc m2 b2 m = case metanocites of Nothing -> m3 Just x -> setMeta "nocite" x m3 notemap = mkNoteMap (Pandoc m3 bs) hanging = (== Just "true") (biblio style >>= lookup "hanging-indent" . bibOptions) in Pandoc m $ walk (addFirstNoteNumber notemap) $ walk (concatMap removeNocaseSpans) $ insertRefs hanging m biblioList bs addFirstNoteNumber :: M.Map Text Int -> Inline -> Inline addFirstNoteNumber notemap s@(Span ("",["first-reference-note-number"],[("refid",refid)]) _) = case M.lookup refid notemap of Nothing -> s Just n -> Str $ T.pack (show n) addFirstNoteNumber _ -- see below, these spans added by deNote (Note [Para (Span ("",["reference-id-list"],_) [] : ils)]) = Note [Para ils] addFirstNoteNumber _ x = x mkNoteMap :: Pandoc -> M.Map Text Int mkNoteMap doc = foldr go mempty $ splitUp $ zip [1..] $ query getNoteCitationIds doc where splitUp :: [(Int, [Text])] -> [(Int, Text)] splitUp = concatMap (\(n,ss) -> map (n,) ss) go :: (Int, Text) -> M.Map Text Int -> M.Map Text Int go (notenumber, citeid) = M.insert citeid notenumber -- if document contains a Div with id="refs", insert -- references as its contents. Otherwise, insert references -- at the end of the document in a Div with id="refs" insertRefs :: Bool -> Meta -> [Block] -> [Block] -> [Block] insertRefs _ _ [] bs = bs insertRefs hanging meta refs bs = if isRefRemove meta then bs else case runState (walkM go bs) False of (bs', True) -> bs' (_, False) -> case refTitle meta of Nothing -> case reverse bs of Header lev (id',classes,kvs) ys : xs -> reverse xs ++ [Header lev (id',addUnNumbered classes,kvs) ys, Div ("refs",refclasses,[]) refs] _ -> bs ++ [refDiv] Just ils -> bs ++ [Header 1 ("bibliography", ["unnumbered"], []) ils, refDiv] where refclasses = "references" : if hanging then ["hanging-indent"] else [] refDiv = Div ("refs", refclasses, []) refs addUnNumbered cs = "unnumbered" : [c | c <- cs, c /= "unnumbered"] go :: Block -> State Bool Block go (Div ("refs",cs,kvs) xs) = do put True -- refHeader isn't used if you have an explicit references div let cs' = ordNub $ cs ++ refclasses return $ Div ("refs",cs',kvs) (xs ++ refs) go x = return x refTitle :: Meta -> Maybe [Inline] refTitle meta = case lookupMeta "reference-section-title" meta of Just (MetaString s) -> Just [Str s] Just (MetaInlines ils) -> Just ils Just (MetaBlocks [Plain ils]) -> Just ils Just (MetaBlocks [Para ils]) -> Just ils _ -> Nothing isRefRemove :: Meta -> Bool isRefRemove meta = maybe False truish $ lookupMeta "suppress-bibliography" meta isLinkCitations :: Meta -> Bool isLinkCitations meta = maybe False truish $ lookupMeta "link-citations" meta truish :: MetaValue -> Bool truish (MetaBool t) = t truish (MetaString s) = isYesValue (T.toLower s) truish (MetaInlines ils) = isYesValue (T.toLower (stringify ils)) truish (MetaBlocks [Plain ils]) = isYesValue (T.toLower (stringify ils)) truish _ = False isYesValue :: Text -> Bool isYesValue "t" = True isYesValue "true" = True isYesValue "yes" = True isYesValue "on" = True isYesValue _ = False -- if the 'nocite' Meta field contains a citation with id = '*', -- create a cite with to all the references. mkNociteWildcards :: [Reference] -> [[Citation]] -> [[Citation]] mkNociteWildcards refs = map expandStar where expandStar cs = case [c | c <- cs , citationId c == "*"] of [] -> cs _ -> allcites allcites = map (\ref -> Citation{ citationId = unLiteral (refId ref), citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0 }) refs removeNocaseSpans :: Inline -> [Inline] removeNocaseSpans (Span ("",["nocase"],[]) xs) = xs removeNocaseSpans x = [x] -- | Process a 'Pandoc' document by adding citations formatted -- according to a CSL style. The style filename is derived from -- the `csl` field of the metadata, and the references are taken -- from the `references` field or read from a file in the `bibliography` -- field. processCites' :: Pandoc -> IO Pandoc processCites' (Pandoc meta blocks) = do mbcsldir <- E.catch (Just <$> getAppUserDataDirectory "csl") $ \e -> if isDoesNotExistError e then return Nothing else E.throwIO e mbpandocdir <- E.catch (Just <$> getAppUserDataDirectory "pandoc") $ \e -> if isDoesNotExistError e then return Nothing else E.throwIO e let inlineRefError s = E.throw $ ErrorParsingReferences s let inlineRefs = either inlineRefError id $ convertRefs $ lookupMeta "references" meta let cslfile = (lookupMeta "csl" meta <|> lookupMeta "citation-style" meta) >>= toPath let mbLocale = (lookupMeta "lang" meta `mplus` lookupMeta "locale" meta) >>= toText let tryReadCSLFile Nothing _ = mzero tryReadCSLFile (Just d) f = E.catch (readCSLFile mbLocale (d f)) (\(_ :: E.SomeException) -> mzero) csl <- case cslfile of Just f | not (null f) -> readCSLFile mbLocale f _ -> tryReadCSLFile mbpandocdir "default.csl" `mplus` tryReadCSLFile mbcsldir "chicago-author-date.csl" `mplus` (getDefaultCSL >>= localizeCSL mbLocale . parseCSL') -- set LANG environment from locale; this affects unicode collation -- if pandoc-citeproc compiled with unicode_collation flag case styleLocale csl of (l:_) -> do setEnv "LC_ALL" (T.unpack $ localeLang l) setEnv "LANG" (T.unpack $ localeLang l) [] -> do envlang <- getEnv "LANG" if null envlang then do -- Note that LANG needs to be set for bibtex conversion: setEnv "LANG" "en_US.UTF-8" setEnv "LC_ALL" "en_US.UTF-8" else setEnv "LC_ALL" envlang let citids = query getCitationIds (Pandoc meta blocks) let idpred = if "*" `Set.member` citids then const True else (`Set.member` citids) bibRefs <- getBibRefs idpred $ fromMaybe (MetaList []) $ lookupMeta "bibliography" meta let refs = inlineRefs ++ bibRefs let cslAbbrevFile = lookupMeta "citation-abbreviations" meta >>= toPath let skipLeadingSpace = L.dropWhile (\s -> s == 32 || (s >= 9 && s <= 13)) abbrevs <- maybe (return (Abbreviations M.empty)) (\f -> findFile (maybe ["."] (\g -> [".", g]) mbcsldir) f >>= maybe (E.throwIO $ CouldNotFindAbbrevFile f) return >>= L.readFile >>= either error return . eitherDecode . skipLeadingSpace) cslAbbrevFile let csl' = csl{ styleAbbrevs = abbrevs } return $ processCites (tr' "CSL" csl') refs $ Pandoc meta blocks toText :: MetaValue -> Maybe Text toText (MetaString s) = Just s -- take last in a list toText (MetaList xs) = case reverse xs of [] -> Nothing (x:_) -> toText x toText (MetaInlines ils) = Just $ stringify ils toText _ = Nothing toPath :: MetaValue -> Maybe String toPath (MetaString s) = Just $ T.unpack s -- take last in a list toPath (MetaList xs) = case reverse xs of [] -> Nothing (x:_) -> toPath x toPath (MetaInlines ils) = Just $ T.unpack $ stringify ils toPath _ = Nothing getBibRefs :: (Text -> Bool) -> MetaValue -> IO [Reference] getBibRefs idpred (MetaList xs) = concat `fmap` mapM (getBibRefs idpred) xs getBibRefs idpred (MetaInlines xs) = getBibRefs idpred (MetaString $ stringify xs) getBibRefs idpred (MetaString s) = do path <- findFile ["."] (T.unpack s) >>= maybe (E.throwIO $ CouldNotFindBibFile $ T.unpack s) return map unescapeRefId `fmap` readBiblioFile idpred path getBibRefs _ _ = return [] -- unescape reference ids, which may contain XML entities, so -- that we can do lookups with regular string equality unescapeRefId :: Reference -> Reference unescapeRefId ref = ref{ refId = Literal $ decodeEntities (unLiteral $ refId ref) } decodeEntities :: Text -> Text decodeEntities t = case T.uncons t of Nothing -> "" Just ('&',xs) -> let (ys,zs) = T.break (==';') xs in case T.uncons zs of Just (';',ws) -> case lookupEntity ('&': T.unpack ys ++ ";") of #if MIN_VERSION_tagsoup(0,13,0) Just s -> T.pack s <> decodeEntities ws #else Just c -> T.cons c (decodeEntities ws) #endif Nothing -> T.cons '&' (decodeEntities xs) _ -> T.cons '&' (decodeEntities xs) Just (x,xs) -> T.cons x (decodeEntities xs) -- | Substitute 'Cite' elements with formatted citations. processCite :: Style -> M.Map [Citation] Formatted -> Inline -> Inline processCite s cs (Cite t _) = case M.lookup t cs of Just (Formatted xs) | not (null xs) || all isSuppressAuthor t -> Cite t (renderPandoc s (Formatted xs)) _ -> Strong [Str "???"] -- TODO raise error instead? where isSuppressAuthor c = citationMode c == SuppressAuthor processCite _ _ x = x getNoteCitationIds :: Inline -> [[Text]] getNoteCitationIds (Note [Para (Span ("",["reference-id-list"] ,[("refids",refids)]) [] : _)]) -- see deNote below which inserts this special Span = [T.words refids] getNoteCitationIds (Note _) = [[]] getNoteCitationIds _ = [] isNote :: Inline -> Bool isNote (Note _) = True isNote (Cite _ [Note _]) = True -- the following allows citation styles that are "in-text" but use superscript -- references to be treated as if they are "notes" for the purposes of moving -- the citations after trailing punctuation (see ): isNote (Cite _ [Superscript _]) = True isNote _ = False mvPunctInsideQuote :: Inline -> Inline -> [Inline] mvPunctInsideQuote (Quoted qt ils) (Str s) | s `elem` [".", ","] = [Quoted qt (init ils ++ mvPunctInsideQuote (last ils) (Str s))] mvPunctInsideQuote il il' = [il, il'] isSpacy :: Inline -> Bool isSpacy Space = True isSpacy SoftBreak = True isSpacy _ = False mvPunct :: Bool -> Style -> [Inline] -> [Inline] mvPunct moveNotes sty (x : Space : xs) | isSpacy x = x : mvPunct moveNotes sty xs mvPunct moveNotes sty (q : s : x : ys) | isSpacy s , isNote x , startWithPunct ys = if moveNotes then mvPunct moveNotes sty $ case headInline ys of Nothing -> q : x : tailInline ys Just w -> q : Str (T.singleton w) : x : tailInline ys else q : x : mvPunct moveNotes sty ys mvPunct moveNotes sty (Cite cs ils : ys) | length ils > 1 , isNote (last ils) , startWithPunct ys , moveNotes = Cite cs (init ils ++ (case headInline ys of Nothing -> [] Just s' | not (endWithPunct False (init ils)) -> [Str $ T.singleton s'] | otherwise -> []) ++ [last ils]) : mvPunct moveNotes sty (tailInline ys) mvPunct moveNotes sty (q@(Quoted _ _) : w@(Str _) : x : ys) | isNote x , isPunctuationInQuote sty , moveNotes = mvPunctInsideQuote q w ++ (x : mvPunct moveNotes sty ys) mvPunct moveNotes sty (s : x : ys) | isSpacy s, isNote x = x : mvPunct moveNotes sty ys mvPunct moveNotes sty (s : x@(Cite _ (Superscript _ : _)) : ys) | isSpacy s = x : mvPunct moveNotes sty ys mvPunct moveNotes sty (Cite cs ils : Str "." : ys) | lastInline ils == Just '.' = Cite cs ils : mvPunct moveNotes sty ys mvPunct moveNotes sty (x:xs) = x : mvPunct moveNotes sty xs mvPunct _ _ [] = [] endWithPunct :: Bool -> [Inline] -> Bool endWithPunct _ [] = True endWithPunct onlyFinal xs@(_:_) = case reverse (T.unpack $ stringify xs) of [] -> True -- covers .), .", etc.: (d:c:_) | isPunctuation d && not onlyFinal && isEndPunct c -> True (c:_) | isEndPunct c -> True | otherwise -> False where isEndPunct c = c `elem` (".,;:!?" :: String) startWithPunct :: [Inline] -> Bool startWithPunct = all (`elem` (".,;:!?" :: String)) . headInline deNote :: Pandoc -> Pandoc deNote = topDown go where go (Cite (c:cs) [Note [Para xs]]) = Cite (c:cs) [Note [Para $ specialSpan (c:cs) : toCapital xs]] go (Note xs) = Note $ topDown go' xs go x = x -- we insert this to help getNoteCitationIds: specialSpan cs = Span ("",["reference-id-list"], [("refids", T.unwords (map citationId cs))]) [] go' (Str "(" : Cite cs [Note [Para xs]] : Str ")" : ys) = Str "(" : Cite cs xs : Str ")" : ys go' (x : Cite cs [Note [Para xs]] : ys) | not (isSpacy x) = x : Str "," : Space : comb (\zs -> [Cite cs zs]) xs ys go' (Str "(" : Note [Para xs] : Str ")" : ys) = Str "(" : xs ++ (Str ")" : ys) go' (x : Note [Para xs] : ys) | not (isSpacy x) = x : Str "," : Space : comb id xs ys go' (Cite cs [Note [Para xs]] : ys) = comb (\zs -> [Cite cs zs]) xs ys go' (Note [Para xs] : ys) = comb id xs ys go' xs = xs comb :: ([Inline] -> [Inline]) -> [Inline] -> [Inline] -> [Inline] comb f xs ys = let xs' = if startWithPunct ys && endWithPunct True xs then initInline $ removeLeadingPunct xs else removeLeadingPunct xs removeLeadingPunct (Str (T.unpack -> [c]) : s : zs) | isSpacy s && (c == ',' || c == '.' || c == ':') = zs removeLeadingPunct zs = zs in f xs' ++ ys -- | Retrieve all citations from a 'Pandoc' document. To be used with -- 'query'. getCitation :: Inline -> [[Citation]] getCitation i | Cite t _ <- i = [t] | otherwise = [] getCitationIds :: Inline -> Set.Set Text getCitationIds (Cite cs _) = Set.fromList (map citationId cs) getCitationIds _ = mempty setHashes :: Inline -> State Int Inline setHashes i | Cite t ils <- i = do t' <- mapM setHash t return $ Cite t' ils | otherwise = return i setHash :: Citation -> State Int Citation setHash c = do ident <- get put $ ident + 1 return c{ citationHash = ident } toCslCite :: LocatorMap -> Citation -> CSL.Cite toCslCite locMap c = let (la, lo, s) = locatorWords locMap $ citationSuffix c s' = case (la,lo,s) of -- treat a bare locator as if it begins with space -- so @item1 [blah] is like [@item1, blah] ("","",x:_) | not (isPunct x) -> Space : s _ -> s isPunct (Str (T.uncons -> Just (x,_))) = isPunctuation x isPunct _ = False in emptyCite { CSL.citeId = citationId c , CSL.citePrefix = Formatted $ citationPrefix c , CSL.citeSuffix = Formatted s' , CSL.citeLabel = la , CSL.citeLocator = lo , CSL.citeNoteNumber = T.pack $ show $ citationNoteNum c , CSL.authorInText = citationMode c == AuthorInText , CSL.suppressAuthor = citationMode c == SuppressAuthor , CSL.citeHash = citationHash c } splitInp :: [Inline] -> [Inline] splitInp = splitStrWhen (\c -> splitOn c || isSpace c) where splitOn ':' = False splitOn c = isPunctuation c locatorWords :: LocatorMap -> [Inline] -> (Text, Text, [Inline]) locatorWords locMap inp = case parse (pLocatorWords locMap) "suffix" $ splitInp inp of Right r -> r Left _ -> ("","",inp) -- Some terminology -- ---------------- -- Word => 89 -- 12-15 -- 13(a)(i)-(iv) -- [1.2.5] -- -- Integrated => [@citekey, 89] -- [@citekey, p. 40, 41, 89-199, suffix] -- Delimited => [@citekey{89}] -- [@citekey, {p. literally anything except unbalanced curly quotes}, suffix] -- -- When parsing integrated locators you have to be careful not to include -- 'suffix' in the locator, so that means pretty strict control over when -- you're allowed to use NO digits in a word. [@citekey, p. 40(a) (bcd)] will -- stop parsing the locator after (a). You also have to be careful not to parse -- random terms like 'and' in en-US as citeLabels, which means careful control -- over what must come directly after any label string (via notFollowedBy). -- -- With delimited locators, it's a different story. Parse as long a locator -- label as you can find in the terms map, then include EVERYTHING in the outer -- {} braces. -- -- Ultimately the complexity is driven by wanting as many locator words as -- possible being parsed in the integrated style, because it fits with the -- aims of Markdown (being readable). Ideally, anything except a word with -- neither roman numerals nor arabic digits can be integrated. Some -- counter-examples: -- a -- (a)(b)(c) -- (hello) pLocatorWords :: LocatorMap -> Parsec [Inline] st (Text, Text, [Inline]) pLocatorWords locMap = do optional $ pMatchChar "," (== ',') optional pSpace (la, lo) <- pLocatorDelimited locMap <|> pLocatorIntegrated locMap s <- getInput -- rest is suffix -- need to trim, otherwise "p. 9" and "9" will have 'different' locators later on -- i.e. the first one will be " 9" return (la, trim lo, s) pLocatorDelimited :: LocatorMap -> Parsec [Inline] st (Text, Text) pLocatorDelimited locMap = try $ do _ <- pMatchChar "{" (== '{') skipMany pSpace -- gobble pre-spaces so label doesn't try to include them (la, _) <- pLocatorLabelDelimited locMap -- we only care about balancing {} and [] (because of the outer [] scope); -- the rest can be anything let inner = do { t <- anyToken; return (True, stringify t) } gs <- many (pBalancedBraces [('{','}'), ('[',']')] inner) _ <- pMatchChar "}" (== '}') let lo = T.concat $ map snd gs return (la, lo) pLocatorLabelDelimited :: LocatorMap -> Parsec [Inline] st (Text, Bool) pLocatorLabelDelimited locMap = pLocatorLabel' locMap lim <|> return ("page", True) where lim = stringify <$> anyToken pLocatorIntegrated :: LocatorMap -> Parsec [Inline] st (Text, Text) pLocatorIntegrated locMap = try $ do (la, wasImplicit) <- pLocatorLabelIntegrated locMap -- if we got the label implicitly, we have presupposed the first one is going -- to have a digit, so guarantee that. You _can_ have p. (a) because you -- specified it. let modifier = if wasImplicit then requireDigits else requireRomansOrDigits g <- try $ pLocatorWordIntegrated (not wasImplicit) >>= modifier gs <- many (try $ pLocatorWordIntegrated False >>= modifier) let lo = T.concat (g:gs) return (la, lo) pLocatorLabelIntegrated :: LocatorMap -> Parsec [Inline] st (Text, Bool) pLocatorLabelIntegrated locMap = pLocatorLabel' locMap lim <|> (lookAhead digital >> return ("page", True)) where lim = try $ pLocatorWordIntegrated True >>= requireRomansOrDigits digital = try $ pLocatorWordIntegrated True >>= requireDigits pLocatorLabel' :: LocatorMap -> Parsec [Inline] st Text -> Parsec [Inline] st (Text, Bool) pLocatorLabel' locMap lim = go "" where -- grow the match string until we hit the end -- trying to find the largest match for a label go acc = try $ do -- advance at least one token each time -- the pathological case is "p.3" t <- anyToken ts <- manyTill anyToken (try $ lookAhead lim) let s = acc <> stringify (t:ts) case M.lookup (trim s) locMap of -- try to find a longer one, or return this one Just l -> go s <|> return (l, False) Nothing -> go s -- hard requirement for a locator to have some real digits in it requireDigits :: (Bool, Text) -> Parsec [Inline] st Text requireDigits (_, s) = if not (T.any isDigit s) then Prelude.fail "requireDigits" else return s -- soft requirement for a sequence with some roman or arabic parts -- (a)(iv) -- because iv is roman -- 1(a) -- because 1 is an actual digit -- NOT: a, (a)-(b), hello, (some text in brackets) requireRomansOrDigits :: (Bool, Text) -> Parsec [Inline] st Text requireRomansOrDigits (d, s) = if not d then Prelude.fail "requireRomansOrDigits" else return s pLocatorWordIntegrated :: Bool -> Parsec [Inline] st (Bool, Text) pLocatorWordIntegrated isFirst = try $ do punct <- if isFirst then return "" else (stringify <$> pLocatorSep) <|> return "" sp <- option "" (pSpace >> return " ") (dig, s) <- pBalancedBraces [('(',')'), ('[',']'), ('{','}')] pPageSeq return (dig, punct <> sp <> s) -- we want to capture: 123, 123A, C22, XVII, 33-44, 22-33; 22-11 -- 34(1), 34A(A), 34(1)(i)(i), (1)(a) -- [17], [17]-[18], '591 [84]' -- (because CSL cannot pull out individual pages/sections -- to wrap in braces on a per-style basis) pBalancedBraces :: [(Char, Char)] -> Parsec [Inline] st (Bool, Text) -> Parsec [Inline] st (Bool, Text) pBalancedBraces braces p = try $ do ss <- many1 surround return $ anyWereDigitLike ss where except = notFollowedBy pBraces >> p -- outer and inner surround = foldl (\a (open, close) -> sur open close except <|> a) except braces isc c = stringify <$> pMatchChar [c] (== c) sur c c' m = try $ do (d, mid) <- between (isc c) (isc c') (option (False, "") m) return (d, T.cons c . flip T.snoc c' $ mid) flattened = concatMap (\(o, c) -> [o, c]) braces pBraces = pMatchChar "braces" (`elem` flattened) -- YES 1, 1.2, 1.2.3 -- NO 1., 1.2. a.6 -- can't use sepBy because we want to leave trailing .s pPageSeq :: Parsec [Inline] st (Bool, Text) pPageSeq = oneDotTwo <|> withPeriod where oneDotTwo = do u <- pPageUnit us <- many withPeriod return $ anyWereDigitLike (u:us) withPeriod = try $ do -- .2 p <- pMatchChar "." (== '.') u <- try pPageUnit return (fst u, stringify p <> snd u) anyWereDigitLike :: [(Bool, Text)] -> (Bool, Text) anyWereDigitLike as = (any fst as, T.concat $ map snd as) pPageUnit :: Parsec [Inline] st (Bool, Text) pPageUnit = roman <|> plainUnit where -- roman is a 'digit' roman = (True,) <$> pRoman plainUnit = do ts <- many1 (notFollowedBy pSpace >> notFollowedBy pLocatorPunct >> anyToken) let s = stringify ts -- otherwise look for actual digits or -s return (T.any isDigit s, s) pRoman :: Parsec [Inline] st Text pRoman = try $ do t <- anyToken case t of Str xs -> case parseRomanNumeral (T.unpack xs) of Nothing -> mzero Just _ -> return $ xs _ -> mzero isLocatorPunct :: Char -> Bool isLocatorPunct '-' = False -- page range isLocatorPunct '–' = False -- page range, en dash isLocatorPunct ':' = False -- vol:page-range hack isLocatorPunct c = isPunctuation c -- includes [{()}] pLocatorPunct :: Parsec [Inline] st Inline pLocatorPunct = pMatchChar "punctuation" isLocatorPunct pLocatorSep :: Parsec [Inline] st Inline pLocatorSep = pMatchChar "locator separator" isLocatorSep isLocatorSep :: Char -> Bool isLocatorSep ',' = True isLocatorSep ';' = True isLocatorSep _ = False pMatchChar :: String -> (Char -> Bool) -> Parsec [Inline] st Inline pMatchChar msg f = pMatch msg mc where mc (Str (T.unpack -> [c])) = f c mc _ = False pSpace :: Parsec [Inline] st Inline pSpace = pMatch "' '" (\t -> isSpacy t || t == Str "\160") pMatch :: String -> (Inline -> Bool) -> Parsec [Inline] st Inline pMatch msg condition = try $ do t <- anyToken if not (condition t) then Prelude.fail msg else return t type LocatorMap = M.Map Text Text locatorMap :: Style -> LocatorMap locatorMap sty = foldr (\term -> M.insert (termSingular term) (cslTerm term) . M.insert (termPlural term) (cslTerm term)) M.empty (concatMap localeTerms $ styleLocale sty)