{-# LANGUAGE TupleSections #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.Pandoc.Citeproc.Locator
  ( parseLocator )
where
import Citeproc.Types
import Data.Text (Text)
import qualified Data.Text as T
import Data.List (foldl')
import Text.Parsec
import Text.Pandoc.Definition
import Text.Pandoc.Parsing (romanNumeral)
import Text.Pandoc.Shared (stringify)
import Control.Monad (mzero)
import qualified Data.Map as M
import Data.Char (isSpace, isPunctuation, isDigit)

parseLocator :: Locale -> [Inline] -> (Maybe (Text, Text), [Inline])
parseLocator :: Locale -> [Inline] -> (Maybe (Text, Text), [Inline])
parseLocator Locale
locale [Inline]
inp =
  case Parsec [Inline] () (Maybe (Text, Text), [Inline])
-> SourceName
-> [Inline]
-> Either ParseError (Maybe (Text, Text), [Inline])
forall s t a.
Stream s Identity t =>
Parsec s () a -> SourceName -> s -> Either ParseError a
parse (LocatorMap -> Parsec [Inline] () (Maybe (Text, Text), [Inline])
pLocatorWords (Locale -> LocatorMap
toLocatorMap Locale
locale)) SourceName
"suffix" ([Inline] -> Either ParseError (Maybe (Text, Text), [Inline]))
-> [Inline] -> Either ParseError (Maybe (Text, Text), [Inline])
forall a b. (a -> b) -> a -> b
$ [Inline] -> [Inline]
splitInp [Inline]
inp of
       Right (Maybe (Text, Text), [Inline])
r -> (Maybe (Text, Text), [Inline])
r
       Left ParseError
_  -> (Maybe (Text, Text)
forall a. Maybe a
Nothing, [Inline]
inp)

splitInp :: [Inline] -> [Inline]
splitInp :: [Inline] -> [Inline]
splitInp = (Char -> Bool) -> [Inline] -> [Inline]
splitStrWhen (\Char
c -> Char -> Bool
isSpace Char
c Bool -> Bool -> Bool
|| (Char -> Bool
isPunctuation Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
':'))

--
-- Locator parsing
--

type LocatorParser = Parsec [Inline] ()

pLocatorWords :: LocatorMap
              -> LocatorParser (Maybe (Text, Text), [Inline])
pLocatorWords :: LocatorMap -> Parsec [Inline] () (Maybe (Text, Text), [Inline])
pLocatorWords LocatorMap
locMap = do
  ParsecT [Inline] () Identity Inline
-> ParsecT [Inline] () Identity ()
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m ()
optional (ParsecT [Inline] () Identity Inline
 -> ParsecT [Inline] () Identity ())
-> ParsecT [Inline] () Identity Inline
-> ParsecT [Inline] () Identity ()
forall a b. (a -> b) -> a -> b
$ SourceName -> (Char -> Bool) -> ParsecT [Inline] () Identity Inline
pMatchChar SourceName
"," (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
',')
  ParsecT [Inline] () Identity Inline
-> ParsecT [Inline] () Identity ()
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m ()
optional ParsecT [Inline] () Identity Inline
pSpace
  (Text
la, Text
lo) <- LocatorMap -> LocatorParser (Text, Text)
pLocatorDelimited LocatorMap
locMap LocatorParser (Text, Text)
-> LocatorParser (Text, Text) -> LocatorParser (Text, Text)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> LocatorMap -> LocatorParser (Text, Text)
pLocatorIntegrated LocatorMap
locMap
  [Inline]
s <- ParsecT [Inline] () Identity [Inline]
forall (m :: * -> *) s u. Monad m => ParsecT s u m s
getInput -- rest is suffix
  -- need to trim, otherwise "p. 9" and "9" will have 'different' locators later on
  -- i.e. the first one will be " 9"
  (Maybe (Text, Text), [Inline])
-> Parsec [Inline] () (Maybe (Text, Text), [Inline])
forall (m :: * -> *) a. Monad m => a -> m a
return ((Maybe (Text, Text), [Inline])
 -> Parsec [Inline] () (Maybe (Text, Text), [Inline]))
-> (Maybe (Text, Text), [Inline])
-> Parsec [Inline] () (Maybe (Text, Text), [Inline])
forall a b. (a -> b) -> a -> b
$
    if Text -> Bool
T.null Text
la Bool -> Bool -> Bool
&& Text -> Bool
T.null Text
lo
       then (Maybe (Text, Text)
forall a. Maybe a
Nothing, [Inline]
s)
       else ((Text, Text) -> Maybe (Text, Text)
forall a. a -> Maybe a
Just (Text
la, Text -> Text
T.strip Text
lo), [Inline]
s)

pLocatorDelimited :: LocatorMap -> LocatorParser (Text, Text)
pLocatorDelimited :: LocatorMap -> LocatorParser (Text, Text)
pLocatorDelimited LocatorMap
locMap = LocatorParser (Text, Text) -> LocatorParser (Text, Text)
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (LocatorParser (Text, Text) -> LocatorParser (Text, Text))
-> LocatorParser (Text, Text) -> LocatorParser (Text, Text)
forall a b. (a -> b) -> a -> b
$ do
  Inline
_ <- SourceName -> (Char -> Bool) -> ParsecT [Inline] () Identity Inline
pMatchChar SourceName
"{" (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'{')
  ParsecT [Inline] () Identity Inline
-> ParsecT [Inline] () Identity ()
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany ParsecT [Inline] () Identity Inline
pSpace -- gobble pre-spaces so label doesn't try to include them
  (Text
la, Bool
_) <- LocatorMap -> LocatorParser (Text, Bool)
pLocatorLabelDelimited LocatorMap
locMap
  -- we only care about balancing {} and [] (because of the outer [] scope);
  -- the rest can be anything
  let inner :: ParsecT [Inline] u Identity (Bool, Text)
inner = do { Inline
t <- ParsecT [Inline] u Identity Inline
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m t
anyToken; (Bool, Text) -> ParsecT [Inline] u Identity (Bool, Text)
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool
True, Inline -> Text
forall a. Walkable Inline a => a -> Text
stringify Inline
t) }
  [(Bool, Text)]
gs <- ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity [(Bool, Text)]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many ([(Char, Char)]
-> ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
pBalancedBraces [(Char
'{',Char
'}'), (Char
'[',Char
']')] ParsecT [Inline] () Identity (Bool, Text)
forall u. ParsecT [Inline] u Identity (Bool, Text)
inner)
  Inline
_ <- SourceName -> (Char -> Bool) -> ParsecT [Inline] () Identity Inline
pMatchChar SourceName
"}" (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'}')
  let lo :: Text
lo = [Text] -> Text
T.concat ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ ((Bool, Text) -> Text) -> [(Bool, Text)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Bool, Text) -> Text
forall a b. (a, b) -> b
snd [(Bool, Text)]
gs
  (Text, Text) -> LocatorParser (Text, Text)
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
la, Text
lo)

pLocatorLabelDelimited :: LocatorMap -> LocatorParser (Text, Bool)
pLocatorLabelDelimited :: LocatorMap -> LocatorParser (Text, Bool)
pLocatorLabelDelimited LocatorMap
locMap
  = LocatorMap -> LocatorParser Text -> LocatorParser (Text, Bool)
pLocatorLabel' LocatorMap
locMap LocatorParser Text
forall u. ParsecT [Inline] u Identity Text
lim LocatorParser (Text, Bool)
-> LocatorParser (Text, Bool) -> LocatorParser (Text, Bool)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> (Text, Bool) -> LocatorParser (Text, Bool)
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
"page", Bool
True)
    where
        lim :: ParsecT [Inline] u Identity Text
lim = Inline -> Text
forall a. Walkable Inline a => a -> Text
stringify (Inline -> Text)
-> ParsecT [Inline] u Identity Inline
-> ParsecT [Inline] u Identity Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT [Inline] u Identity Inline
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m t
anyToken

pLocatorIntegrated :: LocatorMap -> LocatorParser (Text, Text)
pLocatorIntegrated :: LocatorMap -> LocatorParser (Text, Text)
pLocatorIntegrated LocatorMap
locMap = LocatorParser (Text, Text) -> LocatorParser (Text, Text)
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (LocatorParser (Text, Text) -> LocatorParser (Text, Text))
-> LocatorParser (Text, Text) -> LocatorParser (Text, Text)
forall a b. (a -> b) -> a -> b
$ do
  (Text
la, Bool
wasImplicit) <- LocatorMap -> LocatorParser (Text, Bool)
pLocatorLabelIntegrated LocatorMap
locMap
  -- if we got the label implicitly, we have presupposed the first one is
  -- going to have a digit, so guarantee that. You _can_ have p. (a)
  -- because you specified it.
  let modifier :: (Bool, Text) -> LocatorParser Text
modifier = if Bool
wasImplicit
                    then (Bool, Text) -> LocatorParser Text
requireDigits
                    else (Bool, Text) -> LocatorParser Text
requireRomansOrDigits
  Text
g <- LocatorParser Text -> LocatorParser Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (LocatorParser Text -> LocatorParser Text)
-> LocatorParser Text -> LocatorParser Text
forall a b. (a -> b) -> a -> b
$ Bool -> ParsecT [Inline] () Identity (Bool, Text)
pLocatorWordIntegrated (Bool -> Bool
not Bool
wasImplicit) ParsecT [Inline] () Identity (Bool, Text)
-> ((Bool, Text) -> LocatorParser Text) -> LocatorParser Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (Bool, Text) -> LocatorParser Text
modifier
  [Text]
gs <- LocatorParser Text -> ParsecT [Inline] () Identity [Text]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (LocatorParser Text -> LocatorParser Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (LocatorParser Text -> LocatorParser Text)
-> LocatorParser Text -> LocatorParser Text
forall a b. (a -> b) -> a -> b
$ Bool -> ParsecT [Inline] () Identity (Bool, Text)
pLocatorWordIntegrated Bool
False ParsecT [Inline] () Identity (Bool, Text)
-> ((Bool, Text) -> LocatorParser Text) -> LocatorParser Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (Bool, Text) -> LocatorParser Text
modifier)
  let lo :: Text
lo = [Text] -> Text
T.concat (Text
gText -> [Text] -> [Text]
forall a. a -> [a] -> [a]
:[Text]
gs)
  (Text, Text) -> LocatorParser (Text, Text)
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
la, Text
lo)

pLocatorLabelIntegrated :: LocatorMap -> LocatorParser (Text, Bool)
pLocatorLabelIntegrated :: LocatorMap -> LocatorParser (Text, Bool)
pLocatorLabelIntegrated LocatorMap
locMap
  = LocatorMap -> LocatorParser Text -> LocatorParser (Text, Bool)
pLocatorLabel' LocatorMap
locMap LocatorParser Text
lim LocatorParser (Text, Bool)
-> LocatorParser (Text, Bool) -> LocatorParser (Text, Bool)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> (LocatorParser Text -> LocatorParser Text
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m a
lookAhead LocatorParser Text
digital LocatorParser Text
-> LocatorParser (Text, Bool) -> LocatorParser (Text, Bool)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Text, Bool) -> LocatorParser (Text, Bool)
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
"page", Bool
True))
    where
      lim :: LocatorParser Text
lim = LocatorParser Text -> LocatorParser Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (LocatorParser Text -> LocatorParser Text)
-> LocatorParser Text -> LocatorParser Text
forall a b. (a -> b) -> a -> b
$ Bool -> ParsecT [Inline] () Identity (Bool, Text)
pLocatorWordIntegrated Bool
True ParsecT [Inline] () Identity (Bool, Text)
-> ((Bool, Text) -> LocatorParser Text) -> LocatorParser Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (Bool, Text) -> LocatorParser Text
requireRomansOrDigits
      digital :: LocatorParser Text
digital = LocatorParser Text -> LocatorParser Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (LocatorParser Text -> LocatorParser Text)
-> LocatorParser Text -> LocatorParser Text
forall a b. (a -> b) -> a -> b
$ Bool -> ParsecT [Inline] () Identity (Bool, Text)
pLocatorWordIntegrated Bool
True ParsecT [Inline] () Identity (Bool, Text)
-> ((Bool, Text) -> LocatorParser Text) -> LocatorParser Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (Bool, Text) -> LocatorParser Text
requireDigits

pLocatorLabel' :: LocatorMap -> LocatorParser Text
               -> LocatorParser (Text, Bool)
pLocatorLabel' :: LocatorMap -> LocatorParser Text -> LocatorParser (Text, Bool)
pLocatorLabel' LocatorMap
locMap LocatorParser Text
lim = Text -> LocatorParser (Text, Bool)
go Text
""
    where
      -- grow the match string until we hit the end
      -- trying to find the largest match for a label
      go :: Text -> LocatorParser (Text, Bool)
go Text
acc = LocatorParser (Text, Bool) -> LocatorParser (Text, Bool)
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (LocatorParser (Text, Bool) -> LocatorParser (Text, Bool))
-> LocatorParser (Text, Bool) -> LocatorParser (Text, Bool)
forall a b. (a -> b) -> a -> b
$ do
          -- advance at least one token each time
          -- the pathological case is "p.3"
          Inline
t <- ParsecT [Inline] () Identity Inline
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m t
anyToken
          [Inline]
ts <- ParsecT [Inline] () Identity Inline
-> LocatorParser Text -> ParsecT [Inline] () Identity [Inline]
forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
manyTill ParsecT [Inline] () Identity Inline
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m t
anyToken (LocatorParser Text -> LocatorParser Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (LocatorParser Text -> LocatorParser Text)
-> LocatorParser Text -> LocatorParser Text
forall a b. (a -> b) -> a -> b
$ LocatorParser Text -> LocatorParser Text
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m a
lookAhead LocatorParser Text
lim)
          let s :: Text
s = Text
acc Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> [Inline] -> Text
forall a. Walkable Inline a => a -> Text
stringify (Inline
tInline -> [Inline] -> [Inline]
forall a. a -> [a] -> [a]
:[Inline]
ts)
          case Text -> LocatorMap -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup (Text -> Text
T.strip Text
s) LocatorMap
locMap of
            -- try to find a longer one, or return this one
            Just Text
l -> Text -> LocatorParser (Text, Bool)
go Text
s LocatorParser (Text, Bool)
-> LocatorParser (Text, Bool) -> LocatorParser (Text, Bool)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> (Text, Bool) -> LocatorParser (Text, Bool)
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
l, Bool
False)
            Maybe Text
Nothing -> Text -> LocatorParser (Text, Bool)
go Text
s

-- hard requirement for a locator to have some real digits in it
requireDigits :: (Bool, Text) -> LocatorParser Text
requireDigits :: (Bool, Text) -> LocatorParser Text
requireDigits (Bool
_, Text
s) = if Bool -> Bool
not ((Char -> Bool) -> Text -> Bool
T.any Char -> Bool
isDigit Text
s)
                          then SourceName -> LocatorParser Text
forall (m :: * -> *) a. MonadFail m => SourceName -> m a
Prelude.fail SourceName
"requireDigits"
                          else Text -> LocatorParser Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
s

-- soft requirement for a sequence with some roman or arabic parts
-- (a)(iv) -- because iv is roman
-- 1(a)  -- because 1 is an actual digit
-- NOT: a, (a)-(b), hello, (some text in brackets)
requireRomansOrDigits :: (Bool, Text) -> LocatorParser Text
requireRomansOrDigits :: (Bool, Text) -> LocatorParser Text
requireRomansOrDigits (Bool
d, Text
s) = if Bool -> Bool
not Bool
d
                                  then SourceName -> LocatorParser Text
forall (m :: * -> *) a. MonadFail m => SourceName -> m a
Prelude.fail SourceName
"requireRomansOrDigits"
                                  else Text -> LocatorParser Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
s

pLocatorWordIntegrated :: Bool -> LocatorParser (Bool, Text)
pLocatorWordIntegrated :: Bool -> ParsecT [Inline] () Identity (Bool, Text)
pLocatorWordIntegrated Bool
isFirst = ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (ParsecT [Inline] () Identity (Bool, Text)
 -> ParsecT [Inline] () Identity (Bool, Text))
-> ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
forall a b. (a -> b) -> a -> b
$ do
  Text
punct <- if Bool
isFirst
              then Text -> LocatorParser Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
""
              else (Inline -> Text
forall a. Walkable Inline a => a -> Text
stringify (Inline -> Text)
-> ParsecT [Inline] () Identity Inline -> LocatorParser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT [Inline] () Identity Inline
pLocatorSep) LocatorParser Text -> LocatorParser Text -> LocatorParser Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Text -> LocatorParser Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
""
  Text
sp <- Text -> LocatorParser Text -> LocatorParser Text
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option Text
"" (ParsecT [Inline] () Identity Inline
pSpace ParsecT [Inline] () Identity Inline
-> LocatorParser Text -> LocatorParser Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> LocatorParser Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
" ")
  (Bool
dig, Text
s) <- [(Char, Char)]
-> ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
pBalancedBraces [(Char
'(',Char
')'), (Char
'[',Char
']'), (Char
'{',Char
'}')] ParsecT [Inline] () Identity (Bool, Text)
pPageSeq
  (Bool, Text) -> ParsecT [Inline] () Identity (Bool, Text)
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool
dig, Text
punct Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
sp Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
s)

-- we want to capture:  123, 123A, C22, XVII, 33-44, 22-33; 22-11
--                      34(1), 34A(A), 34(1)(i)(i), (1)(a)
--                      [17], [17]-[18], '591 [84]'
--                      (because CSL cannot pull out individual pages/sections
--                      to wrap in braces on a per-style basis)
pBalancedBraces :: [(Char, Char)]
                -> LocatorParser (Bool, Text)
                -> LocatorParser (Bool, Text)
pBalancedBraces :: [(Char, Char)]
-> ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
pBalancedBraces [(Char, Char)]
braces ParsecT [Inline] () Identity (Bool, Text)
p = ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (ParsecT [Inline] () Identity (Bool, Text)
 -> ParsecT [Inline] () Identity (Bool, Text))
-> ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
forall a b. (a -> b) -> a -> b
$ do
  [(Bool, Text)]
ss <- ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity [(Bool, Text)]
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 ParsecT [Inline] () Identity (Bool, Text)
surround
  (Bool, Text) -> ParsecT [Inline] () Identity (Bool, Text)
forall (m :: * -> *) a. Monad m => a -> m a
return ((Bool, Text) -> ParsecT [Inline] () Identity (Bool, Text))
-> (Bool, Text) -> ParsecT [Inline] () Identity (Bool, Text)
forall a b. (a -> b) -> a -> b
$ [(Bool, Text)] -> (Bool, Text)
anyWereDigitLike [(Bool, Text)]
ss
  where
      except :: ParsecT [Inline] () Identity (Bool, Text)
except = ParsecT [Inline] () Identity Inline
-> ParsecT [Inline] () Identity ()
forall s (m :: * -> *) t a u.
(Stream s m t, Show a) =>
ParsecT s u m a -> ParsecT s u m ()
notFollowedBy ParsecT [Inline] () Identity Inline
pBraces ParsecT [Inline] () Identity ()
-> ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParsecT [Inline] () Identity (Bool, Text)
p
      -- outer and inner
      surround :: ParsecT [Inline] () Identity (Bool, Text)
surround = (ParsecT [Inline] () Identity (Bool, Text)
 -> (Char, Char) -> ParsecT [Inline] () Identity (Bool, Text))
-> ParsecT [Inline] () Identity (Bool, Text)
-> [(Char, Char)]
-> ParsecT [Inline] () Identity (Bool, Text)
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\ParsecT [Inline] () Identity (Bool, Text)
a (Char
open, Char
close) -> Char
-> Char
-> ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
sur Char
open Char
close ParsecT [Inline] () Identity (Bool, Text)
except ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT [Inline] () Identity (Bool, Text)
a)
                       ParsecT [Inline] () Identity (Bool, Text)
except
                       [(Char, Char)]
braces

      isc :: Char -> LocatorParser Text
isc Char
c = Inline -> Text
forall a. Walkable Inline a => a -> Text
stringify (Inline -> Text)
-> ParsecT [Inline] () Identity Inline -> LocatorParser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> SourceName -> (Char -> Bool) -> ParsecT [Inline] () Identity Inline
pMatchChar [Char
c] (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c)

      sur :: Char
-> Char
-> ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
sur Char
c Char
c' ParsecT [Inline] () Identity (Bool, Text)
m = ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (ParsecT [Inline] () Identity (Bool, Text)
 -> ParsecT [Inline] () Identity (Bool, Text))
-> ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
forall a b. (a -> b) -> a -> b
$ do
          (Bool
d, Text
mid) <- LocatorParser Text
-> LocatorParser Text
-> ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (Char -> LocatorParser Text
isc Char
c) (Char -> LocatorParser Text
isc Char
c') ((Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option (Bool
False, Text
"") ParsecT [Inline] () Identity (Bool, Text)
m)
          (Bool, Text) -> ParsecT [Inline] () Identity (Bool, Text)
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool
d, Char -> Text -> Text
T.cons Char
c (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Char -> Text) -> Char -> Text -> Text
forall a b c. (a -> b -> c) -> b -> a -> c
flip Text -> Char -> Text
T.snoc Char
c' (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$  Text
mid)

      flattened :: SourceName
flattened = ((Char, Char) -> SourceName) -> [(Char, Char)] -> SourceName
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (\(Char
o, Char
c) -> [Char
o, Char
c]) [(Char, Char)]
braces
      pBraces :: ParsecT [Inline] () Identity Inline
pBraces = SourceName -> (Char -> Bool) -> ParsecT [Inline] () Identity Inline
pMatchChar SourceName
"braces" (Char -> SourceName -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` SourceName
flattened)


-- YES 1, 1.2, 1.2.3
-- NO  1., 1.2. a.6
-- can't use sepBy because we want to leave trailing .s
pPageSeq :: LocatorParser (Bool, Text)
pPageSeq :: ParsecT [Inline] () Identity (Bool, Text)
pPageSeq = ParsecT [Inline] () Identity (Bool, Text)
oneDotTwo ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT [Inline] () Identity (Bool, Text)
withPeriod
  where
      oneDotTwo :: ParsecT [Inline] () Identity (Bool, Text)
oneDotTwo = do
          (Bool, Text)
u <- ParsecT [Inline] () Identity (Bool, Text)
pPageUnit
          [(Bool, Text)]
us <- ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity [(Bool, Text)]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many ParsecT [Inline] () Identity (Bool, Text)
withPeriod
          (Bool, Text) -> ParsecT [Inline] () Identity (Bool, Text)
forall (m :: * -> *) a. Monad m => a -> m a
return ((Bool, Text) -> ParsecT [Inline] () Identity (Bool, Text))
-> (Bool, Text) -> ParsecT [Inline] () Identity (Bool, Text)
forall a b. (a -> b) -> a -> b
$ [(Bool, Text)] -> (Bool, Text)
anyWereDigitLike ((Bool, Text)
u(Bool, Text) -> [(Bool, Text)] -> [(Bool, Text)]
forall a. a -> [a] -> [a]
:[(Bool, Text)]
us)
      withPeriod :: ParsecT [Inline] () Identity (Bool, Text)
withPeriod = ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (ParsecT [Inline] () Identity (Bool, Text)
 -> ParsecT [Inline] () Identity (Bool, Text))
-> ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
forall a b. (a -> b) -> a -> b
$ do
          -- .2
          Inline
p <- SourceName -> (Char -> Bool) -> ParsecT [Inline] () Identity Inline
pMatchChar SourceName
"." (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'.')
          (Bool, Text)
u <- ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try ParsecT [Inline] () Identity (Bool, Text)
pPageUnit
          (Bool, Text) -> ParsecT [Inline] () Identity (Bool, Text)
forall (m :: * -> *) a. Monad m => a -> m a
return ((Bool, Text) -> Bool
forall a b. (a, b) -> a
fst (Bool, Text)
u, Inline -> Text
forall a. Walkable Inline a => a -> Text
stringify Inline
p Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> (Bool, Text) -> Text
forall a b. (a, b) -> b
snd (Bool, Text)
u)

anyWereDigitLike :: [(Bool, Text)] -> (Bool, Text)
anyWereDigitLike :: [(Bool, Text)] -> (Bool, Text)
anyWereDigitLike [(Bool, Text)]
as = (((Bool, Text) -> Bool) -> [(Bool, Text)] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (Bool, Text) -> Bool
forall a b. (a, b) -> a
fst [(Bool, Text)]
as, [Text] -> Text
T.concat ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ ((Bool, Text) -> Text) -> [(Bool, Text)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Bool, Text) -> Text
forall a b. (a, b) -> b
snd [(Bool, Text)]
as)

pPageUnit :: LocatorParser (Bool, Text)
pPageUnit :: ParsecT [Inline] () Identity (Bool, Text)
pPageUnit = ParsecT [Inline] () Identity (Bool, Text)
roman ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
-> ParsecT [Inline] () Identity (Bool, Text)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT [Inline] () Identity (Bool, Text)
plainUnit
  where
      -- roman is a 'digit'
      roman :: ParsecT [Inline] () Identity (Bool, Text)
roman = (Bool
True,) (Text -> (Bool, Text))
-> LocatorParser Text -> ParsecT [Inline] () Identity (Bool, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> LocatorParser Text
pRoman
      plainUnit :: ParsecT [Inline] () Identity (Bool, Text)
plainUnit = do
          [Inline]
ts <- ParsecT [Inline] () Identity Inline
-> ParsecT [Inline] () Identity [Inline]
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 (ParsecT [Inline] () Identity Inline
-> ParsecT [Inline] () Identity ()
forall s (m :: * -> *) t a u.
(Stream s m t, Show a) =>
ParsecT s u m a -> ParsecT s u m ()
notFollowedBy ParsecT [Inline] () Identity Inline
pSpace ParsecT [Inline] () Identity ()
-> ParsecT [Inline] () Identity ()
-> ParsecT [Inline] () Identity ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
                       ParsecT [Inline] () Identity Inline
-> ParsecT [Inline] () Identity ()
forall s (m :: * -> *) t a u.
(Stream s m t, Show a) =>
ParsecT s u m a -> ParsecT s u m ()
notFollowedBy ParsecT [Inline] () Identity Inline
pLocatorPunct ParsecT [Inline] () Identity ()
-> ParsecT [Inline] () Identity Inline
-> ParsecT [Inline] () Identity Inline
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
                       ParsecT [Inline] () Identity Inline
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m t
anyToken)
          let s :: Text
s = [Inline] -> Text
forall a. Walkable Inline a => a -> Text
stringify [Inline]
ts
          -- otherwise look for actual digits or -s
          (Bool, Text) -> ParsecT [Inline] () Identity (Bool, Text)
forall (m :: * -> *) a. Monad m => a -> m a
return ((Char -> Bool) -> Text -> Bool
T.any Char -> Bool
isDigit Text
s, Text
s)

pRoman :: LocatorParser Text
pRoman :: LocatorParser Text
pRoman = LocatorParser Text -> LocatorParser Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (LocatorParser Text -> LocatorParser Text)
-> LocatorParser Text -> LocatorParser Text
forall a b. (a -> b) -> a -> b
$ do
  Inline
tok <- ParsecT [Inline] () Identity Inline
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m t
anyToken
  case Inline
tok of
       Str Text
t -> case Parsec Text () () -> SourceName -> Text -> Either ParseError ()
forall s t a.
Stream s Identity t =>
Parsec s () a -> SourceName -> s -> Either ParseError a
parse (Bool -> ParserT Text () Identity Int
forall s (m :: * -> *) st.
Stream s m Char =>
Bool -> ParserT s st m Int
romanNumeral Bool
True ParserT Text () Identity Int
-> Parsec Text () () -> Parsec Text () ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parsec Text () ()
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof)
                   SourceName
"roman numeral" (Text -> Text
T.toUpper Text
t) of
                      Left ParseError
_    -> LocatorParser Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero
                      Right ()  -> Text -> LocatorParser Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
t
       Inline
_      -> LocatorParser Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero

pLocatorPunct :: LocatorParser Inline
pLocatorPunct :: ParsecT [Inline] () Identity Inline
pLocatorPunct = SourceName -> (Char -> Bool) -> ParsecT [Inline] () Identity Inline
pMatchChar SourceName
"punctuation" Char -> Bool
isLocatorPunct

pLocatorSep :: LocatorParser Inline
pLocatorSep :: ParsecT [Inline] () Identity Inline
pLocatorSep = SourceName -> (Char -> Bool) -> ParsecT [Inline] () Identity Inline
pMatchChar SourceName
"locator separator" Char -> Bool
isLocatorSep

pMatchChar :: String -> (Char -> Bool) -> LocatorParser Inline
pMatchChar :: SourceName -> (Char -> Bool) -> ParsecT [Inline] () Identity Inline
pMatchChar SourceName
msg Char -> Bool
f = (Inline -> Bool) -> ParsecT [Inline] () Identity Inline
satisfyTok Inline -> Bool
f' ParsecT [Inline] () Identity Inline
-> SourceName -> ParsecT [Inline] () Identity Inline
forall s u (m :: * -> *) a.
ParsecT s u m a -> SourceName -> ParsecT s u m a
<?> SourceName
msg
    where
        f' :: Inline -> Bool
f' (Str (Text -> SourceName
T.unpack -> [Char
c])) = Char -> Bool
f Char
c
        f' Inline
_                       = Bool
False

pSpace :: LocatorParser Inline
pSpace :: ParsecT [Inline] () Identity Inline
pSpace = (Inline -> Bool) -> ParsecT [Inline] () Identity Inline
satisfyTok (\Inline
t -> Inline -> Bool
isSpacey Inline
t Bool -> Bool -> Bool
|| Inline
t Inline -> Inline -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> Inline
Str Text
"\160") ParsecT [Inline] () Identity Inline
-> SourceName -> ParsecT [Inline] () Identity Inline
forall s u (m :: * -> *) a.
ParsecT s u m a -> SourceName -> ParsecT s u m a
<?> SourceName
"space"

satisfyTok :: (Inline -> Bool) -> LocatorParser Inline
satisfyTok :: (Inline -> Bool) -> ParsecT [Inline] () Identity Inline
satisfyTok Inline -> Bool
f = (Inline -> SourceName)
-> (SourcePos -> Inline -> [Inline] -> SourcePos)
-> (Inline -> Maybe Inline)
-> ParsecT [Inline] () Identity Inline
forall s (m :: * -> *) t a u.
Stream s m t =>
(t -> SourceName)
-> (SourcePos -> t -> s -> SourcePos)
-> (t -> Maybe a)
-> ParsecT s u m a
tokenPrim Inline -> SourceName
forall a. Show a => a -> SourceName
show (\SourcePos
sp Inline
_ [Inline]
_ -> SourcePos
sp) (\Inline
tok -> if Inline -> Bool
f Inline
tok
                                                          then Inline -> Maybe Inline
forall a. a -> Maybe a
Just Inline
tok
                                                          else Maybe Inline
forall a. Maybe a
Nothing)

isSpacey :: Inline -> Bool
isSpacey :: Inline -> Bool
isSpacey Inline
Space     = Bool
True
isSpacey Inline
SoftBreak = Bool
True
isSpacey Inline
_         = Bool
False

isLocatorPunct :: Char -> Bool
isLocatorPunct :: Char -> Bool
isLocatorPunct Char
'-' = Bool
False -- page range
isLocatorPunct Char
'–' = Bool
False -- page range, en dash
isLocatorPunct Char
':' = Bool
False -- vol:page-range hack
isLocatorPunct Char
c   = Char -> Bool
isPunctuation Char
c -- includes [{()}]

isLocatorSep :: Char -> Bool
isLocatorSep :: Char -> Bool
isLocatorSep Char
',' = Bool
True
isLocatorSep Char
';' = Bool
True
isLocatorSep Char
_   = Bool
False

splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline]
splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline]
splitStrWhen Char -> Bool
_ [] = []
splitStrWhen Char -> Bool
p (Str Text
xs : [Inline]
ys) = SourceName -> [Inline]
go (Text -> SourceName
T.unpack Text
xs) [Inline] -> [Inline] -> [Inline]
forall a. [a] -> [a] -> [a]
++ (Char -> Bool) -> [Inline] -> [Inline]
splitStrWhen Char -> Bool
p [Inline]
ys
  where
   go :: SourceName -> [Inline]
go [] = []
   go SourceName
s = case (Char -> Bool) -> SourceName -> (SourceName, SourceName)
forall a. (a -> Bool) -> [a] -> ([a], [a])
break Char -> Bool
p SourceName
s of
             ([],[])     -> []
             (SourceName
zs,[])     -> [Text -> Inline
Str (Text -> Inline) -> Text -> Inline
forall a b. (a -> b) -> a -> b
$ SourceName -> Text
T.pack SourceName
zs]
             ([],Char
w:SourceName
ws) -> Text -> Inline
Str (Char -> Text
T.singleton Char
w) Inline -> [Inline] -> [Inline]
forall a. a -> [a] -> [a]
: SourceName -> [Inline]
go SourceName
ws
             (SourceName
zs,Char
w:SourceName
ws) -> Text -> Inline
Str (SourceName -> Text
T.pack SourceName
zs) Inline -> [Inline] -> [Inline]
forall a. a -> [a] -> [a]
: Text -> Inline
Str (Char -> Text
T.singleton Char
w) Inline -> [Inline] -> [Inline]
forall a. a -> [a] -> [a]
: SourceName -> [Inline]
go SourceName
ws
splitStrWhen Char -> Bool
p (Inline
x : [Inline]
ys) = Inline
x Inline -> [Inline] -> [Inline]
forall a. a -> [a] -> [a]
: (Char -> Bool) -> [Inline] -> [Inline]
splitStrWhen Char -> Bool
p [Inline]
ys

--
-- Locator Map
--

type LocatorMap = M.Map Text Text

toLocatorMap :: Locale -> LocatorMap
toLocatorMap :: Locale -> LocatorMap
toLocatorMap Locale
locale =
  (Text -> LocatorMap -> LocatorMap)
-> LocatorMap -> [Text] -> LocatorMap
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Text -> LocatorMap -> LocatorMap
go LocatorMap
forall a. Monoid a => a
mempty [Text]
locatorTerms
 where
  go :: Text -> LocatorMap -> LocatorMap
go Text
tname LocatorMap
locmap =
    case Text -> Map Text [(Term, Text)] -> Maybe [(Term, Text)]
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
tname (Locale -> Map Text [(Term, Text)]
localeTerms Locale
locale) of
      Maybe [(Term, Text)]
Nothing -> LocatorMap
locmap
      Just [(Term, Text)]
ts -> ((Term, Text) -> LocatorMap -> LocatorMap)
-> LocatorMap -> [(Term, Text)] -> LocatorMap
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\(Term, Text)
x -> Text -> Text -> LocatorMap -> LocatorMap
forall k a. Ord k => k -> a -> Map k a -> Map k a
M.insert ((Term, Text) -> Text
forall a b. (a, b) -> b
snd (Term, Text)
x) Text
tname) LocatorMap
locmap [(Term, Text)]
ts

locatorTerms :: [Text]
locatorTerms :: [Text]
locatorTerms =
  [ Text
"book"
  , Text
"chapter"
  , Text
"column"
  , Text
"figure"
  , Text
"folio"
  , Text
"issue"
  , Text
"line"
  , Text
"note"
  , Text
"opus"
  , Text
"page"
  , Text
"number-of-pages"
  , Text
"paragraph"
  , Text
"part"
  , Text
"section"
  , Text
"sub verbo"
  , Text
"verse"
  , Text
"volume" ]