module Text.Pandoc.Shared (
splitBy,
splitByIndices,
substitute,
joinWithSep,
tabsToSpaces,
backslashEscapes,
escapeStringUsing,
stripTrailingNewlines,
removeLeadingTrailingSpace,
removeLeadingSpace,
removeTrailingSpace,
stripFirstAndLast,
camelCaseToHyphenated,
toRomanNumeral,
(>>~),
anyLine,
many1Till,
notFollowedBy',
oneOfStrings,
spaceChar,
skipSpaces,
blankline,
blanklines,
enclosed,
stringAnyCase,
parseFromString,
lineClump,
charsInBalanced,
charsInBalanced',
romanNumeral,
withHorizDisplacement,
nullBlock,
failIfStrict,
escaped,
anyOrderedListMarker,
orderedListMarker,
charRef,
readWith,
testStringWith,
ParserState (..),
defaultParserState,
Reference (..),
isNoteBlock,
isKeyBlock,
isLineClump,
HeaderType (..),
ParserContext (..),
QuoteContext (..),
NoteTable,
KeyTable,
lookupKeySrc,
refsMatch,
prettyPandoc,
orderedListMarkers,
normalizeSpaces,
compactify,
Element (..),
hierarchicalize,
isHeaderBlock,
WriterOptions (..),
defaultWriterOptions
) where
import Text.Pandoc.Definition
import Text.ParserCombinators.Parsec
import Text.Pandoc.CharacterReferences ( characterReference )
import Data.Char ( toLower, toUpper, ord, chr, isLower, isUpper )
import Data.List ( find, groupBy, isPrefixOf, isSuffixOf )
splitBy :: (Eq a) => a -> [a] -> [[a]]
splitBy _ [] = []
splitBy sep lst =
let (first, rest) = break (== sep) lst
rest' = dropWhile (== sep) rest
in first:(splitBy sep rest')
splitByIndices :: [Int] -> [a] -> [[a]]
splitByIndices [] lst = [lst]
splitByIndices (x:xs) lst =
let (first, rest) = splitAt x lst in
first:(splitByIndices (map (\y -> y x) xs) rest)
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
substitute _ _ [] = []
substitute [] _ lst = lst
substitute target replacement lst =
if target `isPrefixOf` lst
then replacement ++ (substitute target replacement $ drop (length target) lst)
else (head lst):(substitute target replacement $ tail lst)
joinWithSep :: [a]
-> [[a]]
-> [a]
joinWithSep sep [] = []
joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst
tabsToSpaces :: Int
-> String
-> String
tabsToSpaces tabstop str =
unlines $ map (tabsInLine tabstop tabstop) (lines str)
tabsInLine :: Int
-> Int
-> String
-> String
tabsInLine num tabstop [] = ""
tabsInLine num tabstop (c:cs) =
let (replacement, nextnum) = if c == '\t'
then (replicate num ' ', tabstop)
else if num > 1
then ([c], num 1)
else ([c], tabstop)
in replacement ++ tabsInLine nextnum tabstop cs
backslashEscapes :: [Char]
-> [(Char, String)]
backslashEscapes = map (\ch -> (ch, ['\\',ch]))
escapeStringUsing :: [(Char, String)] -> String -> String
escapeStringUsing escapeTable [] = ""
escapeStringUsing escapeTable (x:xs) =
case (lookup x escapeTable) of
Just str -> str ++ rest
Nothing -> x:rest
where rest = escapeStringUsing escapeTable xs
stripTrailingNewlines :: String -> String
stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
removeLeadingTrailingSpace :: String -> String
removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace
removeLeadingSpace :: String -> String
removeLeadingSpace = dropWhile (`elem` " \n\t")
removeTrailingSpace :: String -> String
removeTrailingSpace = reverse . removeLeadingSpace . reverse
stripFirstAndLast :: String -> String
stripFirstAndLast str =
drop 1 $ take ((length str) 1) str
camelCaseToHyphenated :: String -> String
camelCaseToHyphenated [] = ""
camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
a:'-':(toLower b):(camelCaseToHyphenated rest)
camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
toRomanNumeral :: Int -> String
toRomanNumeral x =
if x >= 4000 || x < 0
then "?"
else case x of
x | x >= 1000 -> "M" ++ toRomanNumeral (x 1000)
x | x >= 900 -> "CM" ++ toRomanNumeral (x 900)
x | x >= 500 -> "D" ++ toRomanNumeral (x 500)
x | x >= 400 -> "CD" ++ toRomanNumeral (x 400)
x | x >= 100 -> "C" ++ toRomanNumeral (x 100)
x | x >= 90 -> "XC" ++ toRomanNumeral (x 90)
x | x >= 50 -> "L" ++ toRomanNumeral (x 50)
x | x >= 40 -> "XL" ++ toRomanNumeral (x 40)
x | x >= 10 -> "X" ++ toRomanNumeral (x 10)
x | x >= 9 -> "IX" ++ toRomanNumeral (x 5)
x | x >= 5 -> "V" ++ toRomanNumeral (x 5)
x | x >= 4 -> "IV" ++ toRomanNumeral (x 4)
x | x >= 1 -> "I" ++ toRomanNumeral (x 1)
0 -> ""
(>>~) :: (Monad m) => m a -> m b -> m a
a >>~ b = a >>= \x -> b >> return x
anyLine :: GenParser Char st [Char]
anyLine = try (manyTill anyChar newline) <|> many1 anyChar
many1Till :: GenParser tok st a
-> GenParser tok st end
-> GenParser tok st [a]
many1Till p end = do
first <- p
rest <- manyTill p end
return (first:rest)
notFollowedBy' :: Show b => GenParser a st b -> GenParser a st ()
notFollowedBy' parser = try $ (do result <- try parser
unexpected (show result))
<|> return ()
oneOfStrings :: [String] -> GenParser Char st String
oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings
spaceChar :: CharParser st Char
spaceChar = oneOf " \t"
skipSpaces :: GenParser Char st ()
skipSpaces = skipMany spaceChar
blankline :: GenParser Char st Char
blankline = try $ skipSpaces >> newline
blanklines :: GenParser Char st [Char]
blanklines = many1 blankline
enclosed :: GenParser Char st t
-> GenParser Char st end
-> GenParser Char st a
-> GenParser Char st [a]
enclosed start end parser = try $
start >> notFollowedBy space >> many1Till parser (try end)
stringAnyCase :: [Char] -> CharParser st String
stringAnyCase [] = string ""
stringAnyCase (x:xs) = try $ do
firstChar <- choice [ char (toUpper x), char (toLower x) ]
rest <- stringAnyCase xs
return (firstChar:rest)
parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a
parseFromString parser str = try $ do
oldInput <- getInput
setInput str
result <- parser
setInput oldInput
return result
lineClump :: GenParser Char st String
lineClump = do
lines <- many1 (notFollowedBy blankline >> anyLine)
blanks <- blanklines <|> (eof >> return "\n")
return $ (unlines lines) ++ blanks
charsInBalanced :: Char -> Char -> GenParser Char st String
charsInBalanced open close = try $ do
char open
raw <- manyTill ( (do res <- charsInBalanced open close
return $ [open] ++ res ++ [close])
<|> (do notFollowedBy' (blankline >> blanklines)
count 1 anyChar))
(char close)
return $ concat raw
charsInBalanced' :: Char -> Char -> GenParser Char st String
charsInBalanced' open close = try $ do
char open
raw <- manyTill ( (do res <- charsInBalanced open close
return $ [open] ++ res ++ [close])
<|> count 1 anyChar)
(char close)
return $ concat raw
romanNumeral :: Bool
-> GenParser Char st Int
romanNumeral upper = try $ do
let charAnyCase c = char (if upper then toUpper c else c)
let one = charAnyCase 'i'
let five = charAnyCase 'v'
let ten = charAnyCase 'x'
let fifty = charAnyCase 'l'
let hundred = charAnyCase 'c'
let fivehundred = charAnyCase 'd'
let thousand = charAnyCase 'm'
thousands <- many thousand >>= (return . (1000 *) . length)
ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
fivehundreds <- many fivehundred >>= (return . (500 *) . length)
fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
hundreds <- many hundred >>= (return . (100 *) . length)
nineties <- option 0 $ try $ ten >> hundred >> return 90
fifties <- many fifty >>= (return . (50 *) . length)
forties <- option 0 $ try $ ten >> fifty >> return 40
tens <- many ten >>= (return . (10 *) . length)
nines <- option 0 $ try $ one >> ten >> return 9
fives <- many five >>= (return . (5 *) . length)
fours <- option 0 $ try $ one >> five >> return 4
ones <- many one >>= (return . length)
let total = thousands + ninehundreds + fivehundreds + fourhundreds +
hundreds + nineties + fifties + forties + tens + nines +
fives + fours + ones
if total == 0
then fail "not a roman numeral"
else return total
withHorizDisplacement :: GenParser Char st a
-> GenParser Char st (a, Int)
withHorizDisplacement parser = do
pos1 <- getPosition
result <- parser
pos2 <- getPosition
return (result, sourceColumn pos2 sourceColumn pos1)
nullBlock :: GenParser Char st Block
nullBlock = anyChar >> return Null
failIfStrict :: GenParser Char ParserState ()
failIfStrict = do
state <- getState
if stateStrict state then fail "strict mode" else return ()
escaped :: GenParser Char st Char
-> GenParser Char st Inline
escaped parser = try $ do
char '\\'
result <- parser
return (Str [result])
upperRoman :: GenParser Char st (ListNumberStyle, Int)
upperRoman = do
num <- romanNumeral True
return (UpperRoman, num)
lowerRoman :: GenParser Char st (ListNumberStyle, Int)
lowerRoman = do
num <- romanNumeral False
return (LowerRoman, num)
decimal :: GenParser Char st (ListNumberStyle, Int)
decimal = do
num <- many1 digit
return (Decimal, read num)
defaultNum :: GenParser Char st (ListNumberStyle, Int)
defaultNum = do
char '#'
return (DefaultStyle, 1)
lowerAlpha :: GenParser Char st (ListNumberStyle, Int)
lowerAlpha = do
ch <- oneOf ['a'..'z']
return (LowerAlpha, ord ch ord 'a' + 1)
upperAlpha :: GenParser Char st (ListNumberStyle, Int)
upperAlpha = do
ch <- oneOf ['A'..'Z']
return (UpperAlpha, ord ch ord 'A' + 1)
romanOne :: GenParser Char st (ListNumberStyle, Int)
romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
(char 'I' >> return (UpperRoman, 1))
anyOrderedListMarker :: GenParser Char st ListAttributes
anyOrderedListMarker = choice $
[delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
numParser <- [decimal, defaultNum, romanOne,
lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
inPeriod :: GenParser Char st (ListNumberStyle, Int)
-> GenParser Char st ListAttributes
inPeriod num = try $ do
(style, start) <- num
char '.'
let delim = if style == DefaultStyle
then DefaultDelim
else Period
return (start, style, delim)
inOneParen :: GenParser Char st (ListNumberStyle, Int)
-> GenParser Char st ListAttributes
inOneParen num = try $ do
(style, start) <- num
char ')'
return (start, style, OneParen)
inTwoParens :: GenParser Char st (ListNumberStyle, Int)
-> GenParser Char st ListAttributes
inTwoParens num = try $ do
char '('
(style, start) <- num
char ')'
return (start, style, TwoParens)
orderedListMarker :: ListNumberStyle
-> ListNumberDelim
-> GenParser Char st Int
orderedListMarker style delim = do
let num = case style of
DefaultStyle -> decimal <|> defaultNum
Decimal -> decimal
UpperRoman -> upperRoman
LowerRoman -> lowerRoman
UpperAlpha -> upperAlpha
LowerAlpha -> lowerAlpha
let context = case delim of
DefaultDelim -> inPeriod
Period -> inPeriod
OneParen -> inOneParen
TwoParens -> inTwoParens
(start, style, delim) <- context num
return start
charRef :: GenParser Char st Inline
charRef = do
c <- characterReference
return $ Str [c]
readWith :: GenParser Char ParserState a
-> ParserState
-> String
-> a
readWith parser state input =
case runParser parser state "source" input of
Left err -> error $ "\nError:\n" ++ show err
Right result -> result
testStringWith :: (Show a) => GenParser Char ParserState a
-> String
-> IO ()
testStringWith parser str = putStrLn $ show $
readWith parser defaultParserState str
data ParserState = ParserState
{ stateParseRaw :: Bool,
stateParserContext :: ParserContext,
stateQuoteContext :: QuoteContext,
stateKeys :: KeyTable,
stateNotes :: NoteTable,
stateTabStop :: Int,
stateStandalone :: Bool,
stateTitle :: [Inline],
stateAuthors :: [String],
stateDate :: String,
stateStrict :: Bool,
stateSmart :: Bool,
stateColumns :: Int,
stateHeaderTable :: [HeaderType]
}
deriving Show
defaultParserState :: ParserState
defaultParserState =
ParserState { stateParseRaw = False,
stateParserContext = NullState,
stateQuoteContext = NoQuote,
stateKeys = [],
stateNotes = [],
stateTabStop = 4,
stateStandalone = False,
stateTitle = [],
stateAuthors = [],
stateDate = [],
stateStrict = False,
stateSmart = False,
stateColumns = 80,
stateHeaderTable = [] }
data Reference
= KeyBlock [Inline] Target
| NoteBlock String [Block]
| LineClump String
deriving (Eq, Read, Show)
isNoteBlock :: Reference -> Bool
isNoteBlock (NoteBlock _ _) = True
isNoteBlock _ = False
isKeyBlock :: Reference -> Bool
isKeyBlock (KeyBlock _ _) = True
isKeyBlock _ = False
isLineClump :: Reference -> Bool
isLineClump (LineClump _) = True
isLineClump _ = False
data HeaderType
= SingleHeader Char
| DoubleHeader Char
deriving (Eq, Show)
data ParserContext
= ListItemState
| NullState
deriving (Eq, Show)
data QuoteContext
= InSingleQuote
| InDoubleQuote
| NoQuote
deriving (Eq, Show)
type NoteTable = [(String, [Block])]
type KeyTable = [([Inline], Target)]
lookupKeySrc :: KeyTable
-> [Inline]
-> Maybe Target
lookupKeySrc table key = case find (refsMatch key . fst) table of
Nothing -> Nothing
Just (_, src) -> Just src
refsMatch :: [Inline] -> [Inline] -> Bool
refsMatch ((Str x):restx) ((Str y):resty) =
((map toLower x) == (map toLower y)) && refsMatch restx resty
refsMatch ((Emph x):restx) ((Emph y):resty) =
refsMatch x y && refsMatch restx resty
refsMatch ((Strong x):restx) ((Strong y):resty) =
refsMatch x y && refsMatch restx resty
refsMatch ((Strikeout x):restx) ((Strikeout y):resty) =
refsMatch x y && refsMatch restx resty
refsMatch ((Superscript x):restx) ((Superscript y):resty) =
refsMatch x y && refsMatch restx resty
refsMatch ((Subscript x):restx) ((Subscript y):resty) =
refsMatch x y && refsMatch restx resty
refsMatch ((Quoted t x):restx) ((Quoted u y):resty) =
t == u && refsMatch x y && refsMatch restx resty
refsMatch ((Code x):restx) ((Code y):resty) =
((map toLower x) == (map toLower y)) && refsMatch restx resty
refsMatch ((TeX x):restx) ((TeX y):resty) =
((map toLower x) == (map toLower y)) && refsMatch restx resty
refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) =
((map toLower x) == (map toLower y)) && refsMatch restx resty
refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty
refsMatch [] x = null x
refsMatch x [] = null x
indentBy :: Int
-> Int
-> String
-> String
indentBy num first [] = ""
indentBy num first str =
let (firstLine:restLines) = lines str
firstLineIndent = num + first
in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++
(joinWithSep "\n" $ map ((replicate num ' ') ++ ) restLines)
prettyBlockList :: Int
-> [Block]
-> String
prettyBlockList indent [] = indentBy indent 0 "[]"
prettyBlockList indent blocks = indentBy indent (2) $ "[ " ++
(joinWithSep "\n, " (map prettyBlock blocks)) ++ " ]"
prettyBlock :: Block -> String
prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++
(prettyBlockList 2 blocks)
prettyBlock (OrderedList attribs blockLists) =
"OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++
(joinWithSep ", " $ map (\blocks -> prettyBlockList 2 blocks)
blockLists)) ++ " ]"
prettyBlock (BulletList blockLists) = "BulletList\n" ++
indentBy 2 0 ("[ " ++ (joinWithSep ", "
(map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"
prettyBlock (DefinitionList blockLists) = "DefinitionList\n" ++
indentBy 2 0 ("[" ++ (joinWithSep ",\n"
(map (\(term, blocks) -> " (" ++ show term ++ ",\n" ++
indentBy 1 2 (prettyBlockList 2 blocks) ++ " )") blockLists))) ++ " ]"
prettyBlock (Table caption aligns widths header rows) =
"Table " ++ show caption ++ " " ++ show aligns ++ " " ++
show widths ++ "\n" ++ prettyRow header ++ " [\n" ++
(joinWithSep ",\n" (map prettyRow rows)) ++ " ]"
where prettyRow cols = indentBy 2 0 ("[ " ++ (joinWithSep ", "
(map (\blocks -> prettyBlockList 2 blocks)
cols))) ++ " ]"
prettyBlock block = show block
prettyPandoc :: Pandoc -> String
prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++
")\n" ++ (prettyBlockList 0 blocks) ++ "\n"
orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
orderedListMarkers (start, numstyle, numdelim) =
let singleton c = [c]
seq = case numstyle of
DefaultStyle -> map show [start..]
Decimal -> map show [start..]
UpperAlpha -> drop (start 1) $ cycle $
map singleton ['A'..'Z']
LowerAlpha -> drop (start 1) $ cycle $
map singleton ['a'..'z']
UpperRoman -> map toRomanNumeral [start..]
LowerRoman -> map (map toLower . toRomanNumeral) [start..]
inDelim str = case numdelim of
DefaultDelim -> str ++ "."
Period -> str ++ "."
OneParen -> str ++ ")"
TwoParens -> "(" ++ str ++ ")"
in map inDelim seq
normalizeSpaces :: [Inline] -> [Inline]
normalizeSpaces [] = []
normalizeSpaces list =
let removeDoubles [] = []
removeDoubles (Space:Space:rest) = removeDoubles (Space:rest)
removeDoubles (Space:(Str ""):Space:rest) = removeDoubles (Space:rest)
removeDoubles ((Str ""):rest) = removeDoubles rest
removeDoubles (x:rest) = x:(removeDoubles rest)
removeLeading (Space:xs) = removeLeading xs
removeLeading x = x
removeTrailing [] = []
removeTrailing lst = if (last lst == Space)
then init lst
else lst
in removeLeading $ removeTrailing $ removeDoubles list
compactify :: [[Block]]
-> [[Block]]
compactify [] = []
compactify items =
let final = last items
others = init items
in case final of
[Para a] -> if any containsPara others
then items
else others ++ [[Plain a]]
otherwise -> items
containsPara :: [Block] -> Bool
containsPara [] = False
containsPara ((Para a):rest) = True
containsPara ((BulletList items):rest) = any containsPara items ||
containsPara rest
containsPara ((OrderedList _ items):rest) = any containsPara items ||
containsPara rest
containsPara ((DefinitionList items):rest) = any containsPara (map snd items) ||
containsPara rest
containsPara (x:rest) = containsPara rest
data Element = Blk Block
| Sec [Inline] [Element] deriving (Eq, Read, Show)
headerAtLeast :: Int -> Block -> Bool
headerAtLeast level (Header x _) = x <= level
headerAtLeast level _ = False
hierarchicalize :: [Block] -> [Element]
hierarchicalize [] = []
hierarchicalize (block:rest) =
case block of
(Header level title) ->
let (thisSection, rest') = break (headerAtLeast level) rest
in (Sec title (hierarchicalize thisSection)):(hierarchicalize rest')
x -> (Blk x):(hierarchicalize rest)
isHeaderBlock :: Block -> Bool
isHeaderBlock (Header _ _) = True
isHeaderBlock _ = False
data WriterOptions = WriterOptions
{ writerStandalone :: Bool
, writerHeader :: String
, writerTitlePrefix :: String
, writerTabStop :: Int
, writerTableOfContents :: Bool
, writerS5 :: Bool
, writerUseASCIIMathML :: Bool
, writerASCIIMathMLURL :: Maybe String
, writerIgnoreNotes :: Bool
, writerIncremental :: Bool
, writerNumberSections :: Bool
, writerIncludeBefore :: String
, writerIncludeAfter :: String
, writerStrictMarkdown :: Bool
, writerReferenceLinks :: Bool
} deriving Show
defaultWriterOptions =
WriterOptions { writerStandalone = False,
writerHeader = "",
writerTitlePrefix = "",
writerTabStop = 4,
writerTableOfContents = False,
writerS5 = False,
writerUseASCIIMathML = False,
writerASCIIMathMLURL = Nothing,
writerIgnoreNotes = False,
writerIncremental = False,
writerNumberSections = False,
writerIncludeBefore = "",
writerIncludeAfter = "",
writerStrictMarkdown = False,
writerReferenceLinks = False }