module Text.Pandoc.Shared (
splitBy,
splitByIndices,
substitute,
backslashEscapes,
escapeStringUsing,
stripTrailingNewlines,
removeLeadingTrailingSpace,
removeLeadingSpace,
removeTrailingSpace,
stripFirstAndLast,
camelCaseToHyphenated,
toRomanNumeral,
escapeURI,
unescapeURI,
tabFilter,
wrapped,
wrapIfNeeded,
wrappedTeX,
wrapTeXIfNeeded,
BlockWrapper (..),
wrappedBlocksToDoc,
hang',
orderedListMarkers,
normalizeSpaces,
compactify,
Element (..),
hierarchicalize,
uniqueIdent,
isHeaderBlock,
headerShift,
HTMLMathMethod (..),
ObfuscationMethod (..),
HTMLSlideVariant (..),
WriterOptions (..),
defaultWriterOptions,
inDirectory,
readDataFile
) where
import Control.Exception ( bracket )
import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii,
                   isLetter, isDigit )
import Data.List ( find, isPrefixOf, intercalate )
import System.Directory
import System.FilePath ( (</>) )
import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text, nest )
import qualified Text.PrettyPrint.HughesPJ as PP

import Codec.Binary.UTF8.String ( encodeString, decodeString )
import qualified Control.Monad.State as S
import Data.Generics (Typeable, Data)
import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString )

import Paths_pandoc (getDataFileName)
import Text.Pandoc.Definition
import qualified Text.Pandoc.UTF8 as UTF8 (readFile)
-- | Split a list into chunks at every run of the separator element.
-- Consecutive separators count as a single break.  A leading separator
-- produces an empty first chunk; a trailing separator produces no extra
-- chunk.  The empty list yields no chunks at all.
splitBy :: (Eq a) => a -> [a] -> [[a]]
splitBy delim = go
  where
    go [] = []
    go items = chunk : go (dropWhile (== delim) remainder)
      where
        -- Bound lazily so the first cons cell is available before the
        -- input has been inspected.
        (chunk, remainder) = break (== delim) items
-- | Split a list at the given indices.  The indices are offsets into the
-- original list; after each cut the remaining indices are rebased by
-- subtracting the index just used, so they are expected to be given in
-- ascending order.  With no indices the whole list is returned as the
-- single chunk.
splitByIndices :: [Int] -> [a] -> [[a]]
splitByIndices [] lst = [lst]
splitByIndices (x:xs) lst = first : splitByIndices (map (subtract x) xs) rest
  where
    (first, rest) = splitAt x lst
-- | Replace every non-overlapping occurrence of @target@ in a list with
-- @replacement@, scanning left to right.  An empty target leaves the input
-- unchanged.
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
substitute _ _ [] = []
substitute [] _ whole = whole
substitute target replacement whole = go whole
  where
    targetLen = length target

    go [] = []
    go remaining@(c:cs)
      | target `isPrefixOf` remaining =
          replacement ++ go (drop targetLen remaining)
      | otherwise = c : go cs
-- | Build an escape table that maps each given character to itself
-- preceded by a backslash.
backslashEscapes :: [Char]
                 -> [(Char, String)]
backslashEscapes chars = [ (c, ['\\', c]) | c <- chars ]
-- | Escape a string using an association table: every character that has
-- an entry is replaced by its mapped string (first entry wins); all other
-- characters are copied through unchanged.
escapeStringUsing :: [(Char, String)] -> String -> String
escapeStringUsing escapeTable = concatMap escapeChar
  where
    escapeChar c = maybe [c] id (lookup c escapeTable)
-- | Remove all newline characters from the end of a string.
stripTrailingNewlines :: String -> String
stripTrailingNewlines str = reverse (dropWhile isNewline (reverse str))
  where
    isNewline c = c == '\n'
-- | Trim whitespace (as defined by 'removeLeadingSpace') from both ends of
-- a string.  The trailing end is trimmed first, then the leading end.
removeLeadingTrailingSpace :: String -> String
removeLeadingTrailingSpace str = removeLeadingSpace (removeTrailingSpace str)
-- | Drop leading spaces, newlines and tabs from a string.
removeLeadingSpace :: String -> String
removeLeadingSpace = dropWhile isBlank
  where
    isBlank c = c == ' ' || c == '\n' || c == '\t'
-- | Drop trailing whitespace from a string by reversing it, trimming the
-- front with 'removeLeadingSpace', and reversing back.
removeTrailingSpace :: String -> String
removeTrailingSpace str = reverse (removeLeadingSpace (reverse str))
-- | Remove the first and the last character of a string.  Strings of
-- length zero or one become the empty string.
stripFirstAndLast :: String -> String
stripFirstAndLast str =
  -- Cut the last character first, then the first one.
  drop 1 $ take (length str - 1) str
-- | Convert a camelCase string to lower-case hyphenated form: a hyphen is
-- inserted wherever a lower-case letter is directly followed by an
-- upper-case one, and every character is lower-cased.
camelCaseToHyphenated :: String -> String
camelCaseToHyphenated str = case str of
  [] -> ""
  (lo:up:more)
    | isLower lo && isUpper up ->
        lo : '-' : toLower up : camelCaseToHyphenated more
  (c:more) -> toLower c : camelCaseToHyphenated more
-- | Convert a number to an upper-case roman numeral.  Numbers below 0 or
-- at least 4000 cannot be rendered and give @"?"@; zero gives the empty
-- string.
toRomanNumeral :: Int -> String
toRomanNumeral x
  | x >= 4000 || x < 0 = "?"
  | otherwise = go x numerals
  where
    -- Values in descending order, including the subtractive pairs, so a
    -- greedy scan always emits the canonical form (e.g. 9 is "IX").
    numerals :: [(Int, String)]
    numerals =
      [ (1000, "M"), (900, "CM"), (500, "D"), (400, "CD")
      , (100, "C"), (90, "XC"), (50, "L"), (40, "XL")
      , (10, "X"), (9, "IX"), (5, "V"), (4, "IV"), (1, "I")
      ]

    go _ [] = ""
    go n table@((value, glyph) : smaller)
      | n >= value = glyph ++ go (n - value) table
      | otherwise = go n smaller
-- | Escape a string for use in a URI: the string is first passed through
-- 'encodeString', and then every character rejected by 'isAllowedInURI'
-- is percent-escaped by 'escapeURIString'.
escapeURI :: String -> String
escapeURI str = escapeURIString isAllowedInURI (encodeString str)
-- | Undo percent-escaping in a URI for display: the string is unescaped
-- with 'unEscapeString', passed through 'decodeString', and then
-- re-escaped so that only ASCII characters not allowed in a URI stay
-- percent-encoded (non-ASCII characters are left as they are).
unescapeURI :: String -> String
unescapeURI str = escapeURIString keepAsIs (decodeString (unEscapeString str))
  where
    keepAsIs c = isAllowedInURI c || not (isAscii c)
-- | Expand tabs to spaces and normalise line endings.
--
-- The first argument is the tab stop width.  Each tab is replaced by as
-- many spaces as are needed to reach the next tab stop; with a tab stop
-- of 0 tabs are left untouched.  @\"\\r\\n\"@ and a lone @\'\\r\'@ are both
-- rewritten to @\'\\n\'@.
tabFilter :: Int
          -> String
          -> String
tabFilter tabStop = go tabStop
  where
    -- The first argument counts the columns left before the next stop.
    go _ [] = ""
    go _ ('\n':xs) = '\n' : go tabStop xs
    go _ ('\r':'\n':xs) = '\n' : go tabStop xs
    go _ ('\r':xs) = '\n' : go tabStop xs
    go spsToNextStop ('\t':xs)
      | tabStop == 0 = '\t' : go tabStop xs
      | otherwise = replicate spsToNextStop ' ' ++ go tabStop xs
    -- This character lands on the last column before a stop: reset.
    go 1 (x:xs) = x : go tabStop xs
    go spsToNextStop (x:xs) = x : go (spsToNextStop - 1) xs
-- | Render a list of inlines so that it can be re-flowed: the list is
-- split at 'Space' elements (via 'splitBy'), each chunk is rendered with
-- the supplied writer, and the resulting documents are combined with
-- 'fsep'.
wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc
wrapped listWriter sect = do
  chunks <- mapM listWriter (splitBy Space sect)
  return (fsep chunks)
-- | Choose how an inline list is rendered based on the writer options:
-- when 'writerWrapText' is set the list goes through 'wrapped', otherwise
-- the writer function is applied to it directly.
wrapIfNeeded :: Monad m => WriterOptions -> ([Inline] -> m Doc) ->
                [Inline] -> m Doc
wrapIfNeeded opts
  | writerWrapText opts = wrapped
  | otherwise = ($)
-- | True exactly for 'Note' inlines.
isNote :: Inline -> Bool
isNote inline = case inline of
  Note _ -> True
  _ -> False
-- | Variant of 'wrapped' that gives 'Note' inlines special treatment.
--
-- The inlines before the first note are rendered with 'wrapped'.  The
-- note itself, together with whatever follows it up to the next 'Space',
-- is rendered on a line of its own; the material after that 'Space' is
-- processed recursively.  When the first argument is true, a @%@ is
-- appended to the text preceding the note.
wrappedTeX :: Monad m
           => Bool
           -> ([Inline] -> m Doc)
           -> [Inline]
           -> m Doc
wrappedTeX includePercent listWriter sect = do
  let (beforeNote, fromNote) = break isNote sect
  beforeDoc <- wrapped listWriter beforeNote
  case fromNote of
    [] -> return beforeDoc
    (note : afterNote) -> do
      let (attached, remainder) = break (== Space) afterNote
      attachedDoc <- if null attached
                        then return empty
                        else listWriter attached
      -- Skip the 'Space' that ended the attached run before recursing.
      remainderDoc <- case remainder of
                        [] -> return empty
                        (_ : more) -> wrappedTeX includePercent listWriter more
      noteDoc <- listWriter [note]
      let percent = if includePercent then PP.char '%' else empty
      return $ (beforeDoc <> percent) $$
               (noteDoc <> attachedDoc) $$
               remainderDoc
-- | Choose between 'wrappedTeX' and direct application of the writer
-- function, depending on whether 'writerWrapText' is set in the options.
-- The 'Bool' is handed on to 'wrappedTeX' unchanged.
wrapTeXIfNeeded :: Monad m
                => WriterOptions
                -> Bool
                -> ([Inline] -> m Doc)
                -> [Inline]
                -> m Doc
wrapTeXIfNeeded opts includePercent
  | writerWrapText opts = wrappedTeX includePercent
  | otherwise = ($)
-- | A rendered block tagged with how it is joined to the block after it
-- (see 'wrappedBlocksToDoc').
data BlockWrapper
  = Pad Doc  -- ^ separated from following output by an empty line
  | Reg Doc  -- ^ stacked directly above following output
-- | Stack a list of wrapped blocks vertically into one document.  A 'Pad'
-- block gets an empty line after it unless nothing (or only empty output)
-- follows; a 'Reg' block is placed directly above what follows.
wrappedBlocksToDoc :: [BlockWrapper] -> Doc
wrappedBlocksToDoc = foldr addBlock empty
  where
    addBlock (Reg d) accum = d $$ accum
    addBlock (Pad d) accum
      | isEmpty accum = d
      | otherwise = d $$ text "" $$ accum
-- | Place the first document above the second, with the second nested
-- (indented) by the given amount.
hang' :: Doc -> Int -> Doc -> Doc
hang' leader indent body = leader $$ nest indent body
-- | Produce the infinite list of markers for an ordered list with the
-- given start number, number style and delimiter style.
--
-- Alphabetic styles begin at the @start@-th letter and wrap around after
-- the 26th; roman styles use 'toRomanNumeral'.  'DefaultStyle' and
-- 'Example' are rendered like 'Decimal', and 'DefaultDelim' like 'Period'.
orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
orderedListMarkers (start, numstyle, numdelim) = map inDelim nums
  where
    -- One-letter markers, skipping the letters before position @start@.
    alphaFrom letters = drop (start - 1) $ cycle $ map (: []) letters

    nums = case numstyle of
      DefaultStyle -> map show [start ..]
      Example      -> map show [start ..]
      Decimal      -> map show [start ..]
      UpperAlpha   -> alphaFrom ['A' .. 'Z']
      LowerAlpha   -> alphaFrom ['a' .. 'z']
      UpperRoman   -> map toRomanNumeral [start ..]
      LowerRoman   -> map (map toLower . toRomanNumeral) [start ..]

    inDelim str = case numdelim of
      DefaultDelim -> str ++ "."
      Period       -> str ++ "."
      OneParen     -> str ++ ")"
      TwoParens    -> "(" ++ str ++ ")"
-- | Tidy up the spacing of an inline list: empty 'Str' elements are
-- removed, runs of 'Space' (including those separated only by an empty
-- 'Str') are collapsed to one, all leading 'Space's are dropped, and a
-- single trailing 'Space' is dropped.
normalizeSpaces :: [Inline] -> [Inline]
normalizeSpaces = trimLeading . trimTrailing . collapse
  where
    collapse [] = []
    collapse (Space : Space : rest) = collapse (Space : rest)
    collapse (Space : Str "" : Space : rest) = collapse (Space : rest)
    collapse (Str "" : rest) = collapse rest
    collapse (x : rest) = x : collapse rest

    trimLeading = dropWhile (== Space)

    trimTrailing [] = []
    trimTrailing lst
      | last lst == Space = init lst
      | otherwise = lst
-- | Adjust the last item of a list of block lists: if the final item ends
-- in a 'Para' and that is the only 'Para' among all the items, it is
-- turned into a 'Plain'.  In every other case the items are returned
-- unchanged.
compactify :: [[Block]]
           -> [[Block]]
compactify [] = []
compactify items
  | null final = items
  | otherwise =
      case (last final, filter isPara (concat items)) of
        (Para inlines, [_]) -> init items ++ [init final ++ [Plain inlines]]
        _ -> items
  where
    final = last items
-- | True exactly for 'Para' blocks.
isPara :: Block -> Bool
isPara blk = case blk of
  Para _ -> True
  _ -> False
-- | A node of the section tree built by 'hierarchicalize': either a plain
-- block or a section.
data Element
  = Blk Block
    -- ^ a block that is not a header
  | Sec Int [Int] String [Inline] [Element]
    -- ^ header level, section number, identifier, title, and contents
  deriving (Eq, Read, Show, Typeable, Data)
-- | Derive an identifier from a list of inlines.
--
-- The textual content is extracted, characters other than letters, digits
-- and @_-. @ are removed, the rest is lower-cased, whitespace-separated
-- words are joined with hyphens, and everything before the first
-- alphabetic character is dropped.  The result may be empty.
inlineListToIdentifier :: [Inline] -> String
inlineListToIdentifier inlines = dropWhile (not . isAlpha) hyphenated
  where
    hyphenated = intercalate "-" (words lowered)
    lowered = map toLower kept
    kept = filter keepChar (concatMap extractText inlines)

    keepChar c = isLetter c || isDigit c || c `elem` "_-. "

    -- Plain-text rendering of one inline; notes and raw TeX/HTML
    -- contribute nothing.
    extractText (Str s) = s
    extractText (Emph lst) = concatMap extractText lst
    extractText (Strikeout lst) = concatMap extractText lst
    extractText (Superscript lst) = concatMap extractText lst
    extractText (SmallCaps lst) = concatMap extractText lst
    extractText (Subscript lst) = concatMap extractText lst
    extractText (Strong lst) = concatMap extractText lst
    extractText (Quoted _ lst) = concatMap extractText lst
    extractText (Cite _ lst) = concatMap extractText lst
    extractText (Code s) = s
    extractText Space = " "
    extractText EmDash = "---"
    extractText EnDash = "--"
    extractText Apostrophe = ""
    extractText Ellipses = "..."
    extractText LineBreak = " "
    extractText (Math _ s) = s
    extractText (TeX _) = ""
    extractText (HtmlInline _) = ""
    extractText (Link lst _) = concatMap extractText lst
    extractText (Image lst _) = concatMap extractText lst
    extractText (Note _) = ""
-- | Convert a flat list of blocks into a tree of 'Element's, starting
-- with no section number and no identifiers in use.
hierarchicalize :: [Block] -> [Element]
hierarchicalize = flip S.evalState initialState . hierarchicalizeWithIds
  where
    initialState = ([], [])
-- | Worker for 'hierarchicalize'.  The state holds the section number of
-- the most recent header and the identifiers handed out so far.
--
-- A header becomes a 'Sec' that owns all following blocks up to the next
-- header of the same or a shallower level; any other block becomes a
-- 'Blk'.
--
-- NOTE(review): a 'Header' with level 0 would hit @init []@ below; this
-- presumably never occurs in practice — confirm.
hierarchicalizeWithIds :: [Block] -> S.State ([Int],[String]) [Element]
hierarchicalizeWithIds [] = return []
hierarchicalizeWithIds ((Header level title'):xs) = do
  (lastnum, usedIdents) <- S.get
  let ident = uniqueIdent title' usedIdents
  let lastnum' = take level lastnum
  let newnum = if length lastnum' >= level
                  -- Same or shallower depth: bump the counter at this level.
                  then init lastnum' ++ [last lastnum' + 1]
                  -- Deeper than before: pad skipped levels with 0, start at 1.
                  else lastnum ++ replicate (level - length lastnum - 1) 0 ++ [1]
  S.put (newnum, ident : usedIdents)
  let (sectionContents, rest) = break (headerLtEq level) xs
  sectionContents' <- hierarchicalizeWithIds sectionContents
  rest' <- hierarchicalizeWithIds rest
  return $ Sec level newnum ident title' sectionContents' : rest'
hierarchicalizeWithIds (x:rest) = do
  rest' <- hierarchicalizeWithIds rest
  return $ Blk x : rest'
-- | True if the block is a 'Header' whose level is less than or equal to
-- the given level; False for everything else.
headerLtEq :: Int -> Block -> Bool
headerLtEq level blk = case blk of
  Header l _ -> l <= level
  _ -> False
-- | Generate an identifier for a title that does not clash with any of
-- the identifiers already in use.
--
-- The base identifier comes from 'inlineListToIdentifier', or is
-- @"section"@ when that is empty.  If the base is taken, the first free
-- @base-N@ with N from 1 to 60000 is used; if all of those are taken too,
-- the (clashing) base identifier is returned.
uniqueIdent :: [Inline] -> [String] -> String
uniqueIdent title' usedIdents
  | baseIdent `notElem` usedIdents = baseIdent
  | otherwise = maybe baseIdent numIdent (find isUnused suffixes)
  where
    baseIdent = case inlineListToIdentifier title' of
      "" -> "section"
      other -> other
    numIdent n = baseIdent ++ "-" ++ show n
    isUnused n = numIdent n `notElem` usedIdents
    suffixes = [1 .. 60000] :: [Int]
-- | True exactly for 'Header' blocks.
isHeaderBlock :: Block -> Bool
isHeaderBlock blk = case blk of
  Header _ _ -> True
  _ -> False
-- | Add @n@ to the level of every 'Header' block in a document, using
-- 'processWith' to reach the blocks.  No bounds are enforced on the
-- resulting level.
headerShift :: Int -> Pandoc -> Pandoc
headerShift n = processWith bump
  where
    bump :: Block -> Block
    bump blk = case blk of
      Header level inner -> Header (level + n) inner
      _ -> blk
-- | The available methods for handling math in HTML output.
--
-- NOTE(review): the 'String' payloads are presumably URLs of the script
-- or service used by each method — confirm against the HTML writer.
data HTMLMathMethod
  = PlainMath
  | LaTeXMathML (Maybe String)
  | JsMath (Maybe String)
  | GladTeX
  | WebTeX String
  | MathML (Maybe String)
  deriving (Show, Read, Eq)
-- | The available obfuscation methods (used as the type of
-- 'writerEmailObfuscation').
data ObfuscationMethod
  = NoObfuscation
  | ReferenceObfuscation
  | JavascriptObfuscation
  deriving (Show, Read, Eq)
-- | The kinds of HTML slide show output, with 'NoSlides' for ordinary
-- output (used as the type of 'writerSlideVariant').
data HTMLSlideVariant
  = S5Slides
  | SlidySlides
  | NoSlides
  deriving (Show, Read, Eq)
-- | Options passed to the writers.
--
-- Within this module only 'writerWrapText' is consulted (by
-- 'wrapIfNeeded' and 'wrapTeXIfNeeded'); the remaining fields are
-- interpreted by code outside this file.
-- NOTE(review): the meaning of each field beyond what its name suggests
-- should be confirmed against the individual writers.
data WriterOptions = WriterOptions
  { writerStandalone       :: Bool
  , writerTemplate         :: String
  , writerVariables        :: [(String, String)]
  , writerEPUBMetadata     :: String
  , writerTabStop          :: Int
  , writerTableOfContents  :: Bool
  , writerSlideVariant     :: HTMLSlideVariant
  , writerIncremental      :: Bool
  , writerXeTeX            :: Bool
  , writerHTMLMathMethod   :: HTMLMathMethod
  , writerIgnoreNotes      :: Bool
  , writerNumberSections   :: Bool
  , writerSectionDivs      :: Bool
  , writerStrictMarkdown   :: Bool
  , writerReferenceLinks   :: Bool
  , writerWrapText         :: Bool  -- ^ selects wrapping in 'wrapIfNeeded'
  , writerLiterateHaskell  :: Bool
  , writerEmailObfuscation :: ObfuscationMethod
  , writerIdentifierPrefix :: String
  , writerSourceDirectory  :: FilePath
  , writerUserDataDir      :: Maybe FilePath
  } deriving Show
-- | Default writer options.  Most switches are off; the exceptions are
-- text wrapping and section divs (both on), a tab stop of 4, JavaScript
-- e-mail obfuscation, and @"."@ as the source directory.
defaultWriterOptions :: WriterOptions
defaultWriterOptions = WriterOptions
  { writerStandalone       = False
  , writerTemplate         = ""
  , writerVariables        = []
  , writerEPUBMetadata     = ""
  , writerTabStop          = 4
  , writerTableOfContents  = False
  , writerSlideVariant     = NoSlides
  , writerIncremental      = False
  , writerXeTeX            = False
  , writerHTMLMathMethod   = PlainMath
  , writerIgnoreNotes      = False
  , writerNumberSections   = False
  , writerSectionDivs      = True
  , writerStrictMarkdown   = False
  , writerReferenceLinks   = False
  , writerWrapText         = True
  , writerLiterateHaskell  = False
  , writerEmailObfuscation = JavascriptObfuscation
  , writerIdentifierPrefix = ""
  , writerSourceDirectory  = "."
  , writerUserDataDir      = Nothing
  }
-- | Run an IO action with the given path as the current working
-- directory, then switch back to the directory that was current before.
--
-- The previous directory is restored with 'bracket', so it is reinstated
-- even when the action (or the change into @path@) throws; the exception
-- is then propagated to the caller.
inDirectory :: FilePath -> IO a -> IO a
inDirectory path action =
  bracket getCurrentDirectory setCurrentDirectory $ \_ -> do
    setCurrentDirectory path
    action
-- | Read a data file by name.  With a user data directory, the file is
-- looked up there first, falling back to the installed data file
-- (located via 'getDataFileName') if that read fails.  Without a user
-- directory the installed data file is read directly.
--
-- NOTE(review): 'catch' is used unqualified and no import visible here
-- supplies it, so it is presumably the old Prelude 'catch', which only
-- intercepts 'IOError' — confirm for the targeted version of base.
readDataFile :: Maybe FilePath -> FilePath -> IO String
readDataFile userDir fname = maybe readDefault readFromUserDir userDir
  where
    readDefault = getDataFileName fname >>= UTF8.readFile
    readFromUserDir dir =
      catch (UTF8.readFile (dir </> fname)) (const readDefault)