module Text.Pandoc.Readers.Txt2Tags ( readTxt2Tags
, getT2TMeta
, T2TMeta (..)
)
where
import Control.Monad (guard, void, when)
import Control.Monad.Except (catchError, throwError)
import Control.Monad.Reader (Reader, asks, runReader)
import Data.Char (toLower)
import Data.Default
import Data.List (intercalate, transpose)
import Data.Maybe (fromMaybe)
import Data.Monoid ((<>))
import Data.Text (Text)
import qualified Data.Text as T
import Data.Time.Format (formatTime)
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad)
import qualified Text.Pandoc.Class as P
import Text.Pandoc.Compat.Time (defaultTimeLocale)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (space, spaces, uri)
import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI,
underlineSpan)
type T2T = ParserT String ParserState (Reader T2TMeta)
data T2TMeta = T2TMeta {
date :: String
, mtime :: String
, infile :: FilePath
, outfile :: FilePath
} deriving Show
instance Default T2TMeta where
def = T2TMeta "" "" "" ""
getT2TMeta :: PandocMonad m => m T2TMeta
getT2TMeta = do
inps <- P.getInputFiles
outp <- fromMaybe "" <$> P.getOutputFile
curDate <- formatTime defaultTimeLocale "%F" <$> P.getZonedTime
let getModTime = fmap (formatTime defaultTimeLocale "%T") .
P.getModificationTime
curMtime <- case inps of
[] -> formatTime defaultTimeLocale "%T" <$> P.getZonedTime
_ -> catchError
(maximum <$> mapM getModTime inps)
(const (return ""))
return $ T2TMeta curDate curMtime (intercalate ", " inps) outp
readTxt2Tags :: PandocMonad m
=> ReaderOptions
-> Text
-> m Pandoc
readTxt2Tags opts s = do
meta <- getT2TMeta
let parsed = flip runReader meta $
readWithM parseT2T (def {stateOptions = opts}) $
T.unpack (crFilter s) ++ "\n\n"
case parsed of
Right result -> return result
Left e -> throwError e
parseT2T :: T2T Pandoc
parseT2T = do
standalone <- getOption readerStandalone
when standalone parseHeader
body <- mconcat <$> manyTill block eof
meta' <- stateMeta <$> getState
return $ Pandoc meta' (B.toList body)
parseHeader :: T2T ()
parseHeader = do
() <$ try blankline <|> header
meta <- stateMeta <$> getState
optional blanklines
config <- manyTill setting (notFollowedBy setting)
let settings = foldr (\(k,v) -> B.setMeta k (MetaString v)) meta config
updateState (\s -> s {stateMeta = settings}) <* optional blanklines
header :: T2T ()
header = titleline >> authorline >> dateline
headerline :: B.ToMetaValue a => String -> T2T a -> T2T ()
headerline field p = (() <$ try blankline)
<|> (p >>= updateState . B.setMeta field)
titleline :: T2T ()
titleline =
headerline "title" (trimInlines . mconcat <$> manyTill inline newline)
authorline :: T2T ()
authorline =
headerline "author" (sepBy author (char ';') <* newline)
where
author = trimInlines . mconcat <$> many (notFollowedBy (char ';' <|> newline) >> inline)
dateline :: T2T ()
dateline = headerline "date" (trimInlines . mconcat <$> manyTill inline newline)
type Keyword = String
type Value = String
setting :: T2T (Keyword, Value)
setting = do
string "%!"
keyword <- ignoreSpacesCap (many1 alphaNum)
char ':'
value <- ignoreSpacesCap (manyTill anyChar newline)
return (keyword, value)
parseBlocks :: T2T Blocks
parseBlocks = mconcat <$> manyTill block eof
block :: T2T Blocks
block =
choice
[ mempty <$ blanklines
, quote
, hrule
, title
, commentBlock
, verbatim
, rawBlock
, taggedBlock
, list
, table
, para
]
title :: T2T Blocks
title = try $ balancedTitle '+' <|> balancedTitle '='
balancedTitle :: Char -> T2T Blocks
balancedTitle c = try $ do
spaces
level <- length <$> many1 (char c)
guard (level <= 5)
heading <- manyTill (noneOf "\n\r") (count level (char c))
label <- optionMaybe (enclosed (char '[') (char ']') (alphaNum <|> oneOf "_-"))
many spaceChar *> newline
let attr = maybe nullAttr (\x -> (x, [], [])) label
return $ B.headerWith attr level (trimInlines $ B.text heading)
para :: T2T Blocks
para = try $ do
ils <- parseInlines
nl <- option False (True <$ newline)
option (B.plain ils) (guard nl >> notFollowedBy listStart >> return (B.para ils))
where
listStart = try bulletListStart <|> orderedListStart
commentBlock :: T2T Blocks
commentBlock = try (blockMarkupArea anyLine (const mempty) "%%%") <|> comment
hrule :: T2T Blocks
hrule = try $ do
spaces
line <- many1 (oneOf "=-_")
guard (length line >= 20)
B.horizontalRule <$ blankline
quote :: T2T Blocks
quote = try $ do
lookAhead tab
rawQuote <- many1 (tab *> optional spaces *> anyLine)
contents <- parseFromString' parseBlocks (intercalate "\n" rawQuote ++ "\n\n")
return $ B.blockQuote contents
commentLine :: T2T Inlines
commentLine = comment
list :: T2T Blocks
list = choice [bulletList, orderedList, definitionList]
bulletList :: T2T Blocks
bulletList = B.bulletList . compactify
<$> many1 (listItem bulletListStart parseBlocks)
orderedList :: T2T Blocks
orderedList = B.orderedList . compactify
<$> many1 (listItem orderedListStart parseBlocks)
definitionList :: T2T Blocks
definitionList = try $
B.definitionList . compactifyDL <$>
many1 (listItem definitionListStart definitionListEnd)
definitionListEnd :: T2T (Inlines, [Blocks])
definitionListEnd = (,) <$> (mconcat <$> manyTill inline newline) <*> ((:[]) <$> parseBlocks)
genericListStart :: T2T Char
-> T2T Int
genericListStart listMarker = try $
(2+) <$> (length <$> many spaceChar
<* listMarker <* space <* notFollowedBy space)
bulletListStart :: T2T Int
bulletListStart = genericListStart (char '-')
orderedListStart :: T2T Int
orderedListStart = genericListStart (char '+' )
definitionListStart :: T2T Int
definitionListStart = genericListStart (char ':')
listItem :: T2T Int
-> T2T a
-> T2T a
listItem start end = try $ do
markerLength <- try start
firstLine <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
rest <- concat <$> many (listContinuation markerLength)
parseFromString' end $ firstLine ++ blank ++ rest
listContinuation :: Int
-> T2T String
listContinuation markerLength = try $
notFollowedBy' (blankline >> blankline)
*> (mappend <$> (concat <$> many1 listLine)
<*> many blankline)
where listLine = try $ indentWith markerLength *> anyLineNewline
table :: T2T Blocks
table = try $ do
tableHeader <- fmap snd <$> option mempty (try headerRow)
rows <- many1 (many commentLine *> tableRow)
let columns = transpose rows
let ncolumns = length columns
let aligns = map (foldr1 findAlign . map fst) columns
let rows' = map (map snd) rows
let size = maximum (map length rows')
let rowsPadded = map (pad size) rows'
let headerPadded = if null tableHeader then mempty else pad size tableHeader
return $ B.table mempty
(zip aligns (replicate ncolumns 0.0))
headerPadded rowsPadded
pad :: (Monoid a) => Int -> [a] -> [a]
pad n xs = xs ++ replicate (n length xs) mempty
findAlign :: Alignment -> Alignment -> Alignment
findAlign x y
| x == y = x
| otherwise = AlignDefault
headerRow :: T2T [(Alignment, Blocks)]
headerRow = genericRow (string "||")
tableRow :: T2T [(Alignment, Blocks)]
tableRow = genericRow (char '|')
genericRow :: T2T a -> T2T [(Alignment, Blocks)]
genericRow start = try $ do
spaces *> start
manyTill tableCell newline <?> "genericRow"
tableCell :: T2T (Alignment, Blocks)
tableCell = try $ do
leftSpaces <- length <$> lookAhead (many1 space)
content <- manyTill inline (try $ lookAhead cellEnd)
rightSpaces <- length <$> many space
let align =
case compare leftSpaces rightSpaces of
LT -> AlignLeft
EQ -> AlignCenter
GT -> AlignRight
endOfCell
return (align, B.plain (B.trimInlines $ mconcat content))
where
cellEnd = void newline <|> (many1 space *> endOfCell)
endOfCell :: T2T ()
endOfCell = try (skipMany1 $ char '|') <|> ( () <$ lookAhead newline)
verbatim :: T2T Blocks
verbatim = genericBlock anyLineNewline B.codeBlock "```"
rawBlock :: T2T Blocks
rawBlock = genericBlock anyLineNewline (B.para . B.str) "\"\"\""
taggedBlock :: T2T Blocks
taggedBlock = do
target <- getTarget
genericBlock anyLineNewline (B.rawBlock target) "'''"
genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks
genericBlock p f s = blockMarkupArea p f s <|> blockMarkupLine p f s
blockMarkupArea :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks
blockMarkupArea p f s = try (do
string s *> blankline
f . mconcat <$> manyTill p (eof <|> void (string s *> blankline)))
blockMarkupLine :: T2T a -> (a -> Blocks) -> String -> T2T Blocks
blockMarkupLine p f s = try (f <$> (string s *> space *> p))
comment :: Monoid a => T2T a
comment = try $ do
atStart
notFollowedBy macro
mempty <$ (char '%' *> anyLine)
parseInlines :: T2T Inlines
parseInlines = trimInlines . mconcat <$> many1 inline
inline :: T2T Inlines
inline =
choice
[ endline
, macro
, commentLine
, whitespace
, url
, link
, image
, bold
, underline
, code
, raw
, tagged
, strike
, italic
, code
, str
, symbol
]
bold :: T2T Inlines
bold = inlineMarkup inline B.strong '*' B.str
underline :: T2T Inlines
underline = inlineMarkup inline underlineSpan '_' B.str
strike :: T2T Inlines
strike = inlineMarkup inline B.strikeout '-' B.str
italic :: T2T Inlines
italic = inlineMarkup inline B.emph '/' B.str
code :: T2T Inlines
code = inlineMarkup ((:[]) <$> anyChar) B.code '`' id
raw :: T2T Inlines
raw = inlineMarkup ((:[]) <$> anyChar) B.text '"' id
tagged :: T2T Inlines
tagged = do
target <- getTarget
inlineMarkup ((:[]) <$> anyChar) (B.rawInline target) '\'' id
inlineMarkup :: Monoid a
=> T2T a
-> (a -> Inlines)
-> Char
-> (String -> a)
-> T2T Inlines
inlineMarkup p f c special = try $ do
start <- many1 (char c)
let l = length start
guard (l >= 2)
when (l == 2) (void $ notFollowedBy space)
body <- optionMaybe (try $ manyTill (noneOf "\n\r")
(try $ lookAhead (noneOf " " >> string [c,c] )))
case body of
Just middle -> do
lastChar <- anyChar
end <- many1 (char c)
let parser inp = parseFromString' (mconcat <$> many p) inp
let start' = case drop 2 start of
"" -> mempty
xs -> special xs
body' <- parser (middle ++ [lastChar])
let end' = case drop 2 end of
"" -> mempty
xs -> special xs
return $ f (start' <> body' <> end')
Nothing -> do
guard (l >= 5)
let body' = replicate (l 4) c
return $ f (special body')
link :: T2T Inlines
link = try imageLink <|> titleLink
titleLink :: T2T Inlines
titleLink = try $ do
char '['
notFollowedBy space
tokens <- sepBy1 (many $ noneOf " ]") space
guard (length tokens >= 2)
char ']'
let link' = last tokens
guard $ not $ null link'
let tit = unwords (init tokens)
return $ B.link link' "" (B.text tit)
imageLink :: T2T Inlines
imageLink = try $ do
char '['
body <- image
many1 space
l <- manyTill (noneOf "\n\r ") (char ']')
return (B.link l "" body)
macro :: T2T Inlines
macro = try $ do
name <- string "%%" *> oneOfStringsCI (map fst commands)
optional (try $ enclosed (char '(') (char ')') anyChar)
lookAhead (spaceChar <|> oneOf specialChars <|> newline)
maybe (return mempty) (\f -> B.str <$> asks f) (lookup name commands)
where
commands = [ ("date", date), ("mtime", mtime)
, ("infile", infile), ("outfile", outfile)]
url :: T2T Inlines
url = try $ do
(rawUrl, escapedUrl) <- try uri <|> emailAddress
return $ B.link rawUrl "" (B.str escapedUrl)
uri :: T2T (String, String)
uri = try $ do
address <- t2tURI
return (address, escapeURI address)
t2tURI :: T2T String
t2tURI = do
start <- try ((++) <$> proto <*> urlLogin) <|> guess
domain <- many1 chars
sep <- many (char '/')
form' <- option mempty ((:) <$> char '?' <*> many1 form)
anchor' <- option mempty ((:) <$> char '#' <*> many anchor)
return (start ++ domain ++ sep ++ form' ++ anchor')
where
protos = ["http", "https", "ftp", "telnet", "gopher", "wais"]
proto = (++) <$> oneOfStrings protos <*> string "://"
guess = (++) <$> (((++) <$> stringAnyCase "www" <*> option mempty ((:[]) <$> oneOf "23"))
<|> stringAnyCase "ftp") <*> ((:[]) <$> char '.')
login = alphaNum <|> oneOf "_.-"
pass = many (noneOf " @")
chars = alphaNum <|> oneOf "%._/~:,=$@&+-"
anchor = alphaNum <|> oneOf "%._0"
form = chars <|> oneOf ";*"
urlLogin = option mempty $ try ((\x y z -> x ++ y ++ [z]) <$> many1 login <*> option mempty ((:) <$> char ':' <*> pass) <*> char '@')
image :: T2T Inlines
image = try $ do
let extensions = [".jpg", ".jpeg", ".gif", ".png", ".eps", ".bmp"]
char '['
(path, ext) <- manyUntil (noneOf "\n\t\r ") (oneOfStrings extensions)
char ']'
return $ B.image (path ++ ext) "" mempty
specialChars :: String
specialChars = "%*-_/|:+;"
tab :: T2T Char
tab = char '\t'
space :: T2T Char
space = char ' '
spaces :: T2T String
spaces = many space
endline :: T2T Inlines
endline = try $ do
newline
notFollowedBy blankline
notFollowedBy hrule
notFollowedBy title
notFollowedBy verbatim
notFollowedBy rawBlock
notFollowedBy taggedBlock
notFollowedBy quote
notFollowedBy list
notFollowedBy table
return B.softbreak
str :: T2T Inlines
str = try $ B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
whitespace :: T2T Inlines
whitespace = try $ B.space <$ spaceChar
symbol :: T2T Inlines
symbol = B.str . (:[]) <$> oneOf specialChars
getTarget :: T2T String
getTarget = do
mv <- lookupMeta "target" . stateMeta <$> getState
let MetaString target = fromMaybe (MetaString "html") mv
return target
atStart :: T2T ()
atStart = (sourceColumn <$> getPosition) >>= guard . (== 1)
ignoreSpacesCap :: T2T String -> T2T String
ignoreSpacesCap p = map toLower <$> (spaces *> p <* spaces)