{- Copyright (C) 2014 Matthew Pickering This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | Module : Text.Pandoc.Readers.Txt2Tags Copyright : Copyright (C) 2014 Matthew Pickering License : GNU GPL, version 2 or above Maintainer : Matthew Pickering Conversion of txt2tags formatted plain text to 'Pandoc' document. -} module Text.Pandoc.Readers.Txt2Tags ( readTxt2Tags , getT2TMeta , T2TMeta (..) ) where import Control.Monad (guard, void, when) import Control.Monad.Except (catchError, throwError) import Control.Monad.Reader (Reader, asks, runReader) import Data.Char (toLower) import Data.Default import Data.List (intercalate, transpose) import Data.Maybe (fromMaybe) import Data.Monoid ((<>)) import Data.Text (Text) import qualified Data.Text as T import Data.Time.Format (formatTime) import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad) import qualified Text.Pandoc.Class as P import Text.Pandoc.Compat.Time (defaultTimeLocale) import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (space, spaces, uri) import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI, underlineSpan) type T2T = ParserT String ParserState (Reader T2TMeta) -- | An object for the T2T macros meta information -- the contents of each field is simply substituted verbatim into the file data T2TMeta = T2TMeta { date :: String -- ^ Current date , mtime :: String -- ^ Last modification time of infile , infile :: FilePath -- ^ Input file , outfile :: FilePath -- ^ Output file } deriving Show instance Default T2TMeta where def = T2TMeta "" "" "" "" -- | Get the meta information required by Txt2Tags macros getT2TMeta :: PandocMonad m => m T2TMeta getT2TMeta = do inps <- P.getInputFiles outp <- fromMaybe "" <$> P.getOutputFile curDate <- formatTime defaultTimeLocale "%F" <$> P.getZonedTime let getModTime = fmap (formatTime defaultTimeLocale "%T") . P.getModificationTime curMtime <- case inps of [] -> formatTime defaultTimeLocale "%T" <$> P.getZonedTime _ -> catchError (maximum <$> mapM getModTime inps) (const (return "")) return $ T2TMeta curDate curMtime (intercalate ", " inps) outp -- | Read Txt2Tags from an input string returning a Pandoc document readTxt2Tags :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readTxt2Tags opts s = do meta <- getT2TMeta let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) $ T.unpack (crFilter s) ++ "\n\n" case parsed of Right result -> return result Left e -> throwError e -- | Read Txt2Tags (ignoring all macros) from an input string returning -- a Pandoc document -- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> String -> m Pandoc -- readTxt2TagsNoMacros = readTxt2Tags parseT2T :: T2T Pandoc parseT2T = do -- Parse header if standalone flag is set standalone <- getOption readerStandalone when standalone parseHeader body <- mconcat <$> manyTill block eof meta' <- stateMeta <$> getState return $ Pandoc meta' (B.toList body) parseHeader :: T2T () parseHeader = do () <$ try blankline <|> header meta <- stateMeta <$> getState optional blanklines config <- manyTill setting (notFollowedBy setting) -- TODO: Handle settings better let settings = foldr (\(k,v) -> B.setMeta k (MetaString v)) meta config updateState (\s -> s {stateMeta = settings}) <* optional blanklines header :: T2T () header = titleline >> authorline >> dateline headerline :: B.ToMetaValue a => String -> T2T a -> T2T () headerline field p = (() <$ try blankline) <|> (p >>= updateState . B.setMeta field) titleline :: T2T () titleline = headerline "title" (trimInlines . mconcat <$> manyTill inline newline) authorline :: T2T () authorline = headerline "author" (sepBy author (char ';') <* newline) where author = trimInlines . mconcat <$> many (notFollowedBy (char ';' <|> newline) >> inline) dateline :: T2T () dateline = headerline "date" (trimInlines . mconcat <$> manyTill inline newline) type Keyword = String type Value = String setting :: T2T (Keyword, Value) setting = do string "%!" keyword <- ignoreSpacesCap (many1 alphaNum) char ':' value <- ignoreSpacesCap (manyTill anyChar newline) return (keyword, value) -- Blocks parseBlocks :: T2T Blocks parseBlocks = mconcat <$> manyTill block eof block :: T2T Blocks block = choice [ mempty <$ blanklines , quote , hrule -- hrule must go above title , title , commentBlock , verbatim , rawBlock , taggedBlock , list , table , para ] title :: T2T Blocks title = try $ balancedTitle '+' <|> balancedTitle '=' balancedTitle :: Char -> T2T Blocks balancedTitle c = try $ do spaces level <- length <$> many1 (char c) guard (level <= 5) -- Max header level 5 heading <- manyTill (noneOf "\n\r") (count level (char c)) label <- optionMaybe (enclosed (char '[') (char ']') (alphaNum <|> oneOf "_-")) many spaceChar *> newline let attr = maybe nullAttr (\x -> (x, [], [])) label return $ B.headerWith attr level (trimInlines $ B.text heading) para :: T2T Blocks para = try $ do ils <- parseInlines nl <- option False (True <$ newline) option (B.plain ils) (guard nl >> notFollowedBy listStart >> return (B.para ils)) where listStart = try bulletListStart <|> orderedListStart commentBlock :: T2T Blocks commentBlock = try (blockMarkupArea anyLine (const mempty) "%%%") <|> comment -- Seperator and Strong line treated the same hrule :: T2T Blocks hrule = try $ do spaces line <- many1 (oneOf "=-_") guard (length line >= 20) B.horizontalRule <$ blankline quote :: T2T Blocks quote = try $ do lookAhead tab rawQuote <- many1 (tab *> optional spaces *> anyLine) contents <- parseFromString' parseBlocks (intercalate "\n" rawQuote ++ "\n\n") return $ B.blockQuote contents commentLine :: T2T Inlines commentLine = comment -- List Parsing code from Org Reader list :: T2T Blocks list = choice [bulletList, orderedList, definitionList] bulletList :: T2T Blocks bulletList = B.bulletList . compactify <$> many1 (listItem bulletListStart parseBlocks) orderedList :: T2T Blocks orderedList = B.orderedList . compactify <$> many1 (listItem orderedListStart parseBlocks) definitionList :: T2T Blocks definitionList = try $ B.definitionList . compactifyDL <$> many1 (listItem definitionListStart definitionListEnd) definitionListEnd :: T2T (Inlines, [Blocks]) definitionListEnd = (,) <$> (mconcat <$> manyTill inline newline) <*> ((:[]) <$> parseBlocks) genericListStart :: T2T Char -> T2T Int genericListStart listMarker = try $ (2+) <$> (length <$> many spaceChar <* listMarker <* space <* notFollowedBy space) -- parses bullet list \start and returns its length (excl. following whitespace) bulletListStart :: T2T Int bulletListStart = genericListStart (char '-') orderedListStart :: T2T Int orderedListStart = genericListStart (char '+' ) definitionListStart :: T2T Int definitionListStart = genericListStart (char ':') -- parse raw text for one list item, excluding start marker and continuations listItem :: T2T Int -> T2T a -> T2T a listItem start end = try $ do markerLength <- try start firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) rest <- concat <$> many (listContinuation markerLength) parseFromString' end $ firstLine ++ blank ++ rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. listContinuation :: Int -> T2T String listContinuation markerLength = try $ notFollowedBy' (blankline >> blankline) *> (mappend <$> (concat <$> many1 listLine) <*> many blankline) where listLine = try $ indentWith markerLength *> anyLineNewline -- Table table :: T2T Blocks table = try $ do tableHeader <- fmap snd <$> option mempty (try headerRow) rows <- many1 (many commentLine *> tableRow) let columns = transpose rows let ncolumns = length columns let aligns = map (foldr1 findAlign . map fst) columns let rows' = map (map snd) rows let size = maximum (map length rows') let rowsPadded = map (pad size) rows' let headerPadded = if null tableHeader then mempty else pad size tableHeader return $ B.table mempty (zip aligns (replicate ncolumns 0.0)) headerPadded rowsPadded pad :: (Monoid a) => Int -> [a] -> [a] pad n xs = xs ++ replicate (n - length xs) mempty findAlign :: Alignment -> Alignment -> Alignment findAlign x y | x == y = x | otherwise = AlignDefault headerRow :: T2T [(Alignment, Blocks)] headerRow = genericRow (string "||") tableRow :: T2T [(Alignment, Blocks)] tableRow = genericRow (char '|') genericRow :: T2T a -> T2T [(Alignment, Blocks)] genericRow start = try $ do spaces *> start manyTill tableCell newline "genericRow" tableCell :: T2T (Alignment, Blocks) tableCell = try $ do leftSpaces <- length <$> lookAhead (many1 space) -- Case of empty cell means we must lookAhead content <- manyTill inline (try $ lookAhead cellEnd) rightSpaces <- length <$> many space let align = case compare leftSpaces rightSpaces of LT -> AlignLeft EQ -> AlignCenter GT -> AlignRight endOfCell return (align, B.plain (B.trimInlines $ mconcat content)) where cellEnd = void newline <|> (many1 space *> endOfCell) endOfCell :: T2T () endOfCell = try (skipMany1 $ char '|') <|> ( () <$ lookAhead newline) -- Raw area verbatim :: T2T Blocks verbatim = genericBlock anyLineNewline B.codeBlock "```" rawBlock :: T2T Blocks rawBlock = genericBlock anyLineNewline (B.para . B.str) "\"\"\"" taggedBlock :: T2T Blocks taggedBlock = do target <- getTarget genericBlock anyLineNewline (B.rawBlock target) "'''" -- Generic genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks genericBlock p f s = blockMarkupArea p f s <|> blockMarkupLine p f s blockMarkupArea :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks blockMarkupArea p f s = try (do string s *> blankline f . mconcat <$> manyTill p (eof <|> void (string s *> blankline))) blockMarkupLine :: T2T a -> (a -> Blocks) -> String -> T2T Blocks blockMarkupLine p f s = try (f <$> (string s *> space *> p)) -- Can be in either block or inline position comment :: Monoid a => T2T a comment = try $ do atStart notFollowedBy macro mempty <$ (char '%' *> anyLine) -- Inline parseInlines :: T2T Inlines parseInlines = trimInlines . mconcat <$> many1 inline inline :: T2T Inlines inline = choice [ endline , macro , commentLine , whitespace , url , link , image , bold , underline , code , raw , tagged , strike , italic , code , str , symbol ] bold :: T2T Inlines bold = inlineMarkup inline B.strong '*' B.str underline :: T2T Inlines underline = inlineMarkup inline underlineSpan '_' B.str strike :: T2T Inlines strike = inlineMarkup inline B.strikeout '-' B.str italic :: T2T Inlines italic = inlineMarkup inline B.emph '/' B.str code :: T2T Inlines code = inlineMarkup ((:[]) <$> anyChar) B.code '`' id raw :: T2T Inlines raw = inlineMarkup ((:[]) <$> anyChar) B.text '"' id tagged :: T2T Inlines tagged = do target <- getTarget inlineMarkup ((:[]) <$> anyChar) (B.rawInline target) '\'' id -- Parser for markup indicated by a double character. -- Inline markup is greedy and glued -- Greedy meaning ***a*** = Bold [Str "*a*"] -- Glued meaning that markup must be tight to content -- Markup can't pass newlines inlineMarkup :: Monoid a => T2T a -- Content parser -> (a -> Inlines) -- Constructor -> Char -- Fence -> (String -> a) -- Special Case to handle ****** -> T2T Inlines inlineMarkup p f c special = try $ do start <- many1 (char c) let l = length start guard (l >= 2) when (l == 2) (void $ notFollowedBy space) -- We must make sure that there is no space before the start of the -- closing tags body <- optionMaybe (try $ manyTill (noneOf "\n\r") (try $ lookAhead (noneOf " " >> string [c,c] ))) case body of Just middle -> do lastChar <- anyChar end <- many1 (char c) let parser inp = parseFromString' (mconcat <$> many p) inp let start' = case drop 2 start of "" -> mempty xs -> special xs body' <- parser (middle ++ [lastChar]) let end' = case drop 2 end of "" -> mempty xs -> special xs return $ f (start' <> body' <> end') Nothing -> do -- Either bad or case such as ***** guard (l >= 5) let body' = replicate (l - 4) c return $ f (special body') link :: T2T Inlines link = try imageLink <|> titleLink -- Link with title titleLink :: T2T Inlines titleLink = try $ do char '[' notFollowedBy space tokens <- sepBy1 (many $ noneOf " ]") space guard (length tokens >= 2) char ']' let link' = last tokens guard $ not $ null link' let tit = unwords (init tokens) return $ B.link link' "" (B.text tit) -- Link with image imageLink :: T2T Inlines imageLink = try $ do char '[' body <- image many1 space l <- manyTill (noneOf "\n\r ") (char ']') return (B.link l "" body) macro :: T2T Inlines macro = try $ do name <- string "%%" *> oneOfStringsCI (map fst commands) optional (try $ enclosed (char '(') (char ')') anyChar) lookAhead (spaceChar <|> oneOf specialChars <|> newline) maybe (return mempty) (\f -> B.str <$> asks f) (lookup name commands) where commands = [ ("date", date), ("mtime", mtime) , ("infile", infile), ("outfile", outfile)] -- raw URLs in text are automatically linked url :: T2T Inlines url = try $ do (rawUrl, escapedUrl) <- try uri <|> emailAddress return $ B.link rawUrl "" (B.str escapedUrl) uri :: T2T (String, String) uri = try $ do address <- t2tURI return (address, escapeURI address) -- The definition of a URI in the T2T source differs from the -- actual definition. This is a transcription of the definition in -- the source of v2.6 --isT2TURI :: String -> Bool --isT2TURI (parse t2tURI "" -> Right _) = True --isT2TURI _ = False t2tURI :: T2T String t2tURI = do start <- try ((++) <$> proto <*> urlLogin) <|> guess domain <- many1 chars sep <- many (char '/') form' <- option mempty ((:) <$> char '?' <*> many1 form) anchor' <- option mempty ((:) <$> char '#' <*> many anchor) return (start ++ domain ++ sep ++ form' ++ anchor') where protos = ["http", "https", "ftp", "telnet", "gopher", "wais"] proto = (++) <$> oneOfStrings protos <*> string "://" guess = (++) <$> (((++) <$> stringAnyCase "www" <*> option mempty ((:[]) <$> oneOf "23")) <|> stringAnyCase "ftp") <*> ((:[]) <$> char '.') login = alphaNum <|> oneOf "_.-" pass = many (noneOf " @") chars = alphaNum <|> oneOf "%._/~:,=$@&+-" anchor = alphaNum <|> oneOf "%._0" form = chars <|> oneOf ";*" urlLogin = option mempty $ try ((\x y z -> x ++ y ++ [z]) <$> many1 login <*> option mempty ((:) <$> char ':' <*> pass) <*> char '@') image :: T2T Inlines image = try $ do -- List taken from txt2tags source let extensions = [".jpg", ".jpeg", ".gif", ".png", ".eps", ".bmp"] char '[' (path, ext) <- manyUntil (noneOf "\n\t\r ") (oneOfStrings extensions) char ']' return $ B.image (path ++ ext) "" mempty -- Characters used in markup specialChars :: String specialChars = "%*-_/|:+;" tab :: T2T Char tab = char '\t' space :: T2T Char space = char ' ' spaces :: T2T String spaces = many space endline :: T2T Inlines endline = try $ do newline notFollowedBy blankline notFollowedBy hrule notFollowedBy title notFollowedBy verbatim notFollowedBy rawBlock notFollowedBy taggedBlock notFollowedBy quote notFollowedBy list notFollowedBy table return B.softbreak str :: T2T Inlines str = try $ B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") whitespace :: T2T Inlines whitespace = try $ B.space <$ spaceChar symbol :: T2T Inlines symbol = B.str . (:[]) <$> oneOf specialChars -- Utility getTarget :: T2T String getTarget = do mv <- lookupMeta "target" . stateMeta <$> getState let MetaString target = fromMaybe (MetaString "html") mv return target atStart :: T2T () atStart = (sourceColumn <$> getPosition) >>= guard . (== 1) ignoreSpacesCap :: T2T String -> T2T String ignoreSpacesCap p = map toLower <$> (spaces *> p <* spaces)