{-# LANGUAGE BangPatterns #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Sources Copyright : Copyright (C) 2021-2022 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane Stability : alpha Portability : portable Defines Sources object to be used as input to pandoc parsers and redefines Char parsers so they get source position information from it. -} module Text.Pandoc.Sources ( Sources(..) , ToSources(..) , UpdateSourcePos(..) , sourcesToText , initialSourceName , addToSources , ensureFinalNewlines , addToInput , satisfy , oneOf , noneOf , anyChar , char , string , newline , space , spaces , letter , digit , hexDigit , alphaNum ) where import qualified Text.Parsec as P import Text.Parsec (Stream(..), ParsecT) import Text.Parsec.Pos as P import Data.Text (Text) import qualified Data.Text as T import Data.Char (isSpace, isLetter, isAlphaNum, isDigit, isHexDigit) import Data.String (IsString(..)) import qualified Data.List.NonEmpty as NonEmpty -- | A list of inputs labeled with source positions. It is assumed -- that the 'Text's have @\n@ line endings. newtype Sources = Sources { unSources :: [(SourcePos, Text)] } deriving (Show, Semigroup, Monoid) instance Monad m => Stream Sources m Char where uncons (Sources []) = return Nothing uncons (Sources ((pos,t):rest)) = case T.uncons t of Nothing -> uncons (Sources rest) Just (c,t') -> return $ Just (c, Sources ((pos,t'):rest)) instance IsString Sources where fromString s = Sources [(P.initialPos "", T.pack (filter (/='\r') s))] class ToSources a where toSources :: a -> Sources instance ToSources Text where toSources t = Sources [(P.initialPos "", T.filter (/='\r') t)] instance ToSources [(FilePath, Text)] where toSources = Sources . map (\(fp,t) -> (P.initialPos fp, T.snoc (T.filter (/='\r') t) '\n')) instance ToSources Sources where toSources = id sourcesToText :: Sources -> Text sourcesToText (Sources xs) = mconcat $ map snd xs addToSources :: Monad m => SourcePos -> Text -> ParsecT Sources u m () addToSources pos t = do curpos <- P.getPosition Sources xs <- P.getInput let xs' = case xs of [] -> [] ((_,t'):rest) -> (curpos,t'):rest P.setInput $ Sources ((pos, T.filter (/='\r') t):xs') ensureFinalNewlines :: Int -- ^ number of trailing newlines -> Sources -> Sources ensureFinalNewlines n (Sources xs) = case NonEmpty.nonEmpty xs of Nothing -> Sources [(initialPos "", T.replicate n "\n")] Just lst -> case NonEmpty.last lst of (spos, t) -> case T.length (T.takeWhileEnd (=='\n') t) of len | len >= n -> Sources xs | otherwise -> Sources (NonEmpty.init lst ++ [(spos, t <> T.replicate (n - len) "\n")]) class UpdateSourcePos s c where updateSourcePos :: SourcePos -> c -> s -> SourcePos instance UpdateSourcePos Text Char where updateSourcePos pos c _ = updatePosChar pos c instance UpdateSourcePos Sources Char where updateSourcePos pos c sources = case sources of Sources [] -> updatePosChar pos c Sources ((_,t):(pos',_):_) | T.null t -> pos' Sources _ -> case c of '\n' -> incSourceLine (setSourceColumn pos 1) 1 '\t' -> incSourceColumn pos (4 - ((sourceColumn pos - 1) `mod` 4)) _ -> incSourceColumn pos 1 -- | Get name of first source in 'Sources'. initialSourceName :: Sources -> FilePath initialSourceName (Sources []) = "" initialSourceName (Sources ((pos,_):_)) = sourceName pos -- | Add some text to the beginning of the input sources. -- This simplifies code that expands macros. addToInput :: Monad m => Text -> ParsecT Sources u m () addToInput t = do Sources xs <- P.getInput case xs of [] -> P.setInput $ Sources [(initialPos "",t)] (pos,t'):rest -> P.setInput $ Sources ((pos, t <> t'):rest) -- We need to redefine the parsers in Text.Parsec.Char so that they -- update source positions properly from the Sources stream. satisfy :: (Monad m, Stream s m Char, UpdateSourcePos s Char) => (Char -> Bool) -> ParsecT s u m Char satisfy f = P.tokenPrim show updateSourcePos matcher where matcher !c = if f c then Just c else Nothing oneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char) => [Char] -> ParsecT s u m Char oneOf cs = satisfy (`elem` cs) noneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char) => [Char] -> ParsecT s u m Char noneOf cs = satisfy (`notElem` cs) anyChar :: (Monad m, Stream s m Char, UpdateSourcePos s Char) => ParsecT s u m Char anyChar = satisfy (const True) char :: (Monad m, Stream s m Char, UpdateSourcePos s Char) => Char -> ParsecT s u m Char char c = satisfy (== c) string :: (Monad m, Stream s m Char, UpdateSourcePos s Char) => [Char] -> ParsecT s u m [Char] string = mapM char newline :: (Monad m, Stream s m Char, UpdateSourcePos s Char) => ParsecT s u m Char newline = satisfy (== '\n') space :: (Monad m, Stream s m Char, UpdateSourcePos s Char) => ParsecT s u m Char space = satisfy isSpace spaces :: (Monad m, Stream s m Char, UpdateSourcePos s Char) => ParsecT s u m () spaces = P.skipMany space P. "white space" letter :: (Monad m, Stream s m Char, UpdateSourcePos s Char) => ParsecT s u m Char letter = satisfy isLetter alphaNum :: (Monad m, Stream s m Char, UpdateSourcePos s Char) => ParsecT s u m Char alphaNum = satisfy isAlphaNum digit :: (Monad m, Stream s m Char, UpdateSourcePos s Char) => ParsecT s u m Char digit = satisfy isDigit hexDigit :: (Monad m, Stream s m Char, UpdateSourcePos s Char) => ParsecT s u m Char hexDigit = satisfy isHexDigit