module Text.Luthor.Syntax (
char, string, charI, stringI
, upAlpha, loAlpha, alpha
, digit, P.hexDigit, P.octDigit, binDigit
, ctl
, asciiText, uniText
, cr, lf, sp, ht, sq, dq
, colon, semicolon, dot, comma
, bsEsc
, yes, no, yesno
, lws, newline, lineBreak, crlf
, bsnl, bsnlwsbs
, IndentPolicy(..), dentation
, many1Not
, sigilized
, inParens, inBrackets, inBraces, inAngles
, numSign, numBase, numNatural, numAfterPoint, numDenominator
, numOptSign, numInteger
, xDigit, stringToInteger, stringToMantissa
, integer, rational, scientific
, letterEsc
, decimalEsc
, asciiEsc, loUniEsc, hiUniEsc, uniEsc
, cEscapes
, sqString, dqString
, lineComment
, blockComment
, nestingComment
, charClass
, uniPrint, uniPrintMinus
, uniIdClass, uniIdClassMinus
, aChar, P.anyChar, P.oneOf, P.noneOf
) where
import Data.Ratio
import Data.Char
import Data.String (IsString(..))
import Data.Maybe
import Data.List
import Text.Parsec (ParsecT, Stream)
import qualified Text.Parsec as P
import Text.Parsec.Char (satisfy, char, oneOf, noneOf)
import Text.Luthor.Combinator
import Control.Monad
string :: (Stream s m Char) => String -> ParsecT s u m String
string = try . P.string
charI :: (Stream s m Char) => Char -> ParsecT s u m Char
charI c = expect [toLower c] . expect [toUpper c] . satisfy $ (== toLower c) . toLower
stringI :: (Stream s m Char) => String -> ParsecT s u m String
stringI str = try $ mapM charI str
upAlpha :: (Stream s m Char) => ParsecT s u m Char
upAlpha = expect "uppercase character" $ satisfy _upAlpha
loAlpha :: (Stream s m Char) => ParsecT s u m Char
loAlpha = expect "lowercase character" $ satisfy _loAlpha
alpha :: (Stream s m Char) => ParsecT s u m Char
alpha = expect "alphabetic character" $ satisfy _alpha
digit :: (Stream s m Char) => ParsecT s u m Char
digit = expect "digit" $ satisfy _digit
binDigit :: (Stream s m Char) => ParsecT s u m Char
binDigit = expect "binary digit" $ oneOf "01"
ctl :: (Stream s m Char) => ParsecT s u m Char
ctl = expect "control character" $ satisfy _asciiControl
asciiText :: (Stream s m Char) => ParsecT s u m Char
asciiText = expect "printable ascii character" $ satisfy (\c -> '\32' <= c && c <= '\126')
uniText :: (Stream s m Char) => ParsecT s u m Char
uniText = expect "printable unicode character" $ satisfy uniPrint
cr :: (Stream s m Char) => ParsecT s u m ()
cr = expect "carraige return" . void $ char '\r'
lf :: (Stream s m Char) => ParsecT s u m ()
lf = expect "linefeed" . void $ char '\n'
sp :: (Stream s m Char) => ParsecT s u m ()
sp = expect "space" . void $ char ' '
ht :: (Stream s m Char) => ParsecT s u m ()
ht = expect "tab" . void $ char '\t'
sq :: (Stream s m Char) => ParsecT s u m ()
sq = expect "single quote" . void $ char '\''
dq :: (Stream s m Char) => ParsecT s u m ()
dq = expect "double quote" . void $ char '\"'
colon :: (Stream s m Char) => ParsecT s u m ()
colon = expect "colon" . void $ char ':'
semicolon :: (Stream s m Char) => ParsecT s u m ()
semicolon = expect "semicolon" . void $ char ';'
dot :: (Stream s m Char) => ParsecT s u m ()
dot = expect "dot" . void $ char '.'
comma :: (Stream s m Char) => ParsecT s u m ()
comma = expect "comma" . void $ char ','
bsEsc :: (Stream s m Char) => (Char -> Bool) -> ParsecT s u m Char
bsEsc p = try $ char '\\' *> satisfy p
yes :: (Stream s m Char) => ParsecT s u m Bool
yes = True <$ choice [
void $ stringI "yes"
, void $ charI 'y'
, void $ char '1'
]
no :: (Stream s m Char) => ParsecT s u m Bool
no = False <$ choice [
void $ stringI "no"
, void $ charI 'n'
, void $ char '0'
]
yesno :: (Stream s m Char) => ParsecT s u m Bool
yesno = yes <|> no
crlf :: (Stream s m Char) => ParsecT s u m ()
crlf = expect "CRLF" $ void "\r\n"
lws :: (Stream s m Char) => ParsecT s u m String
lws = expect "linear whitespace" . many1 $ oneOf " \t"
newline :: (Stream s m Char) => ParsecT s u m ()
newline = expect "newline" . void $ oneOf "\n\r"
lineBreak :: (Stream s m Char) => ParsecT s u m ()
lineBreak = expect "line break" $ newline <|> P.eof
bsnl :: (Stream s m Char) => ParsecT s u m ()
bsnl = void $ char '\\' *> newline
bsnlwsbs :: (Stream s m Char) => ParsecT s u m ()
bsnlwsbs = void $ between2 (char '\\') $ newline *> lws
data IndentPolicy = DontMix [Char]
| Convert [(Char, Int)]
dentation :: (Stream s m Char) => IndentPolicy -> ParsecT s u m Int
dentation = _dentation lineBreak
lexDentation :: (Stream s m Char) => IndentPolicy -> ParsecT s u m Int
lexDentation = _dentation newline
_dentation :: (Stream s m Char) => ParsecT s u m newline -> IndentPolicy -> ParsecT s u m Int
_dentation nl (DontMix cs) = try $ do
nl
ws <- P.many $ oneOf cs
when (length (nub ws) > 1) $ unexpected "mixed indentation"
return $ length ws
_dentation nl (Convert table) = try $ do
nl
ws <- P.many $ oneOf (fst <$> table)
return $ sum [fromJust $ lookup c table | c <- ws]
many1Not :: (Stream s m Char)
=> (Char -> Bool)
-> (Char -> Bool)
-> ParsecT s u m String
many1Not allowed notAtFront = try $ do
first <- satisfy $ \c -> allowed c && (not . notAtFront) c
rest <- many $ satisfy allowed
return (first:rest)
sigilized :: (Stream s m Char)
=> [(Char, sigil)]
-> ParsecT s u m a
-> ParsecT s u m (sigil, a)
sigilized sigils ident = try $ do
let wrap (c, s) = (char c, pure s)
dispatch (wrap <$> sigils) <$$> (,) <*> ident
inParens :: (Stream s m Char) => ParsecT s u m a -> ParsecT s u m a
inParens = between (char '(') (char ')')
inBrackets :: (Stream s m Char) => ParsecT s u m a -> ParsecT s u m a
inBrackets = between (char '[') (char ']')
inBraces :: (Stream s m Char) => ParsecT s u m a -> ParsecT s u m a
inBraces = between (char '{') (char '}')
inAngles :: (Stream s m Char) => ParsecT s u m a -> ParsecT s u m a
inAngles = between (char '<') (char '>')
numSign :: (Stream s m Char) => ParsecT s u m Integer
numSign = dispatch $ zip (char <$> "-+") (pure <$> [1, 1])
numBase :: (Stream s m Char) => ParsecT s u m Int
numBase = P.option 10 . dispatch $ zip
(stringI <$> ["0x", "0o", "0b"])
(pure <$> [16, 8, 2])
numNatural :: (Stream s m Char) => Int -> ParsecT s u m Integer
numNatural base = stringToInteger base <$> xDigits base
numAfterPoint :: (Stream s m Char) => Int -> ParsecT s u m Rational
numAfterPoint base = stringToMantissa base <$> xDigits base
numDenominator :: (Stream s m Char) => Int -> ParsecT s u m Rational
numDenominator base = try $ do
denom <- numNatural base
if denom == 0 then P.parserZero else return (1%denom)
numOptSign :: (Stream s m Char) => ParsecT s u m Integer
numOptSign = P.option 1 numSign
numInteger :: (Stream s m Char) => Int -> ParsecT s u m Integer
numInteger base = numOptSign <$$> (*) <*> numNatural base
integer :: (Stream s m Char) => ParsecT s u m Integer
integer = try $ numOptSign <$$> (*) <*> (numNatural =<< numBase)
rational :: (Stream s m Char) => ParsecT s u m Rational
rational = try $ do
sign <- toRational <$> numOptSign
base <- numBase
numer <- toRational <$> numNatural base
char '/'
denom <- numDenominator base
return $ sign * numer * denom
scientific :: (Stream s m Char) => ParsecT s u m Rational
scientific = try $ do
sign <- toRational <$> numOptSign
base <- option 10 $ 16 <$ stringI "0x"
whole <- toRational <$> numNatural base
dot
mantissa <- numAfterPoint base
exponent <- option 0 $ case base of
10 -> charI 'e' *> numInteger base
16 -> charI 'h' *> numInteger base
_ -> error "unsupported base in Text.Luthor.Syntax.scientific: only 10 and 16 allowed"
let timesExp = toRational base ^^ exponent
return $ sign * (whole + mantissa) * timesExp
letterEsc :: (Stream s m Char) => [(Char, Char)] -> ParsecT s u m Char
letterEsc table = fromJust . flip lookup table <$> bsEsc (`elem` map fst table)
cEscapes :: [(Char, Char)]
cEscapes = zip "\\0abefnrtc\'\"\\" "\\\0\a\b\27\f\n\r\t\v\'\""
decimalEsc :: (Stream s m Char) => ParsecT s u m Char
decimalEsc = try $ do
char '\\'
n <- stringToInteger 10 <$> P.many1 digit
when (n > 0x10FFFF) $ unexpected "code point above U+10FFFF"
return $ chr n
asciiEsc :: (Stream s m Char) => ParsecT s u m Char
asciiEsc = try $ do
stringI "\\x"
chr . stringToInteger 16 <$> P.count 2 P.hexDigit
loUniEsc :: (Stream s m Char) => ParsecT s u m Char
loUniEsc = try $ do
P.string "\\u"
chr . stringToInteger 16 <$> P.count 4 P.hexDigit
hiUniEsc :: (Stream s m Char) => ParsecT s u m Char
hiUniEsc = try $ do
P.string "\\U"
chr . stringToInteger 16 <$> (six <||> five)
where
five = optional_ (char '0') *> P.count 5 P.hexDigit
six = string "10" <$$> (++) <*> P.count 4 P.hexDigit
uniEsc :: (Stream s m Char) => ParsecT s u m Char
uniEsc = loUniEsc <|> hiUniEsc
sqString :: (Stream s m Char) => ParsecT s u m String
sqString = between2 (char '\'') (P.many $ normal <|> escape)
where
normal = satisfy (/='\'')
escape = '\'' <$ "''"
dqString :: (Stream s m Char) => [(Char, Char)] -> ParsecT s u m String
dqString table = between2 (char '\"') (catMaybes <$> P.many (normal <|> escape <|> empty))
where
normal = (Just <$>) . satisfy $ uniPrintMinus (`elem` "\\\"")
escape = (Just <$>) $ letterEsc table <|> decimalEsc <|> asciiEsc <|> uniEsc
empty = (Nothing <$) $ void "\\&" <|> bsnlwsbs
lineComment :: (Stream s m Char) => String -> ParsecT s u m String
lineComment start = do
string start
P.anyChar `manyTill` lineBreak
blockComment :: (Stream s m Char)
=> String
-> String
-> ParsecT s u m String
blockComment start end = do
string start
P.anyChar `manyThru` string end
nestingComment :: (Stream s m Char)
=> String
-> String
-> ParsecT s u m String
nestingComment start end = do
string start
concat <$> (inner <|> text) `manyThru` string end
where
inner = nestingComment start end >>= \body -> return (start ++ body ++ end)
text = P.anyChar `manyTill` (string start <|> string end)
aChar :: (Stream s m Char) => (Char -> Bool) -> ParsecT s u m Char
aChar = satisfy
charClass :: String -> (Char -> Bool)
charClass str = case str of
('^':str') -> not . go [] [] str'
_ -> go [] [] str
where
go singles ranges [] = \c -> inRange c `any` ranges || c `elem` nub singles
go singles ranges (lo:'-':hi:rest) = go singles ((lo, hi):ranges) rest
go singles ranges (c:rest) = go (c:singles) ranges rest
inRange c (lo, hi) = lo <= c && c <= hi
uniPrint :: Char -> Bool
uniPrint c = case generalCategory c of
LineSeparator -> False
ParagraphSeparator -> False
Control -> False
Surrogate -> False
NotAssigned -> False
_ -> True
uniPrintMinus :: (Char -> Bool) -> (Char -> Bool)
uniPrintMinus p c = uniPrint c && not (p c)
uniIdClass :: Char -> Bool
uniIdClass c = case generalCategory c of
Space -> False
LineSeparator -> False
ParagraphSeparator -> False
Control -> False
Format -> False
Surrogate -> False
PrivateUse -> False
NotAssigned -> False
_ -> True
uniIdClassMinus :: (Char -> Bool) -> (Char -> Bool)
uniIdClassMinus p c = uniIdClass c && not (p c)
instance (IsString a, Stream s m Char) => IsString (ParsecT s u m a) where
fromString x = fromString <$> string x
_upAlpha c = 'A' <= c && c <= 'Z'
_loAlpha c = 'a' <= c && c <= 'z'
_alpha c = _upAlpha c || _loAlpha c
_digit c = '0' <= c && c <= '9'
_alphaNum c = _alpha c || _digit c
_asciiControl c = c <= '\31' || c == '\127'
xDigit :: (Stream s m Char) => Int -> ParsecT s u m Char
xDigit base = case base of
2 -> binDigit
8 -> P.octDigit
10 -> digit
16 -> P.hexDigit
_ -> error "unrecognized base in Text.Luthor.Syntax.xDigit (accepts only 2, 8, 10, or 16)"
xDigits :: (Stream s m Char) => Int -> ParsecT s u m String
xDigits = many1 . xDigit
stringToInteger :: Integral n => Int -> String -> n
stringToInteger base = fromIntegral . foldl impl 0
where impl acc x = acc * fromIntegral base + (fromIntegral . digitToInt) x
stringToMantissa :: Int -> String -> Ratio Integer
stringToMantissa base = (/ (fromIntegral base%1)) . foldr impl (0 % 1)
where
impl x acc = acc / (fromIntegral base%1) + digitToRatio x
digitToRatio = (%1) . fromIntegral . digitToInt