module Burrito.Parse
( parse
) where
import qualified Burrito.Type.Character as Character
import qualified Burrito.Type.Expression as Expression
import qualified Burrito.Type.Literal as Literal
import qualified Burrito.Type.Modifier as Modifier
import qualified Burrito.Type.Name as Name
import qualified Burrito.Type.NonEmpty as NonEmpty
import qualified Burrito.Type.Operator as Operator
import qualified Burrito.Type.Template as Template
import qualified Burrito.Type.Token as Token
import qualified Burrito.Type.Variable as Variable
import qualified Control.Applicative as Applicative
import qualified Control.Monad as Monad
import qualified Data.Char as Char
import qualified Data.Maybe as Maybe
import qualified Data.Word as Word
parse :: String -> Maybe Template.Template
parse string = case runParser parseTemplate string of
Just (template, "") -> Just template
_ -> Nothing
intToWord8 :: Int -> Word.Word8
intToWord8 x =
let
lo = word8ToInt (minBound :: Word.Word8)
hi = word8ToInt (maxBound :: Word.Word8)
in if x < lo
then error $ "intToWord8: " <> show x <> " < " <> show lo
else if x > hi
then error $ "intToWord8: " <> show x <> " > " <> show hi
else fromIntegral x
word8ToInt :: Word.Word8 -> Int
word8ToInt = fromIntegral
newtype Parser a = Parser
{ runParser :: String -> Maybe (a, String)
}
instance Functor Parser where
fmap f p = Parser $ \s -> case runParser p s of
Nothing -> Nothing
Just (x, t) -> Just (f x, t)
instance Applicative Parser where
pure x = Parser $ \s -> Just (x, s)
p <*> q = Parser $ \s -> case runParser p s of
Nothing -> Nothing
Just (f, t) -> case runParser q t of
Nothing -> Nothing
Just (x, u) -> Just (f x, u)
instance Monad Parser where
p >>= f = Parser $ \s -> case runParser p s of
Nothing -> Nothing
Just (x, t) -> runParser (f x) t
instance Applicative.Alternative Parser where
empty = Parser $ const Nothing
p <|> q = Parser $ \s -> case runParser p s of
Nothing -> runParser q s
Just (x, t) -> Just (x, t)
parseAny :: Parser Char
parseAny = Parser $ \string -> case string of
"" -> Nothing
first : rest -> Just (first, rest)
parseBetween :: Parser before -> Parser after -> Parser a -> Parser a
parseBetween before after parser = before *> parser <* after
parseChar :: Char -> Parser Char
parseChar = parseIf . (==)
parseChar_ :: Char -> Parser ()
parseChar_ = Monad.void . parseChar
parseEither :: Parser a -> Parser a -> Parser a
parseEither = (Applicative.<|>)
parseIf :: (Char -> Bool) -> Parser Char
parseIf predicate = do
char <- parseAny
if predicate char then pure char else Applicative.empty
parseNonEmpty :: Parser a -> Parser (NonEmpty.NonEmpty a)
parseNonEmpty parser = nonEmpty <$> parser <*> Applicative.many parser
parseSepBy1 :: Parser separator -> Parser a -> Parser (NonEmpty.NonEmpty a)
parseSepBy1 separator parser =
nonEmpty <$> parser <*> Applicative.many (separator *> parser)
parseTemplate :: Parser Template.Template
parseTemplate = Template.Template <$> Applicative.many parseToken
parseToken :: Parser Token.Token
parseToken = parseEither
(Token.Literal <$> parseLiteral)
(Token.Expression <$> parseExpression)
parseLiteral :: Parser Literal.Literal
parseLiteral = Literal.Literal <$> parseNonEmpty parseCharacter
parseCharacter :: Parser Character.Character
parseCharacter = parseEither parseCharacterUnencoded parseCharacterEncoded
parseCharacterUnencoded :: Parser Character.Character
parseCharacterUnencoded = Character.Unencoded <$> parseIf isLiteral
parseCharacterEncoded :: Parser Character.Character
parseCharacterEncoded = do
(hi, lo) <- parsePercentEncoded
pure . Character.Encoded $ intToWord8
(Char.digitToInt hi * 16 + Char.digitToInt lo)
parseExpression :: Parser Expression.Expression
parseExpression =
parseBetween (parseChar_ '{') (parseChar_ '}')
$ Expression.Expression
<$> parseOperator
<*> parseVariableList
parseVariableList :: Parser (NonEmpty.NonEmpty Variable.Variable)
parseVariableList = parseSepBy1 (parseChar_ ',') parseVarspec
parseVarspec :: Parser Variable.Variable
parseVarspec = do
name <- parseVarname
modifier <- parseModifier
pure $ Variable.Variable { Variable.name = name, Variable.modifier = modifier }
parseVarname :: Parser Name.Name
parseVarname = do
first <- parseVarcharFirst
rest <- Applicative.many parseVarcharRest
pure . Name.Name $ combine first rest
parseVarcharFirst :: Parser (NonEmpty.NonEmpty Char)
parseVarcharFirst = parseEither parseVarcharUnencoded parseVarcharEncoded
parseVarcharUnencoded :: Parser (NonEmpty.NonEmpty Char)
parseVarcharUnencoded = NonEmpty.singleton <$> parseIf isVarchar
parseVarcharEncoded :: Parser (NonEmpty.NonEmpty Char)
parseVarcharEncoded = do
(hi, lo) <- parsePercentEncoded
pure $ nonEmpty '%' [hi, lo]
parseVarcharRest :: Parser (NonEmpty.NonEmpty Char)
parseVarcharRest = parseEither
(nonEmpty <$> parseChar '.' <*> fmap NonEmpty.toList parseVarcharFirst)
parseVarcharFirst
isVarchar :: Char -> Bool
isVarchar x = case x of
'_' -> True
_ -> isAlpha x || Char.isDigit x
combine :: NonEmpty.NonEmpty a -> [NonEmpty.NonEmpty a] -> NonEmpty.NonEmpty a
combine xs =
nonEmpty (NonEmpty.first xs)
. mappend (NonEmpty.rest xs)
. concatMap NonEmpty.toList
nonEmpty :: a -> [a] -> NonEmpty.NonEmpty a
nonEmpty = NonEmpty.NonEmpty
parsePercentEncoded :: Parser (Char, Char)
parsePercentEncoded = do
parseChar_ '%'
(,) <$> parseIf Char.isHexDigit <*> parseIf Char.isHexDigit
parseOperator :: Parser Operator.Operator
parseOperator =
Maybe.fromMaybe Operator.None <$> Applicative.optional parseRequiredOperator
parseRequiredOperator :: Parser Operator.Operator
parseRequiredOperator = do
operator <- parseIf isOperator
maybe Applicative.empty pure $ toOperator operator
toOperator :: Char -> Maybe Operator.Operator
toOperator x = case x of
'+' -> Just Operator.PlusSign
'#' -> Just Operator.NumberSign
'.' -> Just Operator.FullStop
'/' -> Just Operator.Solidus
';' -> Just Operator.Semicolon
'?' -> Just Operator.QuestionMark
'&' -> Just Operator.Ampersand
_ -> Nothing
isOperator :: Char -> Bool
isOperator x = isOpLevel2 x || isOpLevel3 x || isOpReserve x
isOpLevel2 :: Char -> Bool
isOpLevel2 x = case x of
'+' -> True
'#' -> True
_ -> False
isOpLevel3 :: Char -> Bool
isOpLevel3 x = case x of
'.' -> True
'/' -> True
';' -> True
'?' -> True
'&' -> True
_ -> False
isOpReserve :: Char -> Bool
isOpReserve x = case x of
'=' -> True
',' -> True
'!' -> True
'@' -> True
'|' -> True
_ -> False
parseModifier :: Parser Modifier.Modifier
parseModifier =
fmap (Maybe.fromMaybe Modifier.None) . Applicative.optional $ parseEither
parsePrefixModifier
parseExplodeModifier
parsePrefixModifier :: Parser Modifier.Modifier
parsePrefixModifier = do
parseChar_ ':'
Modifier.Colon <$> parseMaxLength
parseMaxLength :: Parser Int
parseMaxLength = do
first <- parseNonZeroDigit
rest <- parseUpTo 3 parseDigit
pure . fromDigits $ nonEmpty first rest
fromDigits :: NonEmpty.NonEmpty Int -> Int
fromDigits = foldr1 ((+) . (10 *)) . NonEmpty.toList
parseUpTo :: Int -> Parser a -> Parser [a]
parseUpTo = parseUpToWith []
parseUpToWith :: [a] -> Int -> Parser a -> Parser [a]
parseUpToWith accumulator remaining parser = if remaining < 1
then pure accumulator
else do
result <- Applicative.optional parser
case result of
Nothing -> pure accumulator
Just value -> parseUpToWith (value : accumulator) (remaining - 1) parser
parseNonZeroDigit :: Parser Int
parseNonZeroDigit = Char.digitToInt <$> parseIf isNonZeroDigit
isNonZeroDigit :: Char -> Bool
isNonZeroDigit x = case x of
'0' -> False
_ -> Char.isDigit x
parseDigit :: Parser Int
parseDigit = Char.digitToInt <$> parseIf Char.isDigit
isAlpha :: Char -> Bool
isAlpha x = Char.isAsciiUpper x || Char.isAsciiLower x
parseExplodeModifier :: Parser Modifier.Modifier
parseExplodeModifier = Modifier.Asterisk <$ parseChar_ '*'
isLiteral :: Char -> Bool
isLiteral x = case x of
' ' -> False
'"' -> False
'\'' -> False
'%' -> False
'<' -> False
'>' -> False
'\\' -> False
'^' -> False
'`' -> False
'{' -> False
'|' -> False
'}' -> False
_ -> between '\x20' '\x7e' x || isUcschar x || isIprivate x
isUcschar :: Char -> Bool
isUcschar x =
between '\xa0' '\xd7ff' x
|| between '\xf900' '\xfdcf' x
|| between '\xfdf0' '\xffef' x
|| between '\x10000' '\x1fffd' x
|| between '\x20000' '\x2fffd' x
|| between '\x30000' '\x3fffd' x
|| between '\x40000' '\x4fffd' x
|| between '\x50000' '\x5fffd' x
|| between '\x60000' '\x6fffd' x
|| between '\x70000' '\x7fffd' x
|| between '\x80000' '\x8fffd' x
|| between '\x90000' '\x9fffd' x
|| between '\xa0000' '\xafffd' x
|| between '\xb0000' '\xbfffd' x
|| between '\xc0000' '\xcfffd' x
|| between '\xd0000' '\xdfffd' x
|| between '\xe1000' '\xefffd' x
isIprivate :: Char -> Bool
isIprivate x =
between '\xe000' '\xf8ff' x
|| between '\xf0000' '\xffffd' x
|| between '\x100000' '\x10fffd' x
between
:: Ord a
=> a
-> a
-> a
-> Bool
between lo hi x = lo <= x && x <= hi