{-# LANGUAGE OverloadedStrings #-}
module Hasmin.Parser.Utils (
ident
, fontfamilyname
, nonquotedurl
, skipComments
, lexeme
, functionParser
, comma
, colon
, opt
, nmchar
) where
import Control.Applicative ((<|>), many)
import Control.Monad (void, mzero)
import Data.Attoparsec.Text (char,
option, Parser, satisfy, skipSpace, string, takeWhile1, (<?>))
import Data.Monoid ((<>))
import Data.Text (Text)
import Data.Text.Lazy.Builder as LB
import qualified Data.Attoparsec.Text as A
import qualified Data.Char as C
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
skipComments :: Parser ()
skipComments = void $ many (skipSpace *> comment) <* skipSpace
comment :: Parser Text
comment = mappend <$> string "/*" <*> (string "*/" <|> untilAsterisk)
where untilAsterisk = mappend <$> A.takeWhile (/= '*') <*> checkAsterisk
checkAsterisk = mappend <$> string "*" <*> (string "/" <|> untilAsterisk)
comma :: Parser Char
comma = lexeme $ char ','
colon :: Parser Char
colon = lexeme $ char ':'
lexeme :: Parser a -> Parser a
lexeme p = skipComments *> p <* skipComments
opt :: Monoid m => Parser m -> Parser m
opt = option mempty
nonquotedurl :: Parser Text
nonquotedurl = do
t <- many (escape <|> (LB.singleton <$> satisfy validChar))
pure $ TL.toStrict (toLazyText (mconcat t))
where validChar x = x /= '\"' && x /= '\'' && x /= '(' && x /= ')'
&& x /= '\\' && notWhitespace x && notNonprintable x
notWhitespace x = x /= '\n' && x /= '\t' && x /= ' '
notNonprintable x = not (C.chr 0 <= x && x <= C.chr 8)
&& x /= '\t'
&& not ('\SO' <= x && x <= C.chr 31)
&& x /= '\DEL'
fontfamilyname :: Parser Text
fontfamilyname = do
i <- ident
is <- many (skipComments *> ident)
if T.toLower i `elem` invalidNames
then mzero
else pure $ i <> mconcat (map (" "<>) is)
where invalidNames = ["serif", "sans-serif", "monospace", "cursive",
"fantasy", "inherit", "initial", "unset", "default"]
ident :: Parser Text
ident = do
dash <- option mempty (LB.singleton <$> char '-')
ns <- nmstart
nc <- mconcat <$> many nmchar
pure $ TL.toStrict (toLazyText (dash <> ns <> nc))
nmstart :: Parser Builder
nmstart = LB.singleton <$> satisfy (\c -> C.isAlpha c || (not . C.isAscii) c || c == '_')
<|> escape
<?> "not an nmstart token: [_a-z]|{nonascii}|{escape}"
nmchar :: Parser Builder
nmchar = LB.singleton <$> satisfy cond <|> escape
where cond x = C.isAlphaNum x || x == '_' || x == '-'
|| (not . C.isAscii) x
escape :: Parser Builder
escape = unicode
<|> (mappend <$> (LB.singleton <$> char '\\') <*> (LB.singleton <$> satisfy cond))
<?> "not an escape token: {unicode}|\\\\[^\\n\\r\\f0-9a-f]"
where cond c = c /= '\n'
&& c /= '\r'
&& c /= '\f'
&& (not . C.isHexDigit) c
unicode :: Parser Builder
unicode = do
backslash <- char '\\'
hexChars <- takeWhile1 C.isHexDigit
_ <- opt (string "\r\n" <|> (T.singleton <$> satisfy ws))
if T.length hexChars <= 6
then pure $ LB.singleton backslash <> LB.fromText hexChars
else fail "unicode escaped character with length greater than 6"
where ws x = x == ' ' || x == '\n' || x == '\r' || x == '\t' || x == '\f'
functionParser :: Parser a -> Parser a
functionParser p = lexeme p <* char ')'