{-# LANGUAGE OverloadedStrings #-} ----------------------------------------------------------------------------- -- | -- Module : Hasmin.Parser.Primitives -- Copyright : (c) 2017 Cristian Adrián Ontivero -- License : BSD3 -- Stability : experimental -- Portability : unknown -- -- Parsers for very basic CSS grammar tokens. -- ----------------------------------------------------------------------------- module Hasmin.Parser.Primitives ( ident , escape , unicode , nmstart , nmchar , int , int' , digits ) where import Control.Applicative ((<|>), some, many) import Data.Attoparsec.Text (Parser, ()) import Data.Monoid ((<>)) import Data.Text.Lazy.Builder (Builder) import Data.Text (Text) import qualified Data.Attoparsec.Text as A import qualified Data.Char as C import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Builder as B -- TODO combine with unicode to make it more efficient -- escape: {unicode}|\\[^\n\r\f0-9a-f] escape :: Parser Builder escape = unicode <|> (mappend <$> (B.singleton <$> A.char '\\') <*> (B.singleton <$> A.satisfy cond)) "not an escape token: {unicode}|\\\\[^\\n\\r\\f0-9a-f]" where cond c = c /= '\n' && c /= '\r' && c /= '\f' && (not . C.isHexDigit) c -- unicode \\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])? unicode :: Parser Builder unicode = do backslash <- A.char '\\' hexChars <- A.takeWhile1 C.isHexDigit _ <- A.option mempty (A.string "\r\n" <|> (T.singleton <$> A.satisfy ws)) if T.length hexChars <= 6 then pure $ B.singleton backslash <> B.fromText hexChars else fail "unicode escaped character with length greater than 6" where ws x = x == ' ' || x == '\n' || x == '\r' || x == '\t' || x == '\f' -- ident: -?-?{nmstart}{nmchar}* ident :: Parser Text ident = do dash <- (B.fromText <$> A.string "--") <|> (B.singleton <$> A.char '-') <|> pure mempty ns <- nmstart nc <- mconcat <$> many nmchar pure $ TL.toStrict (B.toLazyText (dash <> ns <> nc)) -- nmstart: [_a-z]|{nonascii}|{escape} nmstart :: Parser Builder nmstart = B.singleton <$> A.satisfy (\c -> C.isAlpha c || (not . C.isAscii) c || c == '_') <|> escape "not an nmstart token: [_a-z]|{nonascii}|{escape}" -- nmchar: [_a-z0-9-]|{nonascii}|{escape} nmchar :: Parser Builder nmchar = B.singleton <$> A.satisfy cond <|> escape where cond x = C.isAlphaNum x || x == '_' || x == '-' || (not . C.isAscii) x -- | \ data type parser, but into a String instead of an Int, for other -- parsers to use (e.g.: see the parsers int, or rational) int' :: Parser String int' = do sign <- A.char '-' <|> pure '+' d <- digits case sign of '+' -> pure d '-' -> pure (sign:d) _ -> error "int': parsed a number starting with other than [+|-]" -- | Parser for \: [+|-][0-9]+ int :: Parser Int int = read <$> int' -- | Parser one or more digits. digits :: Parser String digits = some A.digit