{- | Parser functions for CSS3 selectors. -} module Text.CSS3.Selectors.Parser ( -- * Parsing Functions parseSelectorGroup, parseSelector, sel, -- * CSS Selector Parsers selectorGroup, selector, simpleSelectorSequence, headSimpleSelector, tailSimpleSelector, attributeSelector, attributeOperator, combinator, pseudoClass, pseudoClassParameter, -- * Lexical Syntax Parsers whitespace, identifier, name, nameStartLetter, nameCharLetter ) where import Control.Applicative import Control.Monad import Data.Char import Data.Maybe import Numeric import Text.CSS3.Selectors.Syntax import Text.Parsec hiding ((<|>), optional, newline, many, Empty) import qualified Text.Parsec as Parsec ((<|>), optional, newline, many) import Text.Parsec.String {- * http://www.w3.org/TR/selectors/ * http://www.w3.org/TR/css-syntax-3/ -} {- Selector ::= SimpleSelectorSequence [PseudoElement] | SimpleSelectorSequence Combinator Selector SimpleSelectorSequence ::= (TypeSelector | UniversalSelector) (AttributeSelector | ClassSelector | IDSelector | PseudoClass)* | (AttributeSelector | ClassSelector | IDSelector | PseudoClass)+ SimpleSelector ::= (TypeSelector | UniversalSelector | AttributeSelector | ClassSelector | IDSelector | PseudoClass) Combinator ::= [WhiteSpace] (' ' | '>' | '+' | '~') [WhiteSpace] WhiteSpace ::= ' ' | '\t' | '\n' | '\r' | '\f' Group ::= Selector | Selector [WhiteSpace] ',' [WhiteSpace] Group TypeSelector ::= identifier UniversalSelector ::= '*' AttributeSelector ::= '[' Attribute ']' | '[' Attribute AttributeOperator AttributeValue ']' Attribute ::= identifier | String AttributeOperator ::= '=' | '~=' | '|=' | '^=' | '$=' | '*=' IDSelector ::= '#' identifier ClassSelector ::= '.' AttributeValue PseudoClass ::= ':' (StructuralPseudoClass | 'not(' SimpleSelector ')') StructuralPseudoClass ::= 'root' | 'nth-child' PseudoClassParameter | 'nth-last-child' PseudoClassParameter | 'nth-of-type' PseudoClassParameter | 'nth-last-of-type' PseudoClassParameter | 'first-child' | 'last-child' | 'first-of-type' | 'last-of-type' | 'only-child' | 'only-of-type' | 'empty' -} -- * Parsing Functions -- | Tries to parse a selector group. parseSelectorGroup :: String -> Either ParseError SelectorGroup parseSelectorGroup = parse (selectorGroup <* eof) "" -- | Tries to parse a single selector. parseSelector :: String -> Either ParseError Selector parseSelector = parse (selector <* eof) "" -- | Parses a single selector and fails with an error if the string cannot be parsed correctly. -- This function is intended for testing purposes only. sel :: String -> Selector sel = either (error . show) id . parse (selector <* eof) "" -- * CSS Selector Parsers selectorGroup :: Parser SelectorGroup selectorGroup = SelectorGroup <$> selector <*> many (many whitespace *> char ',' *> many whitespace *> selector) selector :: Parser Selector selector = Selector <$> simpleSelectorSequence <*> optional (try $ (,) <$> combinator <*> selector) simpleSelectorSequence :: Parser SimpleSelectorSequence simpleSelectorSequence = SimpleSelectorSequence <$> headSimpleSelector <*> many tailSimpleSelector <|> SimpleSelectorSequence UniversalSelector <$> many1 tailSimpleSelector headSimpleSelector :: Parser HeadSimpleSelector headSimpleSelector = TypeSelector <$> identifier <|> UniversalSelector <$ char '*' tailSimpleSelector :: Parser TailSimpleSelector tailSimpleSelector = attributeSelector <|> ClassSelector <$> (char '.' *> identifier) <|> IDSelector <$> (char '#' *> name) <|> PseudoClass <$> (char ':' *> pseudoClass) attributeSelector :: Parser TailSimpleSelector attributeSelector = AttributeSelector <$> (char '[' *> many whitespace *> identifier <* many whitespace) <*> optional ((,) <$> attributeOperator <* many whitespace <*> (identifier <|> quotedString) <* many whitespace) <* char ']' attributeOperator :: Parser AttributeOperator attributeOperator = ExactMatch <$ string "=" <|> IncludesMatch <$ string "~=" <|> DashMatch <$ string "|=" <|> PrefixMatch <$ string "^=" <|> SuffixMatch <$ string "$=" <|> InfixMatch <$ string "*=" combinator :: Parser Combinator combinator = try (Child <$ (many whitespace *> char '>' <* many whitespace)) <|> try (AdjacentSibling <$ (many whitespace *> char '+' <* many whitespace)) <|> try (GeneralSibling <$ (many whitespace *> char '~' <* many whitespace)) <|> Descendant <$ many1 whitespace pseudoClass :: Parser PseudoClass pseudoClass = Root <$ try (string "root") <|> NthChild <$> (try (string "nth-child") *> parameter pseudoClassParameter) <|> NthLastChild <$> (try (string "nth-last-child") *> parameter pseudoClassParameter) <|> NthOfType <$> (try (string "nth-of-type") *> parameter pseudoClassParameter) <|> NthLastOfType <$> (try (string "nth-last-of-type") *> parameter pseudoClassParameter) <|> FirstChild <$ try (string "first-child") <|> LastChild <$ try (string "last-child") <|> FirstOfType <$ try (string "first-of-type") <|> LastOfType <$ try (string "last-of-type") <|> OnlyChild <$ try (string "only-child") <|> OnlyOfType <$ try (string "only-of-type") <|> Empty <$ try (string "empty") <|> Not <$> (try kwNot *> parameter (Left <$> headSimpleSelector <|> Right <$> tailSimpleSelector)) where parameter p = char '(' *> many whitespace *> p <* many whitespace <* char ')' {- nth : S* [ ['-'|'+']? INTEGER? {N} [ S* ['-'|'+'] S* INTEGER ]? | ['-'|'+']? INTEGER | {O}{D}{D} | {E}{V}{E}{N} ] S* -} pseudoClassParameter :: Parser PseudoClassParameter pseudoClassParameter = PseudoClassParameter <$> (try $ option id sign <*> option 1 integer <* oneOf "nN") <*> (option 0 $ many whitespace *> sign <* many whitespace <*> integer) <|> PseudoClassParameter 0 <$> (option id sign <*> integer) <|> PseudoClassParameter 2 1 <$ kwOdd <|> PseudoClassParameter 2 0 <$ kwEven -- * Lexical Syntax Parsers -- According to sign :: Parser (Integer -> Integer) sign = negate <$ char '-' <|> id <$ char '+' -- integer = [0-9]+ integer :: Parser Integer integer = foldl (\a c -> 10*a + toInteger (ord c) - 48) 0 <$> many1 digit -- num [0-9]+|[0-9]*\.[0-9]+ -- identifier = [-]?{nameStartLetter}{nameCharLetter}* identifier :: Parser String identifier = (\d c cs -> (maybe id (:) d) $ c : cs) <$> optional (char '-') <*> nameStartLetter <*> many nameCharLetter -- name = {nameCharLetter}+ name :: Parser String name = many1 nameCharLetter -- nameStartLetter = [_a-z]|{nonAsciiLetter}|{escapedLetter} nameStartLetter :: Parser Char nameStartLetter = asciiLetter <|> char '_' <|> nonAsciiLetter <|> escapedLetter -- nameCharLetter = [_a-z0-9-]|{nonAsciiLetter}|{escapedLetter} nameCharLetter :: Parser Char nameCharLetter = char '_' <|> asciiLetter <|> digit <|> char '-' <|> nonAsciiLetter <|> escapedLetter -- asciiLatter = [_a-z] asciiLetter :: Parser Char asciiLetter = satisfy $ \c -> c >= 'a' && c <= 'z' || c >='A' && c <= 'Z' -- nonAsciiLetter = [\240-\377] -- CSS 2.1 -- nonAsciiLetter = [^\0-\177] -- CSS 3 nonAsciiLetter :: Parser Char nonAsciiLetter = satisfy (\c -> ord c > 127) -- unicodeLetter = \\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])? unicodeLetter :: Parser Char unicodeLetter = char '\\' *> (chr . fst . head . readHex <$> countFromTo 1 6 (satisfy isHexDigit)) <* optional (try (string "\r\n") <|> pure <$> whitespace) -- escapedLetter = {unicode}|\\[^\n\r\f0-9a-f] escapedLetter :: Parser Char escapedLetter = try unicodeLetter <|> char '\\' *> satisfy (\c -> not (isHexDigit c) && c `notElem` "\n\r\f") {- quotedString = {string1}|{string2} string1 = \"([^\n\r\f\\"]|\\{newline}|{nonAsciiLetter}|{escapedLetter})*\" string2 = \'([^\n\r\f\\']|\\{newline}|{nonAsciiLetter}|{escapedLetter})*\' -} quotedString :: Parser String quotedString = char '\"' *> (concat <$> many (stringChar '\"')) <* char '\"' <|> char '\'' *> (concat <$> many (stringChar '\'')) <* char '\'' where stringChar :: Char -> Parser String stringChar delim = pure <$> satisfy (`notElem` (delim : "\n\r\f\\")) <|> try (char '\\' *> newline) -- <|> pure <$> nonAsciiLetter -- Can this ever be reached? <|> pure <$> escapedLetter -- newline = \n|\r\n|\r|\f newline :: Parser String newline = try (string "\r\n") <|> pure <$> oneOf "\n\r\f" -- whitespace = [ \t\r\n\f]* whitespace :: Parser Char whitespace = oneOf " \t\n\r\f" {- D d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])? E e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])? N n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n O o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o T t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t V v|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\v -} kwNot, kwOdd, kwEven :: Parser String kwNot = sequence [oneOf "nN", oneOf "oO", oneOf "tT"] kwOdd = sequence [oneOf "oO", oneOf "dD", oneOf "dD"] kwEven = sequence [oneOf "eE", oneOf "vV", oneOf "eE", oneOf "nN"] -- * Utility Parsers countFromTo :: Stream s m t => Int -> Int -> ParsecT s u m a -> ParsecT s u m [a] countFromTo n m p | m < n = pure [] | otherwise = (++) <$> count n p <*> (catMaybes <$> replicateM (m - n) (optional p))