{-# LANGUAGE OverloadedStrings #-} module Text.Regex.Regen.CClass where import Prelude hiding (takeWhile) import Control.Applicative (Alternative(..)) import Data.Attoparsec.ByteString.Char8 (isDigit, isSpace, isHorizontalSpace) import Data.Char (ord) import Data.Functor (($>)) import Data.List ((\\)) import Text.Regex.Regen.ParserST import Text.Regex.Regen.Pattern import Text.Regex.Regen.PatternException import Text.Regex.Regen.Util bracketClass :: ParserST Part bracketClass = bracketed $ negative <|> positive where positive = fmap (CClass True) $ go [] negative = fmap (CClass False) $ char '^' *> go [] cpart = posixRange <|> backslashRange <|> dashRange <|> singleton go acc = do acc' <- (acc ++) <$> cpart mb <- peekChar case mb of Just ']' -> return acc' _ -> go acc' backslashClass :: ParserST Part backslashClass = do o <- pOptions <$> getPattern dot o <|> bsR <|> bsN o <|> range where dot o = string "." $> CClass False (munless (oDotAll o) (oLineEndChars o)) bsN o = string "\\N" $> CClass False (oLineEndChars o) bsR = string "\\R" $> CClass True ['\r','\n','\f','\v'] range = CClass True <$> backslashRange posixRange :: ParserST [Char] posixRange = toPC =<< string "[:" *> pcname <* string ":]" where toPC ('^':s) = negate <$> toPC s toPC "alnum" = return $ ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9'] toPC "alpha" = return $ ['a'..'z'] ++ ['A'..'Z'] toPC "ascii" = return $ ['\x00'..'\x7f'] toPC "blank" = return $ [' ','\t'] toPC "cntrl" = return $ '\x7f' : ['\x00'..'\x1f'] toPC "digit" = return $ ['0'..'9'] toPC "graph" = return $ ['\x21'..'\x7e'] toPC "lower" = return $ ['a'..'z'] toPC "print" = return $ ['\x20'..'\x7e'] toPC "punct" = return $ "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" toPC "space" = return $ " \t\r\n\v\f" toPC "word" = return $ '_' : ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9'] toPC "xdigit" = return $ ['a'..'f'] ++ ['A'..'Z'] ++ ['0'..'9'] toPC name = eNoSuchClass name negate cs = ['\x00'..'\xff'] \\ cs pcname = many $ satisfy (/=':') <|> char ':' `notFollowedBy` ']' backslashRange :: ParserST [Char] backslashRange = let bytes = ['\x00'..'\xff'] in choice [ string "\\d" $> [ c | c <- bytes, isDigit c ] , string "\\D" $> [ c | c <- bytes, not (isDigit c) ] , string "\\h" $> [ c | c <- bytes, isHSpace c ] , string "\\H" $> [ c | c <- bytes, not (isHSpace c) ] , string "\\s" $> [ c | c <- bytes, isSpace c ] , string "\\S" $> [ c | c <- bytes, not (isSpace c) ] , string "\\v" $> "\n\v\f\r" , string "\\V" $> bytes \\ "\n\v\f\r" , string "\\w" $> '_' : ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9'] , string "\\W" $> bytes \\ ('_' : ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9']) , string "\\N" >> eNotSupported "'\\N' in character class" , string "\\L" >> eNotSupported "'\\L'" , string "\\l" >> eNotSupported "'\\l'" , string "\\U" >> eNotSupported "'\\U'" , string "\\u" >> eNotSupported "'\\u'" , string "\\Q" >> eNotSupported "'\\Q' in character class" , string "\\E" >> eNotSupported "'\\E' in character class" , string "\\p{" *> some letter <* char '}' >> eNoProps , string "\\p" *> letter >> eNoProps , fmap (bytes \\) $ string "\\P{" *> some letter <* char '}' >> eNoProps , fmap (bytes \\) $ string "\\P" *> letter >> eNoProps ] where eNoProps = eNotSupported "properties" isHSpace = isHorizontalSpace . fromIntegral . ord dashRange :: ParserST [Char] dashRange = mkRange =<< (,) <$> singleton <* char '-' <*> singleton where mkRange p = case p of ([a],[b]) | a <= b -> pure [a..b] ([_],[_]) -> eClassOutOfOrder _ -> eParserError "unreachable" singleton :: ParserST [Char] singleton = (:[]) <$> choice [ escaped hexByte , escaped octalByte , escaped cescape , escaped literal , literal ] where literal = satisfy (/='\\')