module PatternConverter ( makePattern, parseUrl ) where import InputParser import Control.Applicative hiding (many) import Text.ParserCombinators.Parsec hiding (Line, (<|>)) import Control.Monad.State import Data.List import Data.Maybe import Data.String.Utils (replace) import Data.List.Utils (split) import ParsecExt import Utils data SideBind = Hard | Soft | None deriving (Show, Eq) data UrlPattern = UrlPattern { _bindStart :: SideBind, _proto :: String, _host :: String, _query :: String, _bindEnd :: SideBind, _regex :: Bool } deriving (Show) makePattern :: Bool -> UrlPattern -> Pattern makePattern matchCase (UrlPattern bindStart proto host query bindEnd isRegex) | query' == "" = host' | otherwise = host' ++ separator' ++ query' where separator' | matchCase = "/(?-i)" | otherwise = "/" host' = case host of "" -> "" _ -> changeFirst.changeLast $ host where changeLast [] = [] changeLast ('.' : '*' : []) | query' == "" = "." | otherwise = ".*" changeLast ('*' : '.' : []) | query' == "" = "*." | otherwise = "*.*" changeLast [lst] | lst == '|' || lst `elem` hostSeparators = [] | lst == '*' && query' == "" = "*." | lst == '*' && query' /= "" = "*" | lst == '.' = "." | otherwise = lst : "*." changeLast (c:cs) = c : changeLast cs changeFirst [] = [] changeFirst (first:cs) | first == '*' = '.' : '*' : cs | first == '.' || bindStart == Hard || proto /= "" = first : cs | bindStart == Soft = '.' : first : cs | otherwise = '.' : '*' : first : cs query' = case query of "" -> "" (start:other) -> if isRegex then query else case query of '*' : '/' : other' -> replaceQuery '/' other' True '*' : '^' : other' -> replaceQuery '^' other' True _ -> replaceQuery start other (bindStart == None && host == "") where replaceQuery c cs openStart = replaceFirst c openStart ++ (join . map replaceWildcard $ cs) ++ queryEnd {- http://blogs.perl.org/users/mauke/2017/05/converting-glob-patterns-to-efficient-regexes-in-perl-and-javascript.html -} replaceFirst '*' _ = "(*PRUNE).*?" replaceFirst c openStart | c == '/' || c == '^' = if openStart then "(?:(*PRUNE).*?" ++ replaceWildcard c ++ ")?" else "" | otherwise = if openStart then "(*PRUNE).*?" ++ replaceWildcard c else replaceWildcard c queryEnd = if bindEnd == None then "" else "$" replaceWildcard c | c == '^' = "[^\\w%.-]" | c == '*' = "(*PRUNE).*?" | c `elem` special = '\\' : [c] | otherwise = [c] where special = "?$.+[]{}()\\|" -- also ^ and * are special hostSeparators :: String hostSeparators = "^/" parseUrl :: Pattern -> Either ParseError [UrlPattern] parseUrl = let raw = makeUrls <$> bindStart <*> cases urlParts <*> bindEnd in parse (join <$> (fmap.fmap) postfilter raw) "url" where makeUrls start mid end = makeUrl <$> pure start <*> mid <*> pure end makeUrl start (proto, host, query) end = UrlPattern start proto (trimTrailingNul host) query end False bindStart = (try (Soft <$ string "||") <|> try (Hard <$ string "|") <|> return None) "query start" queryEnd = (char '|' <* eof) <|> ('\0' <$ eof) <|> char '\0' "query end" bindEnd = (\c -> if c == '|' then Hard else None) <$> queryEnd port = option False $ many1 (noneOf ":") *> char ':' *> many1 (digit <|> char '*') *> optionMaybe (oneOf "/^") *> (True <$ queryEnd) trimTrailingNul :: String -> String trimTrailingNul [] = [] trimTrailingNul (c:cs) | cs == [] && c == '\0' = [] | otherwise = c : trimTrailingNul(cs) hostChar :: Parser Char hostChar = alphaNum <|> oneOf ".-:" protocols :: [String] protocols = ["https://", "http://"] protocolsSeparator :: String protocolsSeparator = ";" protocolChar :: Parser Char protocolChar = oneOf (delete '/' $ nub $ join protocols) postfilter :: UrlPattern -> [UrlPattern] postfilter url@(UrlPattern bs proto host query be _) = regular ++ regex -- ++ www where regex = if proto == "" && host == "" && "/" `isPrefixOf` query && length query > 2 && "/" `isSuffixOf` query then let query' = take (length query - 2) . drop 1 $ query in [UrlPattern bs "" "" query' be True] else [] regular = let leftBound = bs /= None || proto /= "" rightBound = be /= None || query /= "" orphanQuery = leftBound && host == "" && query /= "" && not ("*" `isPrefixOf` query) duplicateHostStar = host == "*" hostHasDot = isJust $ find (\c -> c == '.' || c == '*') host firstLevelHost = host /= "" && not hostHasDot && leftBound && rightBound hasLegalPort = case parse port "host" host of Right val -> val _ -> False hasIllegalPort = not hasLegalPort && ":" `isInfixOf` host in if not (orphanQuery || duplicateHostStar || firstLevelHost || hasIllegalPort) then let query' = if "*" `isSuffixOf` host && query /= "" then '*' : query else query in [url {_query = query'}] else [] -- TODO: process port as an url part urlParts :: [StringStateParser (String,String,String)] urlParts = square3 proto (manyCases host) (oneCase query) where append xs x = xs ++ [x] proto :: StringStateParser String proto = do masksString <- get case masksString of Nothing -> do put $ Just $ intercalate protocolsSeparator protocols return "" --allow to skip proto Just masksString' -> do let masks = split protocolsSeparator masksString' if null masks then lift pzero -- no continuations available (parser have finished on previous iteration) else do lift $ skipMany $ char '*' --skip leading * if presented name <- lift $ many1 protocolChar sep <- lift $ many $ oneOf hostSeparators let chars = name ++ replace "^" "//" sep -- concatenate input and expand separator wildcard nextChar <- lift $ lookAhead anyChar let masks' = filterProtoMasks masks chars nextChar -- find possible continuations for current input if null masks' || null chars then lift pzero -- fail parser if no continuations or no chars read else do put $ Just $ if isJust (find null masks') -- if empty continuation found (i.e. parser finished) then "" -- make no continuations available next time else intercalate protocolsSeparator masks' return $ if nextChar == '*' then chars ++ "*" else chars host = try (append <$> many hostChar <*> char '*') <|> try (append <$> many1 hostChar <*> lookAhead separator) "host" separator = (oneOf hostSeparators <|> queryEnd) "separator" query = notFollowedBy (try $ string "//") *> manyTill anyChar (lookAhead (try queryEnd)) "query" filterProtoMasks :: [String] -> String -> Char -> [String] filterProtoMasks masks chars nextChar = mapMaybe filterProtoMask masks where filterProtoMask mask = if nextChar /= '*' then if chars `isSuffixOf` mask then Just "" else Nothing else let tailFound = find (chars `isPrefixOf`) (tails mask) in drop (length chars) <$> tailFound