module Text.Matchers ( Matcher(..) , CaseSensitive(..) , pcre , within , exact , CompUTC(..) , descUTC , compUTCtoCmp , date ) where import Control.Applicative ((<$>), (<*>), (<*), (<$), optional, (<|>)) import Control.Monad (replicateM, mzero) import qualified Data.ByteString as BS import Data.Fixed (Pico) import Data.Maybe (fromMaybe, isJust) import Data.Text (Text, pack, unpack, toCaseFold, isInfixOf) import Data.Text.Encoding (encodeUtf8) import qualified Text.Regex.PCRE.Light as PCRE import Text.Parsec (many, satisfy) import qualified Text.Parsec as P import Text.Parsec.Text (Parser) import qualified Data.Time as Time data CaseSensitive = Sensitive | Insensitive deriving (Eq, Ord, Show) data Matcher = Matcher { shortDesc :: Text -- ^ Short description of this matcher, e.g. @PCRE@ or @Exact@. , matchDesc :: Text -- ^ Description of a successful match, e.g. -- @Matches the PCRE pattern abc@, or -- @a valid date with optional time@. , match :: Text -> Bool -- ^ Function to carry out the match } descSensitive :: CaseSensitive -> String descSensitive c = case c of Sensitive -> " (case sensitive)" Insensitive -> " (case insensitive)" -- | Uses the PCRE regular expression engine. Currently the pcre-light -- package is used, as it has a simpler interface than the -- regex-pcre-builtin. It should work correctly with Unicode. pcre :: CaseSensitive -> Text -- ^ Pattern -> Either Text Matcher -- ^ The Matcher if the pattern is good; if the pattern is bad, -- returns an error message. pcre c t = case pcrePrim c (encodeUtf8 t) of Left e -> Left $ pack e Right f -> let sDesc = pack "Perl-compatible regular expression" mrDesc = pack $ "matches the PCRE pattern \"" ++ unpack t ++ "\"" ++ descSensitive c mr = f . encodeUtf8 in return $ Matcher sDesc mrDesc mr -- | Matcher that succeeds if the pattern text is found anywhere -- within the subject. within :: CaseSensitive -> Text -- ^ The pattern -> Matcher within cs t = Matcher sDesc mrDesc mr where sDesc = pack "within" mrDesc = pack $ "contains the text \"" ++ unpack t ++ "\"" ++ descSensitive cs mr = txtMatch isInfixOf cs t -- | Matcher that succeeds if the pattern text exactly matches the -- subject (with case sensitivity as appropriate.) exact :: CaseSensitive -> Text -> Matcher exact cs t = Matcher sDesc mrDesc mr where sDesc = pack "exact" mrDesc = pack $ "matches the text \"" ++ unpack t ++ "\"" ++ descSensitive cs mr = txtMatch (==) cs t txtMatch :: (Text -> Text -> Bool) -> CaseSensitive -> Text -> Text -> Bool txtMatch f c p t = pat `f` txt where txt = flipCase t pat = flipCase p flipCase = case c of Sensitive -> id Insensitive -> toCaseFold -- | Matcher that succeeds if the subject represents a valid date with -- an optional time. date :: Maybe (CompUTC, Time.UTCTime) -- ^ If Nothing, any valid date and time will succeed as a match; -- the matcher will return False if the subject is not a valid -- date. If Just, the subject must be a valid date and must fit -- within the range indicated. -> Matcher date mayPair = Matcher (pack "date") md mr where md = case mayPair of Nothing -> pack "any valid date with optional time" Just (c, t) -> pack $ "valid date and optional time, " ++ descUTC c t mr x = fromMaybe False $ do subjDT <- case P.parse dateTime "" x of Left _ -> mzero Right g -> return g case mayPair of Nothing -> return True Just (c, t) -> let cmp = compUTCtoCmp c in return $ subjDT `cmp` t ------------------------------------------------------------ -- PCRE primitives ------------------------------------------------------------ pcrePrim :: CaseSensitive -> BS.ByteString -> Either String (BS.ByteString -> Bool) pcrePrim c bs = let u8 = [PCRE.utf8] opts = case c of Sensitive -> u8 Insensitive -> PCRE.caseless:u8 doMatch rx s = isJust $ PCRE.match rx s [] in fmap doMatch $ PCRE.compileM bs opts ------------------------------------------------------------ -- Date parsers ------------------------------------------------------------ year :: Parser Integer year = read <$> replicateM 4 P.digit month :: Parser Int month = read <$> replicateM 2 P.digit day :: Parser Int day = read <$> replicateM 2 P.digit pDate :: Parser Time.Day pDate = p >>= failOnErr where p = Time.fromGregorianValid <$> year <* satisfy dateSep <*> month <* satisfy dateSep <*> day failOnErr = maybe (fail "could not parse date") return dateSep :: Char -> Bool dateSep c = c == '/' || c == '-' digit :: Char -> Bool digit c = c >= '0' && c <= '9' colon :: Char -> Bool colon = (== ':') hours :: Parser Int hours = p >>= (maybe (fail "could not parse hours") return) where p = f <$> satisfy digit <*> satisfy digit f d1 d2 = let r = read [d1,d2] in if r < 0 || r > 23 then Nothing else Just r minutes :: Parser Int minutes = p >>= maybe (fail "could not parse minutes") return where p = f <$ satisfy colon <*> satisfy digit <*> satisfy digit f d1 d2 = let r = read [d1, d2] in if r < 0 || r > 59 then Nothing else Just r seconds :: Parser Pico seconds = p >>= maybe (fail "could not parse seconds") return where p = f <$ satisfy colon <*> satisfy digit <*> satisfy digit f d1 d2 = let r = read [d1, d2] :: Int in if r < 0 || r > 59 then Nothing else Just . fromIntegral $ r time :: Parser Time.TimeOfDay time = f <$> hours <*> minutes <*> optional seconds where f h m ms = Time.TimeOfDay h m (fromMaybe 0 ms) tzSign :: Parser (Int -> Int) tzSign = (id <$ satisfy plus) <|> (negate <$ satisfy minus) where plus = (== '+') minus = (== '-') tzNumber :: Parser Int tzNumber = read <$> replicateM 4 (satisfy digit) timeZone :: Parser Time.TimeZone timeZone = p >>= maybe (fail "could not parse time zone") return where p = f <$> tzSign <*> tzNumber f s = minsToOffset . s minsToOffset m = if abs m > 840 then Nothing else Just (Time.TimeZone m False "") white :: Char -> Bool white c = c == ' ' || c == '\t' timeWithZone :: Parser (Time.TimeOfDay, Maybe Time.TimeZone) timeWithZone = (,) <$> time <* many (satisfy white) <*> optional timeZone dateTime :: Parser Time.UTCTime dateTime = f <$> pDate <* many (satisfy white) <*> optional timeWithZone where f d mayTwithZ = Time.zonedTimeToUTC zt where zt = Time.ZonedTime lt tz lt = Time.LocalTime d tod (tod, tz) = case mayTwithZ of Nothing -> (Time.midnight, Time.utc) Just (t, mayZ) -> case mayZ of Nothing -> (t, Time.utc) Just z -> (t, z) ------------------------------------------------------------ -- Other date things ------------------------------------------------------------ data CompUTC = UAfter | UOnOrAfter | UExactly | UBefore | UOnOrBefore deriving (Eq, Show, Ord) descUTC :: CompUTC -> Time.UTCTime -> String descUTC c u = "date is " ++ co ++ " " ++ dt where co = case c of UAfter -> "after" UOnOrAfter -> "on or after" UExactly -> "on" UBefore -> "before" UOnOrBefore -> "on or before" dt = show dy ++ " " ++ hs ++ ":" ++ ms ++ ":" ++ ss ++ " UTC" Time.UTCTime dy difft = u Time.TimeOfDay h m s = Time.timeToTimeOfDay difft (hs, ms, ss) = (show h, show m, show (round s :: Int)) compUTCtoCmp :: Ord a => CompUTC -> a -> a -> Bool compUTCtoCmp c = case c of UAfter -> (>) UOnOrAfter -> (>=) UExactly -> (==) UBefore -> (<) UOnOrBefore -> (<=)