{-# OPTIONS_GHC -fno-warn-orphans -fno-warn-unused-binds #-} {-# LANGUAGE UndecidableInstances, OverlappingInstances #-} {-# LANGUAGE FlexibleInstances, BangPatterns #-} {-# LANGUAGE TemplateHaskell, QuasiQuotes #-} {-# LANGUAGE ForeignFunctionInterface #-} -- | A usable regular expressions library on top of pcre-light. module Text.Regex.PCRE.Heavy ( -- * Matching (=~) , scan , scanO -- * Replacement , sub , subO , gsub , gsubO -- * QuasiQuoter , re , mkRegexQQ -- * Types from pcre-light , Regex , PCREOption -- * Advanced raw stuff , rawMatch , rawSub ) where import Language.Haskell.TH hiding (match) import Language.Haskell.TH.Quote import Language.Haskell.TH.Syntax import qualified Text.Regex.PCRE.Light as PCRE import Text.Regex.PCRE.Light.Base import Control.Applicative ((<$>)) import Data.Maybe (isJust, fromMaybe) import Data.Stringable import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Internal as BS import System.IO.Unsafe (unsafePerformIO) import Foreign class RegexResult a where fromResult :: Maybe [BS.ByteString] -> a instance RegexResult (Maybe [BS.ByteString]) where fromResult = id instance Stringable a => RegexResult (Maybe [a]) where fromResult x = map fromByteString <$> x instance RegexResult Bool where fromResult = isJust reMatch :: (Stringable a, RegexResult b) => Regex -> a -> b reMatch r s = fromResult $ PCRE.match r (toByteString s) [] -- | Matches a string with a regex. -- -- You can cast the result to Bool or Maybe [Stringable] -- Maybe [Stringable] only represents the first match and its groups. -- Use 'scan' to find all matches. -- -- Note: if casts to bool automatically. -- -- >>> :set -XQuasiQuotes -- >>> "https://unrelenting.technology" =~ [re|^http.*|] :: Bool -- True -- >>> "https://unrelenting.technology" =~ [re|^https?://([^\.]+)\..*|] :: Maybe [String] -- Just ["https://unrelenting.technology","unrelenting"] -- >>> if "https://unrelenting.technology" =~ [re|^http.*|] then "YEP" else "NOPE" -- "YEP" (=~) :: (Stringable a, RegexResult b) => a -> Regex -> b (=~) = flip reMatch -- | Does raw PCRE matching (you probably shouldn't use this directly). -- -- >>> :set -XOverloadedStrings -- >>> rawMatch [re|\w{2}|] "a a ab abc ba" 0 [] -- Just [(4,6)] -- >>> rawMatch [re|\w{2}|] "a a ab abc ba" 6 [] -- Just [(7,9)] -- >>> rawMatch [re|(\w)(\w)|] "a a ab abc ba" 0 [] -- Just [(4,6),(4,5),(5,6)] rawMatch :: Regex -> BS.ByteString -> Int -> [PCREExecOption] -> Maybe [(Int, Int)] rawMatch r@(Regex pcreFp _) s offset opts = unsafePerformIO $ do withForeignPtr pcreFp $ \pcrePtr -> do let nCapt = PCRE.captureCount r ovecSize = (nCapt + 1) * 3 ovecBytes = ovecSize * size_of_cint allocaBytes ovecBytes $ \ovec -> do let (strFp, off, len) = BS.toForeignPtr s withForeignPtr strFp $ \strPtr -> do results <- c_pcre_exec pcrePtr nullPtr (strPtr `plusPtr` off) (fromIntegral len) (fromIntegral offset) (combineExecOptions opts) ovec (fromIntegral ovecSize) if results < 0 then return Nothing else let loop n o acc = if n == results then return $ Just $ reverse acc else do i <- peekElemOff ovec $! o j <- peekElemOff ovec (o + 1) loop (n + 1) (o + 2) ((fromIntegral i, fromIntegral j) : acc) in loop 0 0 [] substr :: BS.ByteString -> (Int, Int) -> BS.ByteString substr s (f, t) = BS.take (t - f) . BS.drop f $ s -- | Searches the string for all matches of a given regex. -- -- >>> scan [re|\s*entry (\d+) (\w+)\s*&?|] " entry 1 hello &entry 2 hi" -- [[" entry 1 hello &","1","hello"],["entry 2 hi","2","hi"]] scan :: (Stringable a) => Regex -> a -> [[a]] scan r s = scanO r [] s -- | Exactly like 'scan', but passes runtime options to PCRE. scanO :: (Stringable a) => Regex -> [PCREExecOption] -> a -> [[a]] scanO r opts s = map fromByteString <$> loop 0 [] where str = toByteString s loop offset acc = case rawMatch r str offset opts of Nothing -> reverse acc Just [] -> reverse acc Just ms -> loop (maximum $ map snd ms) ((map (substr str) ms) : acc) class RegexReplacement a where performReplacement :: BS.ByteString -> [BS.ByteString] -> a -> BS.ByteString instance Stringable a => RegexReplacement a where performReplacement _ _ to = toByteString to instance Stringable a => RegexReplacement (a -> [a] -> a) where performReplacement from groups replacer = toByteString $ replacer (fromByteString from) (map fromByteString groups) instance Stringable a => RegexReplacement (a -> a) where performReplacement from _ replacer = toByteString $ replacer (fromByteString from) instance Stringable a => RegexReplacement ([a] -> a) where performReplacement _ groups replacer = toByteString $ replacer (map fromByteString groups) rawSub :: RegexReplacement r => Regex -> r -> BS.ByteString -> Int -> [PCREExecOption] -> Maybe (BS.ByteString, Int) rawSub r t s offset opts = case rawMatch r s offset opts of Just ((begin, end):groups) -> Just (BS.concat [ substr s (0, begin) , performReplacement (substr s (begin, end)) (map (substr s) groups) t , substr s (end, BS.length s)], end) _ -> Nothing -- | Replaces the first occurence of a given regex. -- -- >>> sub [re|thing|] "world" "Hello, thing thing" :: String -- "Hello, world thing" -- -- >>> sub [re|a|] "b" "c" :: String -- "c" -- -- You can use functions! -- A function of Stringable gets the full match. -- A function of [Stringable] gets the groups. -- A function of Stringable -> [Stringable] gets both. -- -- >>> sub [re|%(\d+)(\w+)|] (\(d:w:_) -> "{" ++ d ++ " of " ++ w ++ "}" :: String) "Hello, %20thing" :: String -- "Hello, {20 of thing}" sub :: (Stringable a, RegexReplacement r) => Regex -> r -> a -> a sub r t s = subO r [] t s -- | Exactly like 'sub', but passes runtime options to PCRE. subO :: (Stringable a, RegexReplacement r) => Regex -> [PCREExecOption] -> r -> a -> a subO r opts t s = fromMaybe s $ fromByteString <$> fst <$> rawSub r t (toByteString s) 0 opts -- | Replaces all occurences of a given regex. -- -- See 'sub' for more documentation. -- -- >>> gsub [re|thing|] "world" "Hello, thing thing" :: String -- "Hello, world world" gsub :: (Stringable a, RegexReplacement r) => Regex -> r -> a -> a gsub r t s = gsubO r [] t s -- | Exactly like 'gsub', but passes runtime options to PCRE. gsubO :: (Stringable a, RegexReplacement r) => Regex -> [PCREExecOption] -> r -> a -> a gsubO r opts t s = fromByteString $ loop 0 str where str = toByteString s loop offset acc = case rawSub r t acc offset opts of Just (result, newOffset) -> loop newOffset result _ -> acc instance Lift PCREOption where lift o = [| o |] quoteExpRegex :: [PCREOption] -> String -> ExpQ quoteExpRegex opts txt = [| PCRE.compile (BS.pack txt) opts |] where !_ = PCRE.compile (BS.pack txt) opts -- check at compile time -- | Returns a QuasiQuoter like 're', but with given PCRE options. mkRegexQQ :: [PCREOption] -> QuasiQuoter mkRegexQQ opts = QuasiQuoter { quoteExp = quoteExpRegex opts , quotePat = undefined , quoteType = undefined , quoteDec = undefined } -- | A QuasiQuoter for regular expressions that does a compile time check. re :: QuasiQuoter re = mkRegexQQ []