{-# OPTIONS_GHC -fglasgow-exts #-} ----------------------------------------------------------------------------- -- | -- Module : Text.Regex.Base.RegexLike -- Copyright : (c) Chris Kuklewicz 2006 -- License : BSD-style (see the file LICENSE) -- -- Maintainer : libraries@haskell.org, textregexlazy@personal.mightyreason.com -- Stability : experimental -- Portability : non-portable (MPTC+FD) -- -- Classes and instances for Regex matching. -- -- -- All the classes are declared here, and some common type aliases, and -- the MatchResult data type. -- -- The only instances here are for Extract String and Extract ByteString. -- There are no data values. The 'RegexContext' instances are in -- "Text.Regex.Base.Context", except for ones which run afoul of a -- repeated variable (RegexContext regex a a), which are defined in each -- modules' String and ByteString modules. ----------------------------------------------------------------------------- module Text.Regex.Base.RegexLike ( -- ** Type aliases MatchOffset, MatchLength, MatchArray, MatchText, -- ** Data types MatchResult(..), -- ** Classes RegexOptions(..), RegexMaker(..), RegexLike(..), RegexContext(..), Extract(..), ) where import Data.Array(Array,(!)) import Data.Maybe(isJust) import Data.ByteString(ByteString) import qualified Data.ByteString as B (take,drop,empty) -- | 0 based index from start of source, or (-1) for unused type MatchOffset = Int -- | non-negative length of a match type MatchLength = Int -- | 0 based array, with 0th index indicating the full match. If the -- full match location is not available, represent as (0,0). type MatchArray = Array Int (MatchOffset,MatchLength) type MatchText source = Array Int (source,(MatchOffset,MatchLength)) -- | This is the same as the type from JRegex. data MatchResult a = MR { mrBefore :: a, mrMatch :: a, mrAfter :: a, mrSubList :: [a], mrSubs :: Array Int a } ---------------- -- | Rather than carry them around spearately, the options for how to -- execute a regex are kept as part of the regex. There are two types -- of options. Those that can only be specified at compilation time -- and never changed are CompOpt. Those that can be changed later and -- affect how matching is performed are ExecOpt. The actually types -- for these depend on the backend. class RegexOptions regex compOpt execOpt | regex->compOpt execOpt, compOpt->regex execOpt, execOpt->regex compOpt where blankCompOpt :: compOpt -- ^ no options set at all in the backend blankExecOpt :: execOpt -- ^ no options set at all in the backend defaultCompOpt :: compOpt -- ^ reasonable options (extended,caseSensitive,multiline regex) defaultExecOpt :: execOpt -- ^ reasonable options (extended,caseSensitive,multiline regex) setExecOpts :: execOpt -> regex -> regex -- ^ forget old flags and use new ones getExecOpts :: regex -> execOpt -- ^ retrieve the current flags ---------------- -- | RegexMaker captures the creation of the compiled regular -- expression from a source type and an option type. The 'makeRegex' -- function has a default implementation that depends on makeRegexOpts -- and used 'defaultCompOpt' and 'defaultExecOpt'. class (RegexOptions regex compOpt execOpt) => RegexMaker regex compOpt execOpt source | regex -> compOpt execOpt, compOpt -> regex execOpt, execOpt -> regex compOpt where -- | make using the defaultCompOpt and defaultExecOpt makeRegex :: source -> regex -- | Specify your own options makeRegexOpts :: compOpt -> execOpt -> source -> regex makeRegex = makeRegexOpts defaultCompOpt defaultExecOpt ---------------- -- | RegexLike is parametrized on a regular expression type and a -- source type to run the matching on. -- -- There are default implementations: matchTest and matchOnceText -- using matchOnce; matchCount and matchAllText using -- matchAll. matchOnce uses matchOnceText and matchAll uses -- matchAllText. So a minimal complete instance need to provide -- (matchOnce or matchOnceText) and (matchAll or matchAllText). class (Extract source)=> RegexLike regex source where matchAll :: regex -> source-> [MatchArray] -- | This can return an array of (offset,length) index pairs for the -- match and captured substrings. matchOnce :: regex -> source-> Maybe MatchArray matchCount :: regex -> source-> Int matchTest :: regex -> source-> Bool matchAllText :: regex -> source-> [MatchText source] -- | This can return a tuple of three items: the source before the -- match, an array of the match and captured substrings (with their -- indices), and the source after the match. matchOnceText :: regex -> source-> Maybe (source,MatchText source,source) matchAll regex source = map (fmap snd) (matchAllText regex source) matchOnce regex source = fmap (\(_,mt,_) -> fmap snd mt) (matchOnceText regex source) matchTest regex source = isJust (matchOnce regex source) matchCount regex source = length (matchAll regex source) matchOnceText regex source = fmap (\ma -> let (o,l) = ma!0 in (before o source ,fmap (\ol -> (extract ol source,ol)) ma ,after (o+l) source)) (matchOnce regex source) matchAllText regex source = map (fmap (\ol -> (extract ol source,ol))) (matchAll regex source) ---------------- -- | RegexContext is the polymorphic interface to do matching class (RegexLike regex source) => RegexContext regex source target where match :: regex -> source -> target matchM :: (Monad m) => regex -> source -> m target ---------------- -- | Extract allows for indexing operations on String or ByteString. class Extract source where -- | before is a renamed "take" before :: Int -> source -> source -- | after is a renamed "drop" after :: Int -> source -> source -- | For when there is no match, this can construct an empty data value empty :: source -- | extract takes an offset and length and has a default -- implementation of @extract (off,len) source = before len (after -- off source)@ extract :: (Int,Int) -> source -> source extract (off,len) source = before len (after off source) instance Extract String where before = take; after = drop; empty = [] instance Extract ByteString where before = B.take; after = B.drop; empty = B.empty