\begin{code} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE UndecidableInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} \end{code} \begin{code} module Text.RE.Capture ( Matches(..) , Match(..) , Capture(..) , noMatch , emptyMatchArray -- Matches functions , anyMatches , countMatches , matches , mainCaptures -- Match functions , matched , matchedText , matchCapture , matchCaptures , (!$$) , captureText , (!$$?) , captureTextMaybe , (!$) , capture , (!$?) , captureMaybe -- Capture functions , hasCaptured , capturePrefix , captureSuffix ) where \end{code} \begin{code} import Data.Array import Data.Maybe import Text.Regex.Base import Text.RE.CaptureID infixl 9 !$, !$$ \end{code} \begin{code} -- | the result type to use when every match is needed, not just the -- first match of the RE against the source data Matches a = Matches { matchesSource :: !a -- ^ the source text being matched , allMatches :: ![Match a] -- ^ all captures found, left to right } deriving (Show,Eq) \end{code} \begin{code} -- | the result of matching a RE to a text once, listing the text that -- was matched and the named captures in the RE and all of the substrings -- matched, with the text captured by the whole RE; a complete failure -- to match will be represented with an empty array (with bounds (0,-1)) data Match a = Match { matchSource :: !a -- ^ the whole source text , captureNames :: !CaptureNames -- ^ the RE's capture names , matchArray :: !(Array CaptureOrdinal (Capture a)) -- ^ 0..n-1 captures, -- starting with the -- text matched by the -- whole RE } deriving (Show,Eq) \end{code} \begin{code} -- | the matching of a single sub-expression against part of the source -- text data Capture a = Capture { captureSource :: !a -- ^ the whole text that was searched , capturedText :: !a -- ^ the text that was matched , captureOffset :: !Int -- ^ the number of characters preceding the -- match with -1 used if no text was captured -- by the RE (not even the empty string) , captureLength :: !Int -- ^ the number of chacter in the captured -- sub-string } deriving (Show,Eq) \end{code} \begin{code} -- | Construct a Match that does not match anything. noMatch :: a -> Match a noMatch t = Match t noCaptureNames emptyMatchArray -- | an empty array of Capture emptyMatchArray :: Array CaptureOrdinal (Capture a) emptyMatchArray = listArray (CaptureOrdinal 0,CaptureOrdinal $ -1) [] \end{code} \begin{code} instance Functor Matches where fmap f Matches{..} = Matches { matchesSource = f matchesSource , allMatches = map (fmap f) allMatches } instance Functor Match where fmap f Match{..} = Match { matchSource = f matchSource , captureNames = captureNames , matchArray = fmap (fmap f) matchArray } instance Functor Capture where fmap f c@Capture{..} = c { captureSource = f captureSource , capturedText = f capturedText } \end{code} \begin{code} -- | tests whether the RE matched the source text at all anyMatches :: Matches a -> Bool anyMatches = not . null . allMatches -- | count the matches countMatches :: Matches a -> Int countMatches = length . allMatches matches :: Matches a -> [a] matches = map capturedText . mainCaptures -- | extract the main capture from each match mainCaptures :: Matches a -> [Capture a] mainCaptures ac = [ capture c0 cs | cs<-allMatches ac ] where c0 = CID_ordinal $ CaptureOrdinal 0 \end{code} \begin{code} -- | tests whether the RE matched the source text at all matched :: Match a -> Bool matched = isJust . matchCapture -- | tests whether the RE matched the source text at all matchedText :: Match a -> Maybe a matchedText = fmap capturedText . matchCapture -- | the top-level capture if the source text matched the RE, -- Nothing otherwise matchCapture :: Match a -> Maybe (Capture a) matchCapture = fmap fst . matchCaptures -- | the top-level capture and the sub captures if the text matched -- the RE, Nothing otherwise matchCaptures :: Match a -> Maybe (Capture a,[Capture a]) matchCaptures Match{..} = case rangeSize (bounds matchArray) == 0 of True -> Nothing False -> Just (matchArray!0,drop 1 $ elems matchArray) -- | an alternative for captureText (!$$) :: Match a -> CaptureID -> a (!$$) = flip captureText -- | look up the text of the nth capture, 0 being the match of the whole -- RE against the source text, 1, the first bracketed sub-expression to -- be matched and so on captureText :: CaptureID -> Match a -> a captureText cid mtch = capturedText $ capture cid mtch -- | an alternative for captureTextMaybe (!$$?) :: Match a -> CaptureID -> Maybe a (!$$?) = flip captureTextMaybe -- | look up the text of the nth capture (0 being the match of the -- whole), returning Nothing if the Match doesn't contain the capture captureTextMaybe :: CaptureID -> Match a -> Maybe a captureTextMaybe cid mtch = do cap <- mtch !$? cid case hasCaptured cap of True -> Just $ capturedText cap False -> Nothing -- | an alternative for capture (!$) :: Match a -> CaptureID -> Capture a (!$) = flip capture -- | look up the nth capture, 0 being the match of the whole RE against -- the source text, 1, the first bracketed sub-expression to be matched -- and so on capture :: CaptureID -> Match a -> Capture a capture cid mtch = fromMaybe oops $ mtch !$? cid where oops = error $ "capture: out of bounds (" ++ show cid ++ ")" -- | an alternative for capture captureMaybe (!$?) :: Match a -> CaptureID -> Maybe (Capture a) (!$?) = flip captureMaybe -- | look up the nth capture, 0 being the match of the whole RE against -- the source text, 1, the first bracketed sub-expression to be matched -- and so on, returning Nothing if there is no such capture, or if the -- capture failed to capture anything (being in a failed alternate) captureMaybe :: CaptureID -> Match a -> Maybe (Capture a) captureMaybe cid mtch@Match{..} = do cap <- case bounds matchArray `inRange` CaptureOrdinal i of True -> Just $ matchArray ! CaptureOrdinal i False -> Nothing case hasCaptured cap of True -> Just cap False -> Nothing where i = lookupCaptureID cid mtch lookupCaptureID :: CaptureID -> Match a -> Int lookupCaptureID cid Match{..} = findCaptureID cid captureNames \end{code} \begin{code} -- | test if the capture has matched any text hasCaptured :: Capture a -> Bool hasCaptured = (>=0) . captureOffset -- | returns the text preceding the match capturePrefix :: Extract a => Capture a -> a capturePrefix Capture{..} = before captureOffset captureSource -- | returns the text after the match captureSuffix :: Extract a => Capture a -> a captureSuffix Capture{..} = after (captureOffset+captureLength) captureSource \end{code} \begin{code} -- | for matching just the first RE against the source text instance ( RegexContext regex source (AllTextSubmatches (Array Int) (source,(Int,Int))) , RegexLike regex source ) => RegexContext regex source (Match source) where match r s = cvt s $ getAllTextSubmatches $ match r s matchM r s = do y <- matchM r s return $ cvt s $ getAllTextSubmatches y -- | for matching all REs against the source text instance ( RegexContext regex source [MatchText source] , RegexLike regex source ) => RegexContext regex source (Matches source) where match r s = Matches s $ map (cvt s) $ match r s matchM r s = do y <- matchM r s return $ Matches s $ map (cvt s) y \end{code} \begin{code} cvt :: source -> MatchText source -> Match source cvt hay arr = Match { matchSource = hay , captureNames = noCaptureNames , matchArray = ixmap (CaptureOrdinal lo,CaptureOrdinal hi) getCaptureOrdinal $ fmap f arr } where (lo,hi) = bounds arr f (ndl,(off,len)) = Capture { captureSource = hay , capturedText = ndl , captureOffset = off , captureLength = len } \end{code}