{-# LANGUAGE MultiParamTypeClasses, TypeSynonymInstances, FlexibleInstances, FlexibleContexts #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-
Copyright (C) 2010-2015 Dr. Alistair Ward
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
-}
{- |
[@AUTHOR@] Dr. Alistair Ward
[@DESCRIPTION@] Creates a /back-end/ implementation, conforming to .
[@CAVEATS@]
* The underlying polymorphic (in terms of the base-type of the list of input data) 'RegExDot.RegEx.ExtendedRegEx'-engine is never going to be a drop-in replacement for other /regex/-engines,
so this standard interface has only been implemented for traditional input data-type ['Char'].
* The standard interface requires many Haskell-extensions, but since this implementation is just a thin layer over the underlying portable polymorphic 'RegExDot.RegEx.ExtendedRegEx'-engine,
the latter can still be used directly, where any of these extensions are unavailable.
-}
module RegExChar.RegExOptsChar(
-- * Types
-- ** Type-synonyms
MatchSpan,
-- MatchDataSpan,
RegExOptsChar,
-- * Functions
-- exciseNonCapturingTopLevelAlternatives,
-- matchUntilFailure,
-- toMatchDataSpanList,
toZeroIndexedArray,
-- ** Operators
(=~)
-- ** Predicates
-- complyStrictlyWithPosix,
-- hasNonCapturingTopLevelAlternatives
) where
import qualified Data.Array.IArray
import qualified Data.Default
import qualified RegExChar.ExtendedRegExChar as ExtendedRegExChar
import qualified RegExDot.CompilationOptions
import qualified RegExDot.ConsumptionBounds
import qualified RegExDot.DataSpan
import qualified RegExDot.DataSpanTree
import qualified RegExDot.ExecutionOptions
import qualified RegExDot.RegEx
import qualified RegExDot.RegExOpts
import qualified RegExDot.Result
import qualified RegExDot.Tree
import qualified Text.Regex.Base.RegexLike as RegexLike
import Text.Regex.Base.Context() -- Instance-declarations.
import qualified ToolShed.Data.List
import qualified ToolShed.Options
infix 4 =~ -- Same as (==).
-- | Defines a specific instance of the polymorphic base-type.
type RegExOptsChar = RegExDot.RegExOpts.RegExOpts ExtendedRegExChar.ExtendedRegExChar
-- | Convenience accessor-function.
hasNonCapturingTopLevelAlternatives :: RegExOptsChar -> Bool
hasNonCapturingTopLevelAlternatives = ExtendedRegExChar.hasNonCapturingTopLevelAlternatives . RegExDot.RegExOpts.regEx
-- | Convenience accessor-function.
complyStrictlyWithPosix :: RegExOptsChar -> Bool
complyStrictlyWithPosix = RegExDot.CompilationOptions.complyStrictlyWithPosix . RegExDot.RegExOpts.compilationOptions
instance RegexLike.RegexOptions RegExOptsChar RegExDot.CompilationOptions.CompilationOptions RegExDot.ExecutionOptions.ExecutionOptions where
blankCompOpt = ToolShed.Options.blankValue
blankExecOpt = ToolShed.Options.blankValue
defaultCompOpt = Data.Default.def
defaultExecOpt = Data.Default.def
setExecOpts e r = r { RegExDot.RegExOpts.executionOptions = e }
getExecOpts = RegExDot.RegExOpts.executionOptions
-- Newer versions of this 'Text.Regex.Base.RegexLike.RegexMaker' have additional monadic methods, which can fail on parse-errors.
instance RegexLike.RegexMaker RegExOptsChar RegExDot.CompilationOptions.CompilationOptions RegExDot.ExecutionOptions.ExecutionOptions String where
makeRegexOpts c e source = RegExDot.RegExOpts.MkRegExOpts {
RegExDot.RegExOpts.compilationOptions = c,
RegExDot.RegExOpts.executionOptions = e,
RegExDot.RegExOpts.regEx = read source
}
-- | The offset and length of the 'RegExDot.RegEx.InputData' consumed in one 'RegExDot.RegEx.Match'.
type MatchSpan = (RegexLike.MatchOffset, RegexLike.MatchLength)
{- |
* The 'MatchSpan' augmented by the consumed 'ExtendedRegExChar.InputData' to which it refers.
* Similar to 'RegExDot.DataSpan.DataSpan'.
-}
type MatchDataSpan = (ExtendedRegExChar.InputData, MatchSpan)
{- |
* Convert the 'RegExDot.RegEx.MatchList', into the 'MatchDataSpan's required by "Text.Regex.Base.RegexLike".
* Treat the whole 'ExtendedRegExChar.ExtendedRegExChar' as an additional implicit capture-group.
* CAVEAT: @RegExDot.DataSpanTree.extractCaptureGroups True@ may return some 'RegExDot.DataSpan.DataSpan's containing the artificial offset @-1@ specified by Posix => don't use this as the basis for any arithmetic.
-}
toMatchDataSpanList
:: Bool
-> RegExDot.ConsumptionBounds.DataLength -- ^ The offset to use for zero-length match.
-> RegExDot.RegEx.MatchList Char -- ^ The list of matches.
-> [MatchDataSpan]
toMatchDataSpanList _ offset [] = [RegExDot.DataSpan.empty offset] -- The whole regex matched, but consumed nothing.
toMatchDataSpanList strictPosixCompliance _ matchList = RegExDot.DataSpanTree.extractCaptureGroups strictPosixCompliance . return {-to List-monad-} . RegExDot.Tree.Node . return {-to List-monad-} $ RegExDot.DataSpanTree.toTreeList matchList
-- | Optionally remove the 2nd element from the list, where it represents the data captured by a groups of top-level 'Alternatives', which weren't explicitly delimited & therefore are non-capturing.
exciseNonCapturingTopLevelAlternatives :: RegExOptsChar -> [MatchDataSpan] -> [MatchDataSpan]
exciseNonCapturingTopLevelAlternatives regExOptsChar
| hasNonCapturingTopLevelAlternatives regExOptsChar = ToolShed.Data.List.excise 1 -- The zeroeth represents the whole regex, the 1st represents the top-level non-capturing group of Alternatives, & the remainder represent explicitly delimited groups.
| otherwise = id
-- | Convert the specified list, into a zero-indexed array.
toZeroIndexedArray :: [e] -> Data.Array.IArray.Array Int e
toZeroIndexedArray l = Data.Array.IArray.listArray (0, pred $ length l) l
{- |
* Repeatedly apply the 'RegExOptsChar' to the 'ExtendedRegExChar.InputData', forwarding unmatched input data to the next match-attempt, until it's all been consumed.
* The offsets, from the start of the input data, of all matches after the first, are shifted to account for input data already consumed by previous matches.
-}
matchUntilFailure
:: RegExOptsChar -- ^ The match-criteria.
-> ExtendedRegExChar.InputData -- ^ The input-data, to be consumed by repeated matching.
-> [RegExDot.RegEx.MatchList Char]
matchUntilFailure regExOptsChar inputData = shiftOffsets 0 $ matchUntilFailure' inputData where
matchUntilFailure' :: ExtendedRegExChar.InputData -> [RegExDot.RegEx.MatchList Char]
matchUntilFailure' unmatchedInputData = case unmatchedInputData ExtendedRegExChar.+~ regExOptsChar of -- CAVEAT: .
(_, Just matchList, maybeSternAnchorResult) -> matchList : if null (RegExDot.RegEx.extractDataFromMatchList matchList) then [] else matchUntilFailure' (RegExDot.RegEx.extractDataFromMatch' maybeSternAnchorResult)
_ -> []
shiftOffsets :: RegExDot.ConsumptionBounds.DataLength -> [RegExDot.RegEx.MatchList Char] -> [RegExDot.RegEx.MatchList Char]
shiftOffsets offset (matchList : matchLists) = let
shiftedMatchList :: RegExDot.RegEx.MatchList Char
shiftedMatchList = RegExDot.RegEx.shiftMatchList offset matchList
in shiftedMatchList : shiftOffsets (RegExDot.DataSpan.after . last . RegExDot.DataSpanTree.flattenTreeList 0 $ RegExDot.DataSpanTree.toTreeList shiftedMatchList) matchLists {-recurse-}
shiftOffsets _ _ = []
instance RegexLike.RegexLike RegExOptsChar ExtendedRegExChar.InputData where
{- E.g.:
("xabcdxabdxxadx" RegExOptsChar.=~ "a(b(c?))*d") :: [Text.Regex.Base.RegexLike.MatchArray]
[array (0,2) [(0,(1,4)),(1,(2,2)),(2,(3,1))],array (0,2) [(0,(6,3)),(1,(7,1)),(2,(8,0))],array (0,1) [(0,(11,2)),(1,(-1,0))]]
-}
-- matchAll :: RegExOptsChar -> ExtendedRegExChar.InputData -> [RegexLike.MatchArray]
matchAll regExOptsChar = map (toZeroIndexedArray . map snd {-span-} . exciseNonCapturingTopLevelAlternatives regExOptsChar . toMatchDataSpanList (complyStrictlyWithPosix regExOptsChar) 0) . matchUntilFailure regExOptsChar
{- E.g.:
("xabcdxabdxxadx" RegExOptsChar.=~ "a(b(c?))*d") :: [Text.Regex.Base.RegexLike.MatchText String]
[array (0,2) [(0,("abcd",(1,4))),(1,("bc",(2,2))),(2,("c",(3,1)))],array (0,2) [(0,("abd",(6,3))),(1,("b",(7,1))),(2,("",(8,0)))],array (0,1) [(0,("ad",(11,2))),(1,("",(-1,0)))]]
-}
-- matchAllText :: RegExOptsChar -> ExtendedRegExChar.InputData -> [RegexLike.MatchText ExtendedRegExChar.InputData]
matchAllText regExOptsChar = map (toZeroIndexedArray . exciseNonCapturingTopLevelAlternatives regExOptsChar . toMatchDataSpanList (complyStrictlyWithPosix regExOptsChar) 0) . matchUntilFailure regExOptsChar
{- E.g.:
("xabcdx" RegExOptsChar.=~ "a(b(c?))*d") :: Text.Regex.Base.RegexLike.MatchArray
array (0,2) [(0,(1,4)),(1,(2,2)),(2,(3,1))]
-}
-- matchOnce :: RegExOptsChar -> ExtendedRegExChar.InputData -> Maybe RegexLike.MatchArray
matchOnce regExOptsChar inputData = (
toZeroIndexedArray . map snd {-span-} . exciseNonCapturingTopLevelAlternatives regExOptsChar . toMatchDataSpanList (
complyStrictlyWithPosix regExOptsChar
) (
RegExDot.RegEx.externalMatchLength $ RegExDot.Result.getPreMatch extendedRegExResult
)
) `fmap` RegExDot.Result.getMatchList extendedRegExResult where
extendedRegExResult :: RegExDot.RegEx.Result Char
extendedRegExResult = inputData ExtendedRegExChar.+~ regExOptsChar -- CAVEAT: .
{- E.g.:
("xabcdx" RegExOptsChar.=~ "a(b(c?))*d") :: (String, Text.Regex.Base.RegexLike.MatchText String, String)
("x",array (0,2) [(0,("abcd",(1,4))),(1,("bc",(2,2))),(2,("c",(3,1)))],"x")
-}
-- matchOnceText :: RegExOptsChar -> ExtendedRegExChar.InputData -> Maybe (ExtendedRegExChar.InputData, MatchText ExtendedRegExChar.InputData, ExtendedRegExChar.InputData)
matchOnceText regExOptsChar inputData = case inputData ExtendedRegExChar.+~ regExOptsChar of -- CAVEAT: .
(maybeBowAnchorResult, Just matchList, maybeSternAnchorResult) -> Just (
RegExDot.RegEx.extractDataFromMatch' maybeBowAnchorResult,
toZeroIndexedArray . exciseNonCapturingTopLevelAlternatives regExOptsChar $ toMatchDataSpanList (
complyStrictlyWithPosix regExOptsChar
) (
RegExDot.RegEx.externalMatchLength maybeBowAnchorResult
) matchList,
RegExDot.RegEx.extractDataFromMatch' maybeSternAnchorResult
)
_ -> Nothing
{- E.g.:
("xabcdx" RegExOptsChar.=~ "a(b(c?))*d") :: Bool
True
-}
-- matchTest :: RegExOptsChar -> ExtendedRegExChar.InputData -> Bool
matchTest = flip (ExtendedRegExChar.=~) -- CAVEAT: .
{- |
* Match-operator.
* The polymorphic return-type is resolved by the caller's "RegexLike.RegexContext".
-}
(=~) :: RegexLike.RegexContext RegExOptsChar ExtendedRegExChar.InputData target
=> ExtendedRegExChar.InputData -- ^ The input data.
-> String -- ^ The string from which to read the regex-specification.
-> target -- ^ The polymorphic return-type.
inputData =~ s = (RegexLike.makeRegex s :: RegExOptsChar) `RegexLike.match` inputData