{-# LANGUAGE MultiParamTypeClasses, TypeSynonymInstances, FlexibleInstances, FlexibleContexts #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}

{-
	Copyright (C) 2010 Dr. Alistair Ward

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see <http://www.gnu.org/licenses/>.
-}
{- |
 [@AUTHOR@]	Dr. Alistair Ward

 [@DESCRIPTION@]	Creates a /back-end/ implementation, conforming to <http://hackage.haskell.org/packages/archive/regex-base/latest/doc/html/Text-Regex-Base-RegexLike.html>.

 [@CAVEATS@]

	* The underlying polymorphic (in terms of the base-type of the list of input data) 'RegEx.ExtendedRegEx'-engine is never going to be a drop-in replacement for other /regex/-engines,
	so this standard interface has only been implemented for traditional input data-type ['Char'].

	* The standard interface requires many Haskell-extensions, but since this implementation is just a thin layer over the underlying portable polymorphic 'RegEx.ExtendedRegEx'-engine,
	the latter can still be used directly, where any of these extensions are unavailable.
-}

module RegExChar.RegExOptsChar(
-- * Types
-- ** Type-synonyms
	MatchSpan,
--	MatchDataSpan,
	RegExOptsChar,
-- * Functions
--	exciseNonCapturingTopLevelAlternatives,
--	matchUntilFailure,
--	toMatchDataSpanList,
	toZeroIndexedArray,
-- ** Operators
	(=~)
-- ** Predicates
--	complyStrictlyWithPosix,
--	hasNonCapturingTopLevelAlternatives
) where

import			Control.Applicative((<$>))
import			Text.Regex.Base.Context()	--Instance-declarations.
import qualified	Data.Array
import qualified	RegExChar.ExtendedRegExChar	as ExtendedRegExChar
import qualified	RegExDot.CompilationOptions	as CompilationOptions
import qualified	RegExDot.ConsumptionBounds	as ConsumptionBounds
import qualified	RegExDot.DataSpan		as DataSpan
import qualified	RegExDot.DataSpanTree		as DataSpanTree
import qualified	RegExDot.ExecutionOptions	as ExecutionOptions
import qualified	RegExDot.RegEx			as RegEx
import qualified	RegExDot.RegExOpts		as RegExOpts
import qualified	RegExDot.Result			as Result
import qualified	RegExDot.Tree			as Tree
import qualified	Text.Regex.Base.RegexLike	as RegexLike
import qualified	ToolShed.ListPlus		as ListPlus
import qualified	ToolShed.Defaultable		as Defaultable
import qualified	ToolShed.Options		as Options

infix 4 =~	--Same as (==).

-- | Defines a specific instance of the polymorphic base-type.
type RegExOptsChar	= RegExOpts.RegExOpts ExtendedRegExChar.ExtendedRegExChar

-- | Convenience accessor-function.
hasNonCapturingTopLevelAlternatives :: RegExOptsChar -> Bool
hasNonCapturingTopLevelAlternatives	= ExtendedRegExChar.hasNonCapturingTopLevelAlternatives . RegExOpts.regEx

-- | Convenience accessor-function.
complyStrictlyWithPosix :: RegExOptsChar -> Bool
complyStrictlyWithPosix	= CompilationOptions.complyStrictlyWithPosix . RegExOpts.compilationOptions

instance RegexLike.RegexOptions RegExOptsChar CompilationOptions.CompilationOptions ExecutionOptions.ExecutionOptions	where
	blankCompOpt	= Options.blankValue
	blankExecOpt	= Options.blankValue
	defaultCompOpt	= Defaultable.defaultValue
	defaultExecOpt	= Defaultable.defaultValue
	setExecOpts e r	= r { RegExOpts.executionOptions = e }
	getExecOpts	= RegExOpts.executionOptions

--Newer versions of this 'Text.Regex.Base.RegexLike.RegexMaker' have additional monadic methods, which can fail on parse-errors.
instance RegexLike.RegexMaker RegExOptsChar CompilationOptions.CompilationOptions ExecutionOptions.ExecutionOptions String	where
	makeRegexOpts c e source = RegExOpts.MkRegExOpts {
		RegExOpts.compilationOptions	= c,
		RegExOpts.executionOptions	= e,
		RegExOpts.regEx			= read source
	}

-- | The offset and length of the 'RegEx.InputData' consumed in one 'RegEx.Match'.
type MatchSpan		= (RegexLike.MatchOffset, RegexLike.MatchLength)

{- |
	* The 'MatchSpan' augmented by the consumed 'ExtendedRegExChar.InputData' to which it refers.

	* Similar to 'DataSpan.DataSpan'.
-}
type MatchDataSpan	= (ExtendedRegExChar.InputData, MatchSpan)

{- |
	* Convert the 'RegEx.MatchList', into the 'MatchDataSpan's required by "Text.Regex.Base.RegexLike".

	* Treat the whole 'ExtendedRegExChar.ExtendedRegExChar' as an additional implicit capture-group.

	* CAVEAT: @DataSpanTree.extractCaptureGroups True@ may return some 'DataSpan.DataSpan's containing the artificial offset @-1@ specified by Posix => don't use this as the basis for any arithmetic.
-}
toMatchDataSpanList ::
	Bool
	-> ConsumptionBounds.DataLength	-- ^ The offset to use for zero-length match.
	-> RegEx.MatchList Char		-- ^ The list of matches.
	-> [MatchDataSpan]
toMatchDataSpanList _ offset []				= [DataSpan.empty offset]	--The whole regex matched, but consumed nothing.
toMatchDataSpanList strictPosixCompliance _ matchList	= DataSpanTree.extractCaptureGroups strictPosixCompliance . return {-to List-monad-} . Tree.Node . return {-to List-monad-} $ DataSpanTree.toTreeList matchList

-- | Optionally remove the 2nd element from the list, where it represents the data captured by a groups of top-level 'Alternatives', which weren't explicitly delimited & therefore are non-capturing.
exciseNonCapturingTopLevelAlternatives :: RegExOptsChar -> [MatchDataSpan] -> [MatchDataSpan]
exciseNonCapturingTopLevelAlternatives regExOptsChar
	| hasNonCapturingTopLevelAlternatives regExOptsChar	= ListPlus.excise 1	--The zeroeth represents the whole regex, the 1st represents the top-level non-capturing group of Alternatives, & the remainder represent explicitly delimited groups.
	| otherwise						= id

-- | Convert the specified list, into a zero-indexed array.
toZeroIndexedArray :: [e] -> Data.Array.Array Int e
toZeroIndexedArray l	= Data.Array.listArray (0, length l - 1) l

{- |
	* Repeatedly apply the 'RegExOptsChar' to the 'ExtendedRegExChar.InputData', forwarding unmatched input data to the next match-attempt, until it's all been consumed.

	* The offsets, from the start of the input data, of all matches after the first, are shifted to account for input data already consumed by previous matches.
-}
matchUntilFailure ::
	RegExOptsChar			-- ^ The match-criteria.
	-> ExtendedRegExChar.InputData	-- ^ The input-data, to be consumed by repeated matching.
	-> [RegEx.MatchList Char]
matchUntilFailure regExOptsChar inputData	= shiftOffsets 0 $ matchUntilFailure' inputData	where
	matchUntilFailure' :: ExtendedRegExChar.InputData -> [RegEx.MatchList Char]
	matchUntilFailure' unmatchedInputData	= case unmatchedInputData ExtendedRegExChar.+~ regExOptsChar of	--CAVEAT: <http://hackage.haskell.org/trac/haskell-prime/wiki/QualifiedOperators>.
		(_, Just matchList, maybeSternAnchorResult)	-> matchList : if null (RegEx.extractDataFromMatchList matchList) then [] else matchUntilFailure' (RegEx.extractDataFromMatch' maybeSternAnchorResult)
		_						-> []

	shiftOffsets :: ConsumptionBounds.DataLength -> [RegEx.MatchList Char] -> [RegEx.MatchList Char]
	shiftOffsets offset (matchList : matchLists)	= let
		shiftedMatchList :: RegEx.MatchList Char
		shiftedMatchList	= RegEx.shiftMatchList offset matchList
	 in shiftedMatchList : shiftOffsets (DataSpan.after . last . DataSpanTree.flattenTreeList 0 $ DataSpanTree.toTreeList shiftedMatchList) matchLists {-recurse-}
	shiftOffsets _ _				= []

instance RegexLike.RegexLike RegExOptsChar ExtendedRegExChar.InputData	where
{- E.g.:
	("xabcdxabdxxadx" RegExOptsChar.=~ "a(b(c?))*d") :: [Text.Regex.Base.RegexLike.MatchArray]
	[array (0,2) [(0,(1,4)),(1,(2,2)),(2,(3,1))],array (0,2) [(0,(6,3)),(1,(7,1)),(2,(8,0))],array (0,1) [(0,(11,2)),(1,(-1,0))]]
-}
--	matchAll :: RegExOptsChar -> ExtendedRegExChar.InputData -> [RegexLike.MatchArray]
	matchAll regExOptsChar	= map (toZeroIndexedArray . map snd {-span-} . exciseNonCapturingTopLevelAlternatives regExOptsChar . toMatchDataSpanList (complyStrictlyWithPosix regExOptsChar) 0) . matchUntilFailure regExOptsChar

{- E.g.:
	("xabcdxabdxxadx" RegExOptsChar.=~ "a(b(c?))*d") :: [Text.Regex.Base.RegexLike.MatchText String]
	[array (0,2) [(0,("abcd",(1,4))),(1,("bc",(2,2))),(2,("c",(3,1)))],array (0,2) [(0,("abd",(6,3))),(1,("b",(7,1))),(2,("",(8,0)))],array (0,1) [(0,("ad",(11,2))),(1,("",(-1,0)))]]
-}
--	matchAllText :: RegExOptsChar -> ExtendedRegExChar.InputData -> [RegexLike.MatchText ExtendedRegExChar.InputData]
	matchAllText regExOptsChar	= map (toZeroIndexedArray . exciseNonCapturingTopLevelAlternatives regExOptsChar . toMatchDataSpanList (complyStrictlyWithPosix regExOptsChar) 0) . matchUntilFailure regExOptsChar

{- E.g.:
	("xabcdx" RegExOptsChar.=~ "a(b(c?))*d") :: Text.Regex.Base.RegexLike.MatchArray
	array (0,2) [(0,(1,4)),(1,(2,2)),(2,(3,1))]
-}
--	matchOnce :: RegExOptsChar -> ExtendedRegExChar.InputData -> Maybe RegexLike.MatchArray
	matchOnce regExOptsChar inputData	= toZeroIndexedArray . map snd {-span-} . exciseNonCapturingTopLevelAlternatives regExOptsChar . toMatchDataSpanList (
		complyStrictlyWithPosix regExOptsChar
	 ) (
		RegEx.externalMatchLength $ Result.getPreMatch extendedRegExResult
	 ) <$> Result.getMatchList extendedRegExResult	where
		extendedRegExResult :: RegEx.Result Char
		extendedRegExResult	= inputData ExtendedRegExChar.+~ regExOptsChar	--CAVEAT: <http://hackage.haskell.org/trac/haskell-prime/wiki/QualifiedOperators>.

{- E.g.:
	("xabcdx" RegExOptsChar.=~ "a(b(c?))*d") :: (String, Text.Regex.Base.RegexLike.MatchText String, String)
	("x",array (0,2) [(0,("abcd",(1,4))),(1,("bc",(2,2))),(2,("c",(3,1)))],"x")
-}
--	matchOnceText :: RegExOptsChar -> ExtendedRegExChar.InputData -> Maybe (ExtendedRegExChar.InputData, MatchText ExtendedRegExChar.InputData, ExtendedRegExChar.InputData)
	matchOnceText regExOptsChar inputData	= case inputData ExtendedRegExChar.+~ regExOptsChar of	--CAVEAT: <http://hackage.haskell.org/trac/haskell-prime/wiki/QualifiedOperators>.
		(maybeBowAnchorResult, Just matchList, maybeSternAnchorResult)	-> Just (
			RegEx.extractDataFromMatch' maybeBowAnchorResult,
			toZeroIndexedArray . exciseNonCapturingTopLevelAlternatives regExOptsChar $ toMatchDataSpanList (
				complyStrictlyWithPosix regExOptsChar
			) (
				RegEx.externalMatchLength maybeBowAnchorResult
			) matchList,
			RegEx.extractDataFromMatch' maybeSternAnchorResult
		 )
		_						-> Nothing

{- E.g.:
	("xabcdx" RegExOptsChar.=~ "a(b(c?))*d") :: Bool
	True
-}
--	matchTest :: RegExOptsChar -> ExtendedRegExChar.InputData -> Bool
	matchTest	= flip (ExtendedRegExChar.=~)	--CAVEAT: <http://hackage.haskell.org/trac/haskell-prime/wiki/QualifiedOperators>.

{- |
	* Match-operator.

	* The polymorphic return-type is resolved by the caller's "RegexLike.RegexContext".
-}
(=~) :: RegexLike.RegexContext RegExOptsChar ExtendedRegExChar.InputData target
	=> ExtendedRegExChar.InputData	-- ^ The input data.
	-> String			-- ^ The string from which to read the regex-specification.
	-> target			-- ^ The polymorphic return-type.
inputData =~ s	= (RegexLike.makeRegex s :: RegExOptsChar) `RegexLike.match` inputData