{-# OPTIONS_GHC -fno-warn-orphans #-}
{-
	Copyright (C) 2010 Dr. Alistair Ward

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see <http://www.gnu.org/licenses/>.
-}
{- |
 [@AUTHOR@]	Dr. Alistair Ward

 [@DESCRIPTION@]	An 'RegEx.ExtendedRegEx', which has been specialised for 'Char', to create a tradition non-polymorphic /regex/.
-}

module RegExChar.ExtendedRegExChar(
-- * Types
-- ** Type-synonyms
	ExtendedRegExChar(..),
	InputData,
-- * Functions
-- ** Operators
	(+~),
	(=~),
	(/~)
) where

import			Control.Applicative((<$>))
import			Control.Arrow((***))
import			RegExDot.DSL((<~>), (-:))
import			Text.ParserCombinators.Parsec((<?>), (<|>))
import qualified	Control.Monad
import qualified	Data.List
import qualified	RegExChar.MetaChar		as MetaChar
import qualified	RegExDot.Anchor			as Anchor
import qualified	RegExDot.Consumer		as Consumer
import qualified	RegExDot.RegEx			as RegEx
import qualified	RegExDot.RegExOpts		as RegExOpts
import qualified	RegExDot.Repeatable		as Repeatable
import qualified	Text.ParserCombinators.Parsec	as Parsec
import qualified	ToolShed.SelfValidate		as SelfValidate

infix 4 +~, =~, /~	--Same as (==) & (/=).

-- | Specialise a 'RegEx.ExtendedRegEx' for 'Char', & encapsulate it to permit tailored instance-declarations.
data ExtendedRegExChar	= MkExtendedRegExChar {
	hasNonCapturingTopLevelAlternatives	:: Bool,	-- ^ The string from which a 'RegEx.ExtendedRegEx' is read, may, if data-capture isn't required, omit explicit delimiters around top-level 'RegEx.Alternatives'.
	extendedRegEx				:: RegEx.ExtendedRegEx Char
} deriving Eq

-- | Abbreviation.
type InputData	= RegEx.InputData Char

instance SelfValidate.SelfValidator ExtendedRegExChar	where
	isValid MkExtendedRegExChar {
		hasNonCapturingTopLevelAlternatives	= hasNonCapturingTopLevelAlternatives',
		extendedRegEx				= extendedRegEx'
	} = (not hasNonCapturingTopLevelAlternatives' || all (RegEx.isCaptureGroup . Repeatable.base) (RegEx.concatenation extendedRegEx')) && SelfValidate.isValid extendedRegEx'

instance Consumer.Consumer ExtendedRegExChar	where
	consumptionProfile	= Consumer.consumptionProfile . extendedRegEx
	starHeight		= Consumer.starHeight . extendedRegEx

instance RegEx.ShortcutExpander Char	where
	expand c	= error $ "RegExDot.RegEx.ShortcutExpander.expand RegExChar.ExtendedRegExChar:\tunrecognised shortcut '" ++ show c ++ "'."

instance Read ExtendedRegExChar	where
	readsPrec _ s
		| s == reverse Anchor.tokens	= [
			(
				MkExtendedRegExChar {
					hasNonCapturingTopLevelAlternatives	= False,
					extendedRegEx				= (Just Anchor.Bow, Just Anchor.Stern) <~> []
				},
				""
			)
		] --The order of adjacent zero-width assertions is irrelevant.
		| otherwise			= let
			extendedRegExCharParser :: Parsec.Parser ExtendedRegExChar
			extendedRegExCharParser	= reduce {-correct prior assumption-} <$> alternativesParser {-assume non-capturing top-level Alternatives-} where
				reduce :: RegEx.Alternatives Char -> ExtendedRegExChar
				reduce alternatives
					| RegEx.isSingletonAlternatives alternatives	= MkExtendedRegExChar False . head $ RegEx.deconstructAlternatives alternatives
					| otherwise					= MkExtendedRegExChar True $ (Nothing, Nothing) <~> RegEx.CaptureGroup alternatives -: []	--Infer non-capturing top-level 'RegEx.Alternatives' from the presence of 'RegEx.alternativeExtendedRegExSeparatorToken's.

				alternativesParser :: Parsec.Parser (RegEx.Alternatives Char)
				alternativesParser	= RegEx.MkAlternatives <$> extendedRegExParser `Parsec.sepBy1` (Parsec.char RegEx.alternativeExtendedRegExSeparatorToken <?> "RegEx.alternativeExtendedRegExSeparatorToken " ++ show RegEx.alternativeExtendedRegExSeparatorToken)	where
					extendedRegExParser :: Parsec.Parser (RegEx.ExtendedRegEx Char)
					extendedRegExParser	= do
						maybeBowAnchor		<- Parsec.option Nothing {-default-} $ (Parsec.char Anchor.bowToken <?> "Anchor.bowToken " ++ show Anchor.bowToken) >> return {-to GenParser-monad-} (Just Anchor.Bow)
						repeatableRequirementList	<- repeatableRequirementListParser

						(
							do
								repeatableCaptureGroup	<- Repeatable.repeatableParser . RegEx.CaptureGroup =<< uncurry Parsec.between (
									Parsec.char *** Parsec.char $ RegEx.captureGroupDelimiters
								 ) alternativesParser {-recurse-} <?> "RegEx.captureGroupDelimiters " ++ show RegEx.captureGroupDelimiters

								extendedRegEx'	<- extendedRegExParser	--Recurse.

								return {-to GenParser-monad-} $ RegEx.transformExtendedRegEx ((repeatableRequirementList ++) . (repeatableCaptureGroup :)) extendedRegEx' { RegEx.bowAnchor = maybeBowAnchor }
						 ) <|> (
							do
								maybeSternAnchor	<- Parsec.option Nothing {-default-} $ (Parsec.char Anchor.sternToken <?> "Anchor.sternToken " ++ show Anchor.sternToken) >> return {-to GenParser-monad-} (Just Anchor.Stern)

								return {-to GenParser-monad-} RegEx.MkExtendedRegEx {
									RegEx.bowAnchor		= maybeBowAnchor,
									RegEx.concatenation	= repeatableRequirementList,
									RegEx.sternAnchor	= maybeSternAnchor
								}
						 )
						where
							repeatableRequirementListParser :: Parsec.Parser (RegEx.Concatenation Char)
							repeatableRequirementListParser	= Parsec.choice [
								Parsec.try . Parsec.lookAhead $ (
									Parsec.char Anchor.sternToken <?> "Anchor.sternToken " ++ show Anchor.sternToken
								) >> (
									(
										Parsec.eof >> return {-to GenParser-monad-} []
									) <|> (
										Parsec.oneOf [RegEx.alternativeExtendedRegExSeparatorToken, snd RegEx.captureGroupDelimiters] >> return {-to GenParser-monad-} []
									)
								),
								Control.Monad.liftM2 (:) (
									 MetaChar.metaCharParser >>= Repeatable.repeatableParser . RegEx.Require . MetaChar.deconstruct
								) repeatableRequirementListParser, {-recurse-}
								return {-to GenParser-monad-} []
							 ]

		in (error . ("readsPrec RegExChar.ExtendedRegExChar:\tparse-error; " ++) . show) `either` (filter (SelfValidate.isValid . fst) . return) $ Parsec.parse (
			Control.Monad.liftM2 (,) extendedRegExCharParser Parsec.getInput
		) "ExtendedRegExChar" s

instance Show ExtendedRegExChar	where
	showsPrec _ MkExtendedRegExChar {
		hasNonCapturingTopLevelAlternatives	= hasNonCapturingTopLevelAlternatives',
		extendedRegEx				= RegEx.MkExtendedRegEx {
			RegEx.bowAnchor		= maybeBowAnchor,
			RegEx.concatenation	= concatenation',
			RegEx.sternAnchor	= maybeSternAnchor
		}
	} = RegEx.showsMaybeAnchor maybeBowAnchor . foldl (.) (showString "") (
		(
			let
				showAlternatives :: RegEx.Alternatives Char -> [ShowS]
				showAlternatives	= Data.List.intersperse (showChar RegEx.alternativeExtendedRegExSeparatorToken) . map (shows . MkExtendedRegExChar False) . RegEx.deconstructAlternatives
			in if hasNonCapturingTopLevelAlternatives'
				then map (
					\repeatablePattern	-> case Repeatable.base repeatablePattern of
						RegEx.CaptureGroup alternatives	-> foldr (.) (showString "") $ showAlternatives alternatives
						_				-> error $ "Show RegExChar.ExtendedRegExChar: unexpected " ++ show repeatablePattern
				)
				else map (
					\repeatablePattern	-> (
						case Repeatable.base repeatablePattern of
							RegEx.Require meta		-> shows $ MetaChar.MkMetaChar meta
							RegEx.CaptureGroup alternatives	-> showChar (
								fst RegEx.captureGroupDelimiters
							 ) . foldr (.) (
								showChar $ snd RegEx.captureGroupDelimiters	--Initial value.
							 ) (
								showAlternatives alternatives
							 )
					) . Repeatable.showSuffix repeatablePattern
				)
		 ) concatenation'
	 ) . RegEx.showsMaybeAnchor maybeSternAnchor

-- | A veneer over the underlying polymorphic operator, 'RegEx.+~'.
(+~) ::
	InputData					-- ^ The input-data string.
	-> RegExOpts.RegExOpts ExtendedRegExChar	-- ^ The match-options, parameterised by the regex-specification.
	-> RegEx.Result Char
inputData +~ regExOpts	= inputData RegEx.+~ fmap extendedRegEx regExOpts	--CAVEAT: <http://hackage.haskell.org/trac/haskell-prime/wiki/QualifiedOperators>.

-- | A veneer over the underlying polymorphic operator, 'RegEx.=~'.
(=~) ::
	InputData					-- ^ The input-data string.
	-> RegExOpts.RegExOpts ExtendedRegExChar	-- ^ The match-options, parameterised by the regex-specification.
	-> Bool
inputData =~ regExOpts	= inputData RegEx.=~ fmap extendedRegEx regExOpts	--CAVEAT: <http://hackage.haskell.org/trac/haskell-prime/wiki/QualifiedOperators>.

-- | Pattern-mismatch operator.
(/~) ::
	InputData					-- ^ The input-data string.
	-> RegExOpts.RegExOpts ExtendedRegExChar	-- ^ The match-options, parameterised by the regex-specification.
	-> Bool
(/~) inputData	= not . (inputData =~)