{-# OPTIONS_GHC -fno-warn-orphans #-}
{-
	Copyright (C) 2010 Dr. Alistair Ward

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see <http://www.gnu.org/licenses/>.
-}
{- |
 [@AUTHOR@]	Dr. Alistair Ward

 [@DESCRIPTION@]

	* Implements 'Meta.ShortcutExpander' 'Char', to implement some of the traditional /Perl-style shortcuts/:

	[@\\d@]		=> Any 'Data.Char.isDigit'-character.

	[@\\D@]		=> Any non-'Data.Char.isDigit' character.

	[@\\s@]		=> Any 'Data.Char.isSpace'-character.

	[@\\S@]		=> Any non-'Data.Char.isSpace' character.

	[@\\w@]		=> Any 'Data.Char.isWord' letter.

	[@\\W@]		=> Any non-'Data.Char.isWord' letter.

	* Enables one to compose concise regexen, containing any 'Char' that's a member of one of these predefined sets.

	* Though 'Meta.Meta' is polymorphic, & the type-parameter can't be assumed to implement either 'Enum' or 'Ord',
	'Char' actually does, so this module is able to implement ranges within a /Bracket-expression/.

	* Defines specialised instances of 'Read' & 'Show', to cope with /Perl-style shortcuts/, /Posix Character-classes/ & /Bracket-expression/ range-specifications.

 [@TODO@]	Use @Environment.getLocale@. Regrettably, this returns in the "IO"-monad, & even it didn't, how does one pass that information to 'Read' ?!
-}

module RegExChar.MetaChar(
-- * Types
--	AssociationList,
--	Dictionary,
-- ** Type-synonyms
	MetaChar(..),
-- * Constants
--	bracketExpressionRangeToken,
--	posixCharacterClassDelimiters,
-- * Functions
--	implementPerlShortcut,
	metaCharParser,
-- ** Accessors (Deconstructors)
	deconstruct
) where

import			Control.Applicative((<$>))
import			Control.Arrow((***))
import			Text.ParserCombinators.Parsec((<?>))
import qualified	Control.Monad
import qualified	Data.Char
import qualified	Data.Map
import qualified	RegExDot.BracketExpression		as BracketExpression
import qualified	RegExDot.BracketExpressionMember	as BracketExpressionMember
import qualified	RegExDot.Consumer			as Consumer
import qualified	RegExDot.Meta				as Meta
import qualified	RegExDot.RegEx				as RegEx
import qualified	RegExDot.Repeatable			as Repeatable
import qualified	RegExDot.ShowablePredicate		as ShowablePredicate
import qualified	Text.ParserCombinators.Parsec		as Parsec
import qualified	ToolShed.SelfValidate			as SelfValidate

-- | Holds a mapping from an escape-sequence & the predicate via which it is implemented.
type AssociationList key	= [(key, ShowablePredicate.Predicate Char)]

-- | Holds a mapping from an escape-sequence & the predicate via which it is implemented.
type Dictionary key		= Data.Map.Map key (ShowablePredicate.Predicate Char)

instance BracketExpressionMember.ShortcutExpander Char	where
	findPredicate shortcut	= ShowablePredicate.MkShowablePredicate [Meta.shortcutToken, shortcut] <$> shortcut `Data.Map.lookup` dictionary where
		dictionary :: Dictionary Char
		dictionary	= Data.Map.fromList $ perlShortcuts ++ invert perlShortcuts	where
			perlShortcuts :: AssociationList Char
			perlShortcuts	= [
				('d',	Data.Char.isDigit),
				('s',	Data.Char.isSpace),
				('w',	\c -> ($ c) `any` [Data.Char.isAlphaNum, (== '_')])
			 ]

			invert :: AssociationList Char -> AssociationList Char
			invert	= map (Data.Char.toUpper *** (not .))

instance Meta.ShortcutExpander Char	where
	expand c	= case BracketExpressionMember.findPredicate c of
		Just showablePredicate	-> Meta.Predicate showablePredicate
		_			-> Meta.Literal c

-- | A specialised instance, encapsulated to permit tailored instance-declarations.
newtype MetaChar	= MkMetaChar (Meta.Meta Char)	deriving Eq

-- | Accessor.
deconstruct :: MetaChar -> Meta.Meta Char
deconstruct (MkMetaChar metaChar)	= metaChar

instance SelfValidate.SelfValidator MetaChar	where
	isValid	= SelfValidate.isValid . deconstruct

instance Consumer.Consumer MetaChar	where
	consumptionProfile	= Consumer.consumptionProfile . deconstruct
	starHeight		= Consumer.starHeight . deconstruct

-- | Builds a parser of traditional regex-syntax, which understands 'Char'-specific concepts like /Perl-style shortcuts/ & /Posix Character-classes/.
metaCharParser :: Parsec.Parser MetaChar
metaCharParser	= MkMetaChar <$> Parsec.choice [
	(Parsec.char Meta.anyToken <?> "Meta.anyToken " ++ show Meta.anyToken) >> return {-to GenParser-monad-} Meta.Any,
	(Parsec.char Meta.shortcutToken <?> "Meta.shortcutToken " ++ show Meta.shortcutToken) >> Meta.expand <$> Parsec.anyChar,
	uncurry Parsec.between (Parsec.char *** Parsec.char $ BracketExpression.delimiterTokens) (
		do
			let
				implementPosixCharacterClass :: String -> Maybe (ShowablePredicate.ShowablePredicate Char)
				implementPosixCharacterClass identifier	= ShowablePredicate.MkShowablePredicate (
					fst posixCharacterClassDelimiters ++ identifier ++ snd posixCharacterClassDelimiters
				 ) <$> identifier `Data.Map.lookup` dictionary	where
					dictionary :: Dictionary String
					dictionary	= Data.Map.fromList $ posixCharacterClasses ++ invert posixCharacterClasses	where
						posixCharacterClasses :: AssociationList String
						posixCharacterClasses	= [
							("alnum",	Data.Char.isAlphaNum),
							("alpha",	Data.Char.isAlpha),
							("ascii",	Data.Char.isAscii),
							("blank",	(`elem` " \t")),
							("cntrl",	Data.Char.isControl),
							("digit",	Data.Char.isDigit),
							("graph",	\c	-> not $ ($ c) `any` [Data.Char.isSpace, Data.Char.isControl]),
							("lower",	Data.Char.isLower),
							("print",	Data.Char.isPrint),
							("punct",	Data.Char.isPunctuation),
							("space",	Data.Char.isSpace),
							("upper",	Data.Char.isUpper),
							("word",	\c	-> ($ c) `any` [Data.Char.isAlphaNum, (== '_')]),
							("xdigit",	Data.Char.isHexDigit)
						 ]

						invert :: AssociationList String -> AssociationList String
						invert	= map $ (BracketExpression.negationToken :) *** (not .)

			cTor	<- Parsec.option Meta.AnyOf {-default-} $ (Parsec.char BracketExpression.negationToken <?> "BracketExpression.negationToken " ++ show BracketExpression.negationToken) >> return {-to GenParser-monad-} Meta.NoneOf

			literalBracketExpressionTerminator	<- Parsec.option [] {-default-} $ return {-to List-monad-} . BracketExpressionMember.Literal <$> (
				Parsec.char (snd BracketExpression.delimiterTokens)	<?> "Literal Bracket-expression terminator " ++ show (snd BracketExpression.delimiterTokens)
			 ) --If the first item in a BracketExpression (or negated BracketExpression) is the terminator-token, then it is treated as a 'BracketExpressionMember.Literal'.

			cTor . (literalBracketExpressionTerminator ++) <$> Parsec.many {-potentially zero-} (
				Parsec.choice [
					(
						do
							_	<- Parsec.char Meta.shortcutToken	<?> "Meta.shortcutToken " ++ show Meta.shortcutToken
							c	<- Parsec.anyChar

							return {-to GenParser-monad-} $ case BracketExpressionMember.findPredicate c of
								Just showablePredicate	-> BracketExpressionMember.Predicate showablePredicate
								_			-> BracketExpressionMember.Literal c	--Escaped literal.
					) <?> "Perl-style shortcut",
					Parsec.try (
						uncurry Parsec.between (Parsec.string *** Parsec.string $ posixCharacterClassDelimiters) (
							do
								identifier	<- Parsec.many1 $ Parsec.noneOf [head $ snd posixCharacterClassDelimiters]

								case implementPosixCharacterClass identifier of
									Just showablePredicate	-> return {-to GenParser-monad-} $ BracketExpressionMember.Predicate showablePredicate
									_			-> Parsec.unexpected $ "MetaChar.metaCharParser:\tunrecognised Posix Character-class; " ++ show identifier
						) <?> "Posix Character-class " ++ show posixCharacterClassDelimiters
					), --Regurgitate erroneously consumed input.
					Parsec.try (
						(
							do
								rangeStart	<- Parsec.noneOf [snd BracketExpression.delimiterTokens]
								_		<- Parsec.char bracketExpressionRangeToken	<?> "bracketExpressionRangeToken " ++ show bracketExpressionRangeToken
								rangeEnd	<- Parsec.noneOf [snd BracketExpression.delimiterTokens]

								return {-to GenParser-monad-} . BracketExpressionMember.Predicate . ShowablePredicate.MkShowablePredicate [
									rangeStart,
									bracketExpressionRangeToken,
									rangeEnd
								 ] $ \c -> rangeStart <= c && c <= rangeEnd	--Create custom predicate, utilising "Ord Char".
						) <?> "Bracket-expression range"
					), --Regurgitate erroneously consumed input.
					BracketExpressionMember.Literal <$> Parsec.noneOf [snd BracketExpression.delimiterTokens]	<?> "BracketExpressionMember.Literal"	--TODO: the first Char-member can be a literal ']'.
				] <?> "Bracket-expression member"
			 ) <?> "Bracket-expression member-list"
	) <?> "BracketExpression.delimiterTokens " ++ show BracketExpression.delimiterTokens,
	Meta.Literal <$> Parsec.noneOf RegEx.tokens
 ]

instance Read MetaChar	where
	readsPrec _	= (
		(error . ("readsPrec RegExChar.MetaChar:\tparse-error; " ++) . show) `either` return
	 ) . Parsec.parse (
		Control.Monad.liftM2 (,) metaCharParser Parsec.getInput
	 ) "MetaChar"

-- | The token used to signify an ordered range of members in a /Bracket-expression/.
bracketExpressionRangeToken :: Char
bracketExpressionRangeToken	= '-'

-- | The delimiters of a /Posix Character-class/.
posixCharacterClassDelimiters :: (String, String)
posixCharacterClassDelimiters	= ("[:", ":]")

instance Show MetaChar	where
	showsPrec _ (MkMetaChar Meta.Any)				= showChar Meta.anyToken
	showsPrec _ (MkMetaChar (Meta.Literal c))			= (
		if c `elem` [
			fst BracketExpression.delimiterTokens,
			fst Repeatable.rangeDelimiters,
			Repeatable.oneOrMoreToken,
			Repeatable.zeroOrMoreToken,
			Repeatable.zeroOrOneToken,
			Meta.anyToken,
			Meta.shortcutToken
		] ++ RegEx.tokens
			then showChar Meta.shortcutToken
			else id
	 ) . showChar c
	showsPrec _ (MkMetaChar (Meta.AnyOf bracketExpression))		= showChar (
		fst BracketExpression.delimiterTokens
	 ) . showString (
		foldr (
			\e	-> case e of
				BracketExpressionMember.Predicate showablePredicate	-> shows showablePredicate
				BracketExpressionMember.Literal literal			-> (
					if literal `elem` [
						bracketExpressionRangeToken,		--CAVEAT: only unambiguously literal when at the start or end of a "BracketExpression".
						Meta.shortcutToken,
						snd BracketExpression.delimiterTokens	--CAVEAT: only unambiguously literal when at the start of a "BracketExpression".
					]
						then showChar Meta.shortcutToken
						else id
				 ) . showChar literal
		) (
			showChar (snd BracketExpression.delimiterTokens) ""	--Initial value.
		) bracketExpression
	 )
	showsPrec _ (MkMetaChar (Meta.NoneOf bracketExpression))	= showChar x . showChar BracketExpression.negationToken . showString xs	where (x : xs)	= show . MkMetaChar $ Meta.AnyOf bracketExpression
	showsPrec _ (MkMetaChar m)					= shows m