{-# LANGUAGE CPP #-}
{-
Copyright (C) 2010-2015 Dr. Alistair Ward
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
-}
{- |
[@AUTHOR@] Dr. Alistair Ward
[@DESCRIPTION@]
* A data-type, which represents the permissible range of instances, of the underlying polymorphic datum.
* Designed for use in a polymorphic /regex/-engine, which specifies patterns composed of repeated /greedy/ & /non-greedy/ sequences of /Meta/-data;
> * + ? {fewest, most} {fewest,} {fewest}
> *? +? ?? {fewest, most}? {fewest,}?
* In the context of /regex/es, this concept is known as /Quantification/.
* /regex/es evolved from the minimal ability to optionally qualify the datum with a suffix.
More exotic repetition-specifications could be composed by concatenating these atomic building-blocks.
Here, I've taken the contrary top-down view, & assumed that all data are qualified by a full 'RepetitionBounds', which in most cases will degenerate into a simpler form.
* The type of entity which is being repeated, isn't the domain of this data-type; it's polymorphic.
-}
module RegExDot.Repeatable(
-- * Types
-- ** Type-synonyms
Repetitions,
RepetitionBounds,
-- ** Data-types
Repeatable(..),
-- * Constants
-- nonGreedyToken,
oneOrMoreToken,
rangeDelimiters,
rangeSeparatorToken,
tokens,
zeroOrMoreToken,
zeroOrOneToken,
-- * Functions
one,
oneOrMore,
oneOrMore',
zeroOrMore,
zeroOrMore',
zeroOrOne,
zeroOrOne',
-- precisely,
repeatableParser,
showSuffix,
-- ** Accessors
getFewest,
getMost,
-- ** Mutators
focus,
-- setNonGreedy,
toSingleton,
-- ** Operators
(^#->#),
(^#->#?),
(^#->),
(^#->?),
(^#),
-- ** Predicates
isPrecise,
hasPreciseBounds
) where
import Control.Arrow((***))
import qualified Control.DeepSeq
import qualified Data.List
import qualified RegExDot.Consumer as Consumer
import qualified RegExDot.ConsumptionProfile as ConsumptionProfile
import qualified Text.ParserCombinators.Parsec as Parsec
import Text.ParserCombinators.Parsec((>))
import qualified ToolShed.Data.Pair
import qualified ToolShed.SelfValidate
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative((<$>), (<*>))
#endif
infix 6 ^#->#, ^#->#?, ^#->, ^#->?, ^# -- A notch tighter than "DSL"s binary operators.
-- | A number of repetitions.
type Repetitions = Int
-- | Defines the bounds of a range of permissible repetitions.
type RepetitionBounds = (Repetitions, Maybe Repetitions)
-- | Creates a precise 'RepetitionBounds', i.e. both lower & upper bounds on the number of 'Repetitions' are equal to the same value.
precisely :: Repetitions -> RepetitionBounds
precisely i = (i, Just i)
-- | Predicate which is 'True' if exactly one value is permissible, ie lower & upper bounds on the number of 'Repetitions' are identical.
hasPreciseBounds :: RepetitionBounds -> Bool
hasPreciseBounds (fewest, most) = Just fewest == most
-- | Declares a polymorphic data-type, which augments the underlying 'base' datum, with the range of times it may be used.
data Repeatable a = MkRepeatable {
base :: a, -- ^ The underlying polymorphic datum.
repetitionBounds :: RepetitionBounds, -- ^ The bounds delimiting the range of permissible repetitions, of 'base'.
isGreedy :: Bool -- ^ Whether to demand as many matching instances of 'base' as possible; or as few (AKA /lazy quantification/).
} deriving Eq
instance Functor Repeatable where
fmap f repeatable = repeatable { base = f $ base repeatable }
-- | True if there's no choice in the number of repetitions; implemented via 'isPrecise'.
isPrecise :: Repeatable a -> Bool
isPrecise = hasPreciseBounds . repetitionBounds
-- | Builds a parser for a specification of the number of permissible instances of the specified polymorphic parameter.
repeatableParser :: a -> Parsec.Parser (Repeatable a)
repeatableParser b = Parsec.option (
one b -- The default; there's no concept of greediness here.
) $ do
repeatable <- Parsec.choice [
(Parsec.char oneOrMoreToken > "Repeatable.oneOrMoreToken " ++ show oneOrMoreToken) >> return {-to ParsecT-monad-} (oneOrMore b),
(Parsec.char zeroOrOneToken > "Repeatable.zeroOrOneToken " ++ show zeroOrOneToken) >> return {-to ParsecT-monad-} (zeroOrOne b),
(Parsec.char zeroOrMoreToken > "Repeatable.zeroOrMoreToken " ++ show zeroOrMoreToken) >> return {-to ParsecT-monad-} (zeroOrMore b),
(b ^#->#) {-arbitrarily greedy for now-} <$> uncurry Parsec.between (ToolShed.Data.Pair.mirror Parsec.char rangeDelimiters) (
do
fewest <- Parsec.spaces >> (read <$> Parsec.many1 Parsec.digit > "Repetition-range minimum")
most <- Parsec.spaces >> Parsec.option (
Just fewest -- The default.
) (
do
i <- (
Parsec.char rangeSeparatorToken > "Repeatable.rangeSeparatorToken " ++ show rangeSeparatorToken
) >> Parsec.spaces >> Parsec.option Nothing {-default to open-ended range-} (
Just . read <$> Parsec.many1 Parsec.digit > "Repetition-range maximum"
)
Parsec.spaces >> return {-to ParsecT-monad-} i
)
return {-to ParsecT-monad-} (fewest, most)
) > "Repeatable.rangeDelimiters " ++ show rangeDelimiters
]
g <- Parsec.option True {-the default-} $ (Parsec.char nonGreedyToken > "Repeatable.nonGreedyToken " ++ show nonGreedyToken) >> return {-to ParsecT-monad-} False
return {-to ParsecT-monad-} repeatable { isGreedy = g } -- Correct prior assumption.
instance Read a => Read (Repeatable a) where
readsPrec _ s = case reads s {-first, read the base-type-} of
[(base', s1)] -> (error . ("readsPrec Repeatable:\tparse-error; " ++) . show) `either` return $ Parsec.parse ((,) <$> repeatableParser base' <*> Parsec.getInput) "Repeatable" s1
_ -> [] -- No parse.
{- |
* A 'ShowS'-function for the suffix, denoting the permissible repetitions, of 'base'.
* This function converts the internal, into the tradition /greedy/ & /non-greedy/ quantifiers of various specific varieties.
-}
showSuffix :: Repeatable a -> ShowS
showSuffix repeatable = let
showRange :: ShowS -> ShowS
showRange x = (\(begin, end) -> begin . x . end) $ ToolShed.Data.Pair.mirror showChar rangeDelimiters
in (
case repetitionBounds repeatable of
(0, Nothing) -> showChar zeroOrMoreToken
(1, Nothing) -> showChar oneOrMoreToken
(fewest, Nothing) -> showRange $ shows fewest . showChar rangeSeparatorToken
(0, Just 1) -> showChar zeroOrOneToken
(1, Just 1) -> id -- CAVEAT: since there's no explicit repetition-operator, the non-greedy modifier can't be appended.
(fewest, Just most) -> showRange $ if fewest == most
then shows fewest -- Single-valued range.
else shows fewest . showChar rangeSeparatorToken . shows most
) . if ($ repeatable) `any` [isGreedy, isPrecise] {-without a range of possibilities, non-greediness is irrelevant-}
then id
else showChar nonGreedyToken -- This can only be appended, if there a previous repetition-operator for it to modify.
-- Replicate the syntax, for repetition, as used in a POSIX-standard /regex/.
instance Show a => Show (Repeatable a) where
showsPrec _ repeatable = shows (base repeatable) . showSuffix repeatable
instance Consumer.Consumer a => Consumer.Consumer (Repeatable a) where
consumptionProfile MkRepeatable {
base = b,
repetitionBounds = (fewest, most)
} = baseConsumptionProfile {
ConsumptionProfile.consumptionBounds = (fewest *) *** ((*) <$> most <*>) $ ConsumptionProfile.consumptionBounds baseConsumptionProfile -- CAVEAT: special cases exist, where one or both halves of this calculation degenerate to a simpler form, but special treatment, in an attempt to improve performance, proved counterproductive.
} where
baseConsumptionProfile :: ConsumptionProfile.ConsumptionProfile
baseConsumptionProfile = Consumer.consumptionProfile b
starHeight MkRepeatable {
base = b,
repetitionBounds = r
} = Consumer.starHeight b + if hasPreciseBounds r then 0 else 1
instance ToolShed.SelfValidate.SelfValidator a => ToolShed.SelfValidate.SelfValidator (Repeatable a) where
getErrors MkRepeatable {
base = b,
repetitionBounds = (fewest, most),
isGreedy = g
}
| not $ ToolShed.SelfValidate.isValid b = ToolShed.SelfValidate.getErrors b -- Delegate.
| otherwise = ToolShed.SelfValidate.extractErrors [
(fewest < 0, "Negative fewest=" ++ show fewest ++ "."),
(
case most of
Just m -> m < fewest
_ -> False,
"Invalid repetition-range; '" ++ show (fewest, most) ++ "'."
), (
not g && case most of
Just m -> fewest >= m -- There ought to be potential for non-greediness, where specified: the converse isn't true, since greediness isn't explicit, & may not have been wanted.
_ -> False,
"Invalid non-greedy repetition-range; '" ++ show (fewest, most) ++ "'."
)
]
instance Control.DeepSeq.NFData a => Control.DeepSeq.NFData (Repeatable a) where
rnf MkRepeatable {
base = b,
repetitionBounds = r,
isGreedy = g
} = Control.DeepSeq.rnf (b, r, g)
-- | Mutator.
setNonGreedy :: Repeatable a -> Repeatable a
setNonGreedy r = r { isGreedy = False }
{- |
* Construct a greedy 'Repeatable', from a polymorphic datum, with the specified range of permissible instances.
* The /#/s in the identifier represent the two bounds.
* /a{f, m}/
-}
(^#->#)
:: a -- ^ The polymorphic payload from which to construct the 'Repeatable'.
-> RepetitionBounds -- ^ The permissible repetition-bounds for the polymorphic data.
-> Repeatable a
b ^#-># bounds = MkRepeatable {
base = b,
repetitionBounds = bounds,
isGreedy = True
}
{- |
* Construct a non-greedy version of '^#->#'.
* /a{f, m}?/
-}
(^#->#?)
:: a -- ^ The polymorphic payload from which to construct the 'Repeatable'.
-> RepetitionBounds -- ^ The permissible repetition-bounds for the polymorphic data.
-> Repeatable a
b ^#->#? bounds = setNonGreedy (b ^#-># bounds)
{- |
* Construct a greedy 'Repeatable', tailored for data repeated at least the specified number of times.
* The /#/ in the identifier represents the single bound.
* /a{f,}/
-}
(^#->)
:: a -- ^ The polymorphic payload from which to construct the 'Repeatable'.
-> Repetitions -- ^ The minimum permissible repetitions of the polymorphic data.
-> Repeatable a
b ^#-> fewest = b ^#-># (fewest, Nothing)
{- |
* Construct a non-greedy version of '^#->'.
* /a{f,}?/
-}
(^#->?)
:: a -- ^ The polymorphic payload from which to construct the 'Repeatable'.
-> Repetitions -- ^ The minimum permissible repetitions of the polymorphic data.
-> Repeatable a
b ^#->? fewest = setNonGreedy (b ^#-> fewest)
{- |
* Construct a 'Repeatable', tailored for data repeated a precise number of times.
* The /#/ in the identifier represents the single bound.
* /a{f}/
-}
(^#)
:: a -- ^ The polymorphic payload from which to construct the 'Repeatable'.
-> Repetitions -- ^ The precise number of repetitions of the polymorphic data which is required.
-> Repeatable a
b ^# r = b ^#-># precisely r
{- |
* Construct a 'Repeatable', tailored for unrepeated data.
* A degenerate case of '^#'.
-}
one :: a -> Repeatable a
one = (^# 1)
{- |
* Construct a greedy 'Repeatable', from a polymorphic datum, with 'fewest' == 0 & 'most' == 1.
* A specific case of '^#->#'.
-}
zeroOrOne :: a -> Repeatable a
zeroOrOne = (^#-># (0, Just 1))
-- | Construct a non-greedy version of 'zeroOrOne'.
zeroOrOne' :: a -> Repeatable a
zeroOrOne' = setNonGreedy . zeroOrOne
{- |
* Construct a greedy 'Repeatable', from a polymorphic datum, with 'fewest' == 0.
* A specific case of '^#->'.
-}
zeroOrMore :: a -> Repeatable a
zeroOrMore = (^#-> 0)
-- | Construct a non-greedy version of 'zeroOrMore'.
zeroOrMore' :: a -> Repeatable a
zeroOrMore' = setNonGreedy . zeroOrMore
{- |
* Construct a greedy 'Repeatable', from a polymorphic datum, with lower 'RepetitionBounds' == one.
* A specific case of '^#->'.
-}
oneOrMore :: a -> Repeatable a
oneOrMore = (^#-> 1)
-- | Construct a non-greedy version of 'oneOrMore'.
oneOrMore' :: a -> Repeatable a
oneOrMore' = setNonGreedy . oneOrMore
-- | Reduces a 'Repeatable', with a range of 'RepetitionBounds', to a precise number of repetitions.
focus :: Repeatable a -> Repetitions -> Repeatable a
focus r i = r { repetitionBounds = precisely i }
{- |
* Reduces a 'Repeatable', with a range of 'RepetitionBounds', to a singleton.
* A degenerate case of 'focus'.
-}
toSingleton :: Repeatable a -> Repeatable a
toSingleton = (`focus` 1)
-- | Accessor.
getFewest :: Repeatable a -> Repetitions
getFewest MkRepeatable { repetitionBounds = (f, _) } = f
-- | Accessor.
getMost :: Repeatable a -> Maybe Repetitions
getMost MkRepeatable { repetitionBounds = (_, m) } = m
-- | The token used to denote /non-greedy/, when in the 'String'-form.
nonGreedyToken :: Char
nonGreedyToken = '?'
{- |
* The token used to denote 'zeroOrMore', when in the 'String'-form.
* AKA /Kleene Star/.
-}
zeroOrMoreToken :: Char
zeroOrMoreToken = '*'
-- | The token used to denote 'zeroOrOne', when in the 'String'-form.
zeroOrOneToken :: Char
zeroOrOneToken = '?'
-- | The token used to denote 'oneOrMore', when in the 'String'-form.
oneOrMoreToken :: Char
oneOrMoreToken = '+'
-- | The delimiters of '^#->#', when in the 'String'-form.
rangeDelimiters :: (Char, Char)
rangeDelimiters = ('{', '}')
-- | The token used to separate 'RepetitionBounds', when in the 'String'-form.
rangeSeparatorToken :: Char
rangeSeparatorToken = ','
-- | The set of 'Char' to which a specific meaning is attributed, when reading from 'String'.
tokens :: String
tokens = Data.List.nub [nonGreedyToken, zeroOrMoreToken, zeroOrOneToken, oneOrMoreToken, fst rangeDelimiters, snd rangeDelimiters, rangeSeparatorToken]