-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Replaces/Enhances Text.Regex
--
@package regex-tdfa
@version 1.2.1
module Text.Regex.TDFA.NewDFA.Uncons
class Uncons a
uncons :: Uncons a => a -> Maybe (Char, a)
instance Uncons ByteString
instance Uncons ByteString
instance Uncons (Seq Char)
instance Uncons [Char]
-- | This creates a lazy Trie based on a finite range of Ints and is used
-- to memorize a function over the subsets of this range.
--
-- To create a Trie you need two supply 2 things * Range of keys to bound
-- * A function or functions used to construct the value for a subset of
-- keys
--
-- The Trie uses the Array type internally.
module Text.Regex.TDFA.IntArrTrieSet
data TrieSet v
TrieSet :: v -> Array Int (TrieSet v) -> TrieSet v
value :: TrieSet v -> v
next :: TrieSet v -> Array Int (TrieSet v)
-- | This is the accessor for the Trie. The list of keys should be sorted.
lookupAsc :: TrieSet v -> [Int] -> v
-- | This is a Trie constructor for a complete range of keys.
fromBounds :: (Int, Int) -> ([Int] -> v) -> TrieSet v
-- | This is a Trie constructor for a complete range of keys that uses a
-- function from single values and a merge operation on values to fill
-- the Trie.
fromSinglesMerge :: v -> (v -> v -> v) -> (Int, Int) -> (Int -> v) -> TrieSet v
-- | This is a Trie constructor for a complete range of keys that uses a
-- function from single values and a sum operation of values to fill the
-- Trie.
fromSinglesSum :: ([v] -> v) -> (Int, Int) -> (Int -> v) -> TrieSet v
module Data.IntSet.EnumSet2
newtype EnumSet e
EnumSet :: IntSet -> EnumSet e
unEnumSet :: EnumSet e -> IntSet
(\\) :: Enum e => EnumSet e -> EnumSet e -> EnumSet e
null :: Enum e => EnumSet e -> Bool
size :: Enum e => EnumSet e -> Int
member :: Enum e => e -> EnumSet e -> Bool
notMember :: Enum e => Int -> EnumSet e -> Bool
isSubsetOf :: Enum e => EnumSet e -> EnumSet e -> Bool
isProperSubsetOf :: Enum e => EnumSet e -> EnumSet e -> Bool
empty :: Enum e => EnumSet e
singleton :: Enum e => e -> EnumSet e
insert :: Enum e => e -> EnumSet e -> EnumSet e
delete :: Enum e => e -> EnumSet e -> EnumSet e
union :: Enum e => EnumSet e -> EnumSet e -> EnumSet e
unions :: Enum e => [EnumSet e] -> EnumSet e
difference :: Enum e => EnumSet e -> EnumSet e -> EnumSet e
intersection :: Enum e => EnumSet e -> EnumSet e -> EnumSet e
filter :: Enum e => (e -> Bool) -> EnumSet e -> EnumSet e
partition :: Enum e => (e -> Bool) -> EnumSet e -> (EnumSet e, EnumSet e)
split :: Enum e => e -> EnumSet e -> (EnumSet e, EnumSet e)
splitMember :: Enum e => e -> EnumSet e -> (EnumSet e, Bool, EnumSet e)
map :: Enum e => (e -> e) -> EnumSet e -> EnumSet e
fold :: Enum e => (e -> b -> b) -> b -> EnumSet e -> b
elems :: Enum e => EnumSet e -> [e]
toList :: Enum e => EnumSet e -> [e]
fromList :: Enum e => [e] -> EnumSet e
toAscList :: Enum e => EnumSet e -> [e]
fromAscList :: Enum e => [e] -> EnumSet e
fromDistinctAscList :: Enum e => [e] -> EnumSet e
showTree :: Enum e => EnumSet e -> String
showTreeWith :: Enum e => Bool -> Bool -> EnumSet e -> String
instance Eq (EnumSet e)
instance Ord (EnumSet e)
instance Read (EnumSet e)
instance Show (EnumSet e)
instance Monoid (EnumSet e)
module Data.IntMap.EnumMap2
newtype EnumMap k a
EnumMap :: IntMap a -> EnumMap k a
unEnumMap :: EnumMap k a -> IntMap a
(!) :: Enum key => EnumMap key a -> key -> a
(\\) :: Enum key => EnumMap key a -> EnumMap key b -> EnumMap key a
null :: Enum key => EnumMap key a -> Bool
size :: Enum key => EnumMap key a -> Int
member :: Enum key => key -> EnumMap key a -> Bool
notMember :: Enum key => key -> EnumMap key a -> Bool
lookup :: Enum key => key -> EnumMap key a -> Maybe a
findWithDefault :: Enum key => a -> key -> EnumMap key a -> a
empty :: Enum key => EnumMap key a
singleton :: Enum key => key -> a -> EnumMap key a
insert :: Enum key => key -> a -> EnumMap key a -> EnumMap key a
insertWith :: Enum key => (a -> a -> a) -> key -> a -> EnumMap key a -> EnumMap key a
insertWithKey :: Enum key => (key -> a -> a -> a) -> key -> a -> EnumMap key a -> EnumMap key a
insertLookupWithKey :: Enum key => (key -> a -> a -> a) -> key -> a -> EnumMap key a -> (Maybe a, EnumMap key a)
delete :: Enum key => key -> EnumMap key a -> EnumMap key a
adjust :: Enum key => (a -> a) -> key -> EnumMap key a -> EnumMap key a
adjustWithKey :: Enum key => (key -> a -> a) -> key -> EnumMap key a -> EnumMap key a
update :: Enum key => (a -> Maybe a) -> key -> EnumMap key a -> EnumMap key a
updateWithKey :: Enum key => (key -> a -> Maybe a) -> key -> EnumMap key a -> EnumMap key a
updateLookupWithKey :: Enum key => (key -> a -> Maybe a) -> key -> EnumMap key a -> (Maybe a, EnumMap key a)
union :: Enum key => EnumMap key a -> EnumMap key a -> EnumMap key a
unionWith :: Enum key => (a -> a -> a) -> EnumMap key a -> EnumMap key a -> EnumMap key a
unionWithKey :: Enum key => (key -> a -> a -> a) -> EnumMap key a -> EnumMap key a -> EnumMap key a
unions :: Enum key => [EnumMap key a] -> EnumMap key a
unionsWith :: Enum key => (a -> a -> a) -> [EnumMap key a] -> EnumMap key a
difference :: Enum key => EnumMap key a -> EnumMap key b -> EnumMap key a
differenceWith :: Enum key => (a -> b -> Maybe a) -> EnumMap key a -> EnumMap key b -> EnumMap key a
differenceWithKey :: Enum key => (key -> a -> b -> Maybe a) -> EnumMap key a -> EnumMap key b -> EnumMap key a
intersection :: Enum key => EnumMap key a -> EnumMap key b -> EnumMap key a
intersectionWith :: Enum key => (a -> b -> a) -> EnumMap key a -> EnumMap key b -> EnumMap key a
intersectionWithKey :: Enum key => (key -> a -> b -> a) -> EnumMap key a -> EnumMap key b -> EnumMap key a
map :: Enum key => (a -> b) -> EnumMap key a -> EnumMap key b
mapWithKey :: Enum key => (key -> a -> b) -> EnumMap key a -> EnumMap key b
mapAccum :: Enum key => (a -> b -> (a, c)) -> a -> EnumMap key b -> (a, EnumMap key c)
mapAccumWithKey :: Enum key => (a -> key -> b -> (a, c)) -> a -> EnumMap key b -> (a, EnumMap key c)
fold :: Enum key => (a -> b -> b) -> b -> EnumMap key a -> b
foldWithKey :: Enum key => (key -> a -> b -> b) -> b -> EnumMap key a -> b
elems :: Enum key => EnumMap key a -> [a]
keys :: Enum key => EnumMap key a -> [key]
keysSet :: Enum key => EnumMap key a -> EnumSet key
assocs :: Enum key => EnumMap key a -> [(key, a)]
toList :: Enum key => EnumMap key a -> [(key, a)]
fromList :: Enum key => [(key, a)] -> EnumMap key a
fromListWith :: Enum key => (a -> a -> a) -> [(key, a)] -> EnumMap key a
fromListWithKey :: Enum key => (key -> a -> a -> a) -> [(key, a)] -> EnumMap key a
toAscList :: Enum key => EnumMap key a -> [(key, a)]
fromAscList :: Enum key => [(key, a)] -> EnumMap key a
fromAscListWith :: Enum key => (a -> a -> a) -> [(key, a)] -> EnumMap key a
fromAscListWithKey :: Enum key => (key -> a -> a -> a) -> [(key, a)] -> EnumMap key a
fromDistinctAscList :: Enum key => [(key, a)] -> EnumMap key a
filter :: Enum key => (a -> Bool) -> EnumMap key a -> EnumMap key a
filterWithKey :: Enum key => (key -> a -> Bool) -> EnumMap key a -> EnumMap key a
partition :: Enum key => (a -> Bool) -> EnumMap key a -> (EnumMap key a, EnumMap key a)
partitionWithKey :: Enum key => (key -> a -> Bool) -> EnumMap key a -> (EnumMap key a, EnumMap key a)
mapMaybe :: Enum key => (a -> Maybe b) -> EnumMap key a -> EnumMap key b
mapMaybeWithKey :: Enum key => (key -> a -> Maybe b) -> EnumMap key a -> EnumMap key b
mapEither :: Enum key => (a -> Either b c) -> EnumMap key a -> (EnumMap key b, EnumMap key c)
mapEitherWithKey :: Enum key => (key -> a -> Either b c) -> EnumMap key a -> (EnumMap key b, EnumMap key c)
split :: Enum key => key -> EnumMap key a -> (EnumMap key a, EnumMap key a)
splitLookup :: Enum key => key -> EnumMap key a -> (EnumMap key a, Maybe a, EnumMap key a)
isSubmapOf :: (Enum key, Eq a) => EnumMap key a -> EnumMap key a -> Bool
isSubmapOfBy :: Enum key => (a -> b -> Bool) -> EnumMap key a -> EnumMap key b -> Bool
isProperSubmapOf :: (Enum key, Eq a) => EnumMap key a -> EnumMap key a -> Bool
isProperSubmapOfBy :: Enum key => (a -> b -> Bool) -> EnumMap key a -> EnumMap key b -> Bool
showTree :: (Enum key, Show a) => EnumMap key a -> String
showTreeWith :: (Enum key, Show a) => Bool -> Bool -> EnumMap key a -> String
instance Eq a => Eq (EnumMap k a)
instance Ord a => Ord (EnumMap k a)
instance Read a => Read (EnumMap k a)
instance Show a => Show (EnumMap k a)
instance Ord k => Foldable (EnumMap k)
instance Ord k => Functor (EnumMap k)
instance Ord k => Monoid (EnumMap k a)
module Data.IntMap.CharMap2
newtype CharMap a
CharMap :: IntMap a -> CharMap a
unCharMap :: CharMap a -> IntMap a
type Key = Char
(!) :: CharMap a -> Key -> a
(\\) :: CharMap a -> CharMap b -> CharMap a
null :: CharMap a -> Bool
size :: CharMap a -> Int
member :: Key -> CharMap a -> Bool
notMember :: Key -> CharMap a -> Bool
lookup :: Key -> CharMap a -> Maybe a
findWithDefault :: a -> Key -> CharMap a -> a
empty :: CharMap a
singleton :: Key -> a -> CharMap a
insert :: Key -> a -> CharMap a -> CharMap a
insertWith :: (a -> a -> a) -> Key -> a -> CharMap a -> CharMap a
insertWithKey :: (Key -> a -> a -> a) -> Key -> a -> CharMap a -> CharMap a
insertLookupWithKey :: (Key -> a -> a -> a) -> Key -> a -> CharMap a -> (Maybe a, CharMap a)
delete :: Key -> CharMap a -> CharMap a
adjust :: (a -> a) -> Key -> CharMap a -> CharMap a
adjustWithKey :: (Key -> a -> a) -> Key -> CharMap a -> CharMap a
update :: (a -> Maybe a) -> Key -> CharMap a -> CharMap a
updateWithKey :: (Key -> a -> Maybe a) -> Key -> CharMap a -> CharMap a
updateLookupWithKey :: (Key -> a -> Maybe a) -> Key -> CharMap a -> (Maybe a, CharMap a)
union :: CharMap a -> CharMap a -> CharMap a
unionWith :: (a -> a -> a) -> CharMap a -> CharMap a -> CharMap a
unionWithKey :: (Key -> a -> a -> a) -> CharMap a -> CharMap a -> CharMap a
unions :: [CharMap a] -> CharMap a
unionsWith :: (a -> a -> a) -> [CharMap a] -> CharMap a
difference :: CharMap a -> CharMap b -> CharMap a
differenceWith :: (a -> b -> Maybe a) -> CharMap a -> CharMap b -> CharMap a
differenceWithKey :: (Key -> a -> b -> Maybe a) -> CharMap a -> CharMap b -> CharMap a
intersection :: CharMap a -> CharMap b -> CharMap a
intersectionWith :: (a -> b -> a) -> CharMap a -> CharMap b -> CharMap a
intersectionWithKey :: (Key -> a -> b -> a) -> CharMap a -> CharMap b -> CharMap a
map :: (a -> b) -> CharMap a -> CharMap b
mapWithKey :: (Key -> a -> b) -> CharMap a -> CharMap b
mapAccum :: (a -> b -> (a, c)) -> a -> CharMap b -> (a, CharMap c)
mapAccumWithKey :: (a -> Key -> b -> (a, c)) -> a -> CharMap b -> (a, CharMap c)
fold :: (a -> b -> b) -> b -> CharMap a -> b
foldWithKey :: (Key -> a -> b -> b) -> b -> CharMap a -> b
elems :: CharMap a -> [a]
keys :: CharMap a -> [Key]
keysSet :: CharMap a -> IntSet
assocs :: CharMap a -> [(Key, a)]
toList :: CharMap a -> [(Key, a)]
fromList :: [(Key, a)] -> CharMap a
fromListWith :: (a -> a -> a) -> [(Key, a)] -> CharMap a
fromListWithKey :: (Key -> a -> a -> a) -> [(Key, a)] -> CharMap a
toAscList :: CharMap a -> [(Key, a)]
fromAscList :: [(Key, a)] -> CharMap a
fromAscListWith :: (a -> a -> a) -> [(Key, a)] -> CharMap a
fromAscListWithKey :: (Key -> a -> a -> a) -> [(Key, a)] -> CharMap a
fromDistinctAscList :: [(Key, a)] -> CharMap a
filter :: (a -> Bool) -> CharMap a -> CharMap a
filterWithKey :: (Key -> a -> Bool) -> CharMap a -> CharMap a
partition :: (a -> Bool) -> CharMap a -> (CharMap a, CharMap a)
partitionWithKey :: (Key -> a -> Bool) -> CharMap a -> (CharMap a, CharMap a)
mapMaybe :: (a -> Maybe b) -> CharMap a -> CharMap b
mapMaybeWithKey :: (Key -> a -> Maybe b) -> CharMap a -> CharMap b
mapEither :: (a -> Either b c) -> CharMap a -> (CharMap b, CharMap c)
mapEitherWithKey :: (Key -> a -> Either b c) -> CharMap a -> (CharMap b, CharMap c)
split :: Key -> CharMap a -> (CharMap a, CharMap a)
splitLookup :: Key -> CharMap a -> (CharMap a, Maybe a, CharMap a)
isSubmapOf :: Eq a => CharMap a -> CharMap a -> Bool
isSubmapOfBy :: (a -> b -> Bool) -> CharMap a -> CharMap b -> Bool
isProperSubmapOf :: Eq a => CharMap a -> CharMap a -> Bool
isProperSubmapOfBy :: (a -> b -> Bool) -> CharMap a -> CharMap b -> Bool
showTree :: Show a => CharMap a -> String
showTreeWith :: Show a => Bool -> Bool -> CharMap a -> String
instance Eq a => Eq (CharMap a)
instance Ord a => Ord (CharMap a)
instance Read a => Read (CharMap a)
instance Show a => Show (CharMap a)
instance Functor CharMap
instance Monoid (CharMap a)
-- | Common provides simple functions to the backend. It defines most of
-- the data types. All modules should call error via the common_error
-- function below.
module Text.Regex.TDFA.Common
look :: Int -> IntMap a -> a
common_error :: String -> String -> a
on :: (t1 -> t1 -> t2) -> (t -> t1) -> t -> t -> t2
-- | after sort or sortBy the use of nubnubBy can be replaced by
-- norepnorepBy
norep :: Eq a => [a] -> [a]
-- | after sort or sortBy the use of nubnubBy can be replaced by
-- norepnorepBy
norepBy :: (a -> a -> Bool) -> [a] -> [a]
mapFst :: Functor f => (t -> t2) -> f (t, t1) -> f (t2, t1)
mapSnd :: Functor f => (t1 -> t2) -> f (t, t1) -> f (t, t2)
fst3 :: (a, b, c) -> a
snd3 :: (a, b, c) -> b
thd3 :: (a, b, c) -> c
flipOrder :: Ordering -> Ordering
noWin :: WinTags -> Bool
-- | Used to track elements of the pattern that accept characters or are
-- anchors
newtype DoPa
DoPa :: Int -> DoPa
dopaIndex :: DoPa -> Int
-- | Control whether the pattern is multiline or case-sensitive like
-- Text.Regex and whether to capture the subgroups (1, 2, etc). Controls
-- enabling extra anchor syntax.
data CompOption
CompOption :: Bool -> Bool -> Bool -> Bool -> Bool -> CompOption
-- | True in blankCompOpt and defaultCompOpt
caseSensitive :: CompOption -> Bool
-- | False in blankCompOpt, True in defaultCompOpt. Compile for
-- newline-sensitive matching. "By default, newline is a completely
-- ordinary character with no special meaning in either REs or strings.
-- With this flag, inverted bracket expressions and . never match
-- newline, a ^ anchor matches the null string after any newline in the
-- string in addition to its normal function, and the $ anchor matches
-- the null string before any newline in the string in addition to its
-- normal function."
multiline :: CompOption -> Bool
-- | True (and therefore Right associative) in blankCompOpt and
-- defaultCompOpt
rightAssoc :: CompOption -> Bool
-- | False in blankCompOpt, True in defaultCompOpt. Add the extended
-- non-POSIX syntax described in Text.Regex.TDFA haddock
-- documentation.
newSyntax :: CompOption -> Bool
-- | False by default. This is POSIX correct but it takes space and is
-- slower. Setting this to true will improve performance, and should be
-- done if you plan to set the captureGroups execoption to False.
lastStarGreedy :: CompOption -> Bool
data ExecOption
ExecOption :: Bool -> ExecOption
-- | True by default. Set to False to improve speed (and space).
captureGroups :: ExecOption -> Bool
-- | Used by implementation to name certain Postions during matching.
-- Identity of Position tag to set during a transition
type Tag = Int
-- | Internal use to indicate type of tag and preference for larger or
-- smaller Positions
data OP
Maximize :: OP
Minimize :: OP
Orbit :: OP
Ignore :: OP
-- | Internal NFA node identity number
type Index = Int
-- | Internal DFA identity is this Set of NFA Index
type SetIndex = IntSet
-- | Index into the text being searched
type Position = Int
-- | GroupIndex is for indexing submatches from capturing parenthesized
-- groups (PGroup/Group)
type GroupIndex = Int
-- | GroupInfo collects the parent and tag information for an instance of a
-- group
data GroupInfo
GroupInfo :: GroupIndex -> GroupIndex -> Tag -> Tag -> Tag -> GroupInfo
thisIndex :: GroupInfo -> GroupIndex
parentIndex :: GroupInfo -> GroupIndex
startTag :: GroupInfo -> Tag
stopTag :: GroupInfo -> Tag
flagTag :: GroupInfo -> Tag
-- | The TDFA backend specific Regex type, used by this module's
-- RegexOptions and RegexMaker
data Regex
Regex :: DFA -> Index -> (Index, Index) -> (Tag, Tag) -> TrieSet DFA -> Array Tag OP -> Array GroupIndex [GroupInfo] -> Bool -> CompOption -> ExecOption -> Regex
-- | starting DFA state
regex_dfa :: Regex -> DFA
-- | index of starting state
regex_init :: Regex -> Index
-- | indexes of smallest and largest states
regex_b_index :: Regex -> (Index, Index)
-- | indexes of smallest and largest tags
regex_b_tags :: Regex -> (Tag, Tag)
-- | All DFA states
regex_trie :: Regex -> TrieSet DFA
-- | information about each tag
regex_tags :: Regex -> Array Tag OP
-- | information about each group
regex_groups :: Regex -> Array GroupIndex [GroupInfo]
-- | used for optimizing execution
regex_isFrontAnchored :: Regex -> Bool
regex_compOptions :: Regex -> CompOption
regex_execOptions :: Regex -> ExecOption
data WinEmpty
WinEmpty :: Instructions -> WinEmpty
WinTest :: WhichTest -> (Maybe WinEmpty) -> (Maybe WinEmpty) -> WinEmpty
-- | Internal NFA node type
data QNFA
QNFA :: Index -> QT -> QNFA
q_id :: QNFA -> Index
q_qt :: QNFA -> QT
-- | Internal to QNFA type.
data QT
Simple :: WinTags -> CharMap QTrans -> QTrans -> QT
-- | empty transitions to the virtual winning state
qt_win :: QT -> WinTags
-- | all ways to leave this QNFA to other or the same QNFA
qt_trans :: QT -> CharMap QTrans
-- | default ways to leave this QNFA to other or the same QNFA
qt_other :: QT -> QTrans
Testing :: WhichTest -> EnumSet DoPa -> QT -> QT -> QT
-- | The test to perform
qt_test :: QT -> WhichTest
-- | location(s) of the anchor(s) in the original regexp
qt_dopas :: QT -> EnumSet DoPa
-- | use qt_a if test is True, else use qt_b
qt_a :: QT -> QT
-- | use qt_a if test is True, else use qt_b
qt_b :: QT -> QT
-- | Internal type to represent the tagged transition from one QNFA to
-- another (or itself). The key is the Index of the destination QNFA.
type QTrans = IntMap [TagCommand]
-- | Known predicates, just Beginning of Line (^) and End of Line ($). Also
-- support for GNU extensions is being added: ` beginning of buffer, '
-- end of buffer, < and > for begin and end of words, b and B for
-- word boundary and not word boundary.
data WhichTest
Test_BOL :: WhichTest
Test_EOL :: WhichTest
Test_BOB :: WhichTest
Test_EOB :: WhichTest
Test_BOW :: WhichTest
Test_EOW :: WhichTest
Test_EdgeWord :: WhichTest
Test_NotEdgeWord :: WhichTest
-- | The things that can be done with a Tag. TagTask and ResetGroupStopTask
-- are for tags with Maximize or Minimize OP values. ResetOrbitTask and
-- EnterOrbitTask and LeaveOrbitTask are for tags with Orbit OP value.
data TagTask
TagTask :: TagTask
ResetGroupStopTask :: TagTask
SetGroupStopTask :: TagTask
ResetOrbitTask :: TagTask
EnterOrbitTask :: TagTask
LeaveOrbitTask :: TagTask
-- | Ordered list of tags and their associated Task
type TagTasks = [(Tag, TagTask)]
-- | When attached to a QTrans the TagTask can be done before or after
-- accepting the character.
data TagUpdate
PreUpdate :: TagTask -> TagUpdate
PostUpdate :: TagTask -> TagUpdate
-- | Ordered list of tags and their associated update operation.
type TagList = [(Tag, TagUpdate)]
-- | A TagList and the location of the item in the original pattern that is
-- being accepted.
type TagCommand = (DoPa, TagList)
-- | Ordered list of tags and their associated update operation to perform
-- on an empty transition to the virtual winning state.
type WinTags = TagList
-- | Internal DFA node, identified by the Set of indices of the QNFA nodes
-- it represents.
data DFA
DFA :: SetIndex -> DT -> DFA
d_id :: DFA -> SetIndex
d_dt :: DFA -> DT
data Transition
Transition :: DFA -> DFA -> DTrans -> Transition
-- | where to go (maximal), including respawning
trans_many :: Transition -> DFA
-- | where to go, not including respawning
trans_single :: Transition -> DFA
-- | how to go, including respawning
trans_how :: Transition -> DTrans
-- | Internal to the DFA node
data DT
Simple' :: IntMap Instructions -> CharMap Transition -> Transition -> DT
-- | Actions to perform to win
dt_win :: DT -> IntMap Instructions
-- | Transition to accept Char
dt_trans :: DT -> CharMap Transition
-- | default accepting transition
dt_other :: DT -> Transition
Testing' :: WhichTest -> EnumSet DoPa -> DT -> DT -> DT
-- | The test to perform
dt_test :: DT -> WhichTest
-- | location(s) of the anchor(s) in the original regexp
dt_dopas :: DT -> EnumSet DoPa
-- | use dt_a if test is True else use dt_b
dt_a :: DT -> DT
-- | use dt_a if test is True else use dt_b
dt_b :: DT -> DT
-- | Internal type to repesent the commands for the tagged transition. The
-- outer IntMap is for the destination Index and the inner IntMap is for
-- the Source Index. This is convenient since all runtime data going to
-- the same destination must be compared to find the best.
--
-- A Destination IntMap entry may have an empty Source IntMap if and only
-- if the destination is the starting index and the NFA/DFA. This
-- instructs the matching engine to spawn a new entry starting at the
-- post-update position.
type DTrans = IntMap (IntMap (DoPa, Instructions))
-- | Internal convenience type for the text display code
type DTrans' = [(Index, [(Index, (DoPa, ([(Tag, (Position, Bool))], [String])))])]
-- | Positions for which a * was re-started while looping. Need to append
-- locations at back but compare starting with front, so use Seq as a
-- Queue. The initial position is saved in basePos (and a Maximize Tag),
-- the middle positions in the Seq, and the final position is NOT saved
-- in the Orbits (only in a Maximize Tag).
--
-- The orderinal code is being written XXX TODO document it.
data Orbits
Orbits :: !Bool -> Position -> (Maybe Int) -> !(Seq Position) -> Orbits
inOrbit :: Orbits -> !Bool
basePos :: Orbits -> Position
ordinal :: Orbits -> (Maybe Int)
getOrbits :: Orbits -> !(Seq Position)
-- | The newPos and newFlags lists in Instructions are
-- sorted by, and unique in, the Tag values
data Instructions
Instructions :: ![(Tag, Action)] -> !(Maybe (Position -> OrbitTransformer)) -> Instructions
newPos :: Instructions -> ![(Tag, Action)]
newOrbits :: Instructions -> !(Maybe (Position -> OrbitTransformer))
data Action
SetPre :: Action
SetPost :: Action
SetVal :: Int -> Action
type OrbitTransformer = OrbitLog -> OrbitLog
type OrbitLog = IntMap Orbits
showQT :: QT -> String
indent :: [String] -> String
showDT :: DT -> String
seeDTrans :: DTrans -> String
instance Eq DoPa
instance Ord DoPa
instance Read CompOption
instance Show CompOption
instance Read ExecOption
instance Show ExecOption
instance Eq OP
instance Show OP
instance Show GroupInfo
instance Show WhichTest
instance Eq WhichTest
instance Ord WhichTest
instance Enum WhichTest
instance Show TagTask
instance Eq TagTask
instance Show TagUpdate
instance Eq TagUpdate
instance Show Orbits
instance Show Action
instance Eq Action
instance Show Instructions
instance Show DFA
instance Show WinEmpty
instance Eq QT
instance Show DT
instance Show QT
instance Show QNFA
instance RegexOptions Regex CompOption ExecOption
instance Show DoPa
instance Enum DoPa
-- | This Text.Regex.TDFA.Pattern module provides the Pattern
-- data type and its subtypes. This Pattern type is used to
-- represent the parsed form of a Regular Expression.
module Text.Regex.TDFA.Pattern
-- | Pattern is the type returned by the regular expression parser. This is
-- consumed by the CorePattern module and the tender leaves are nibbled
-- by the TNFA module.
data Pattern
PEmpty :: Pattern
PGroup :: (Maybe GroupIndex) -> Pattern -> Pattern
POr :: [Pattern] -> Pattern
PConcat :: [Pattern] -> Pattern
PQuest :: Pattern -> Pattern
PPlus :: Pattern -> Pattern
PStar :: Bool -> Pattern -> Pattern
PBound :: Int -> (Maybe Int) -> Pattern -> Pattern
PCarat :: DoPa -> Pattern
getDoPa :: Pattern -> DoPa
PDollar :: DoPa -> Pattern
getDoPa :: Pattern -> DoPa
PDot :: DoPa -> Pattern
getDoPa :: Pattern -> DoPa
PAny :: DoPa -> PatternSet -> Pattern
getDoPa :: Pattern -> DoPa
getPatternSet :: Pattern -> PatternSet
PAnyNot :: DoPa -> PatternSet -> Pattern
getDoPa :: Pattern -> DoPa
getPatternSet :: Pattern -> PatternSet
PEscape :: DoPa -> Char -> Pattern
getDoPa :: Pattern -> DoPa
getPatternChar :: Pattern -> Char
PChar :: DoPa -> Char -> Pattern
getDoPa :: Pattern -> DoPa
getPatternChar :: Pattern -> Char
PNonCapture :: Pattern -> Pattern
PNonEmpty :: Pattern -> Pattern
data PatternSet
PatternSet :: (Maybe (Set Char)) -> (Maybe (Set PatternSetCharacterClass)) -> (Maybe (Set PatternSetCollatingElement)) -> (Maybe (Set PatternSetEquivalenceClass)) -> PatternSet
newtype PatternSetCharacterClass
PatternSetCharacterClass :: String -> PatternSetCharacterClass
unSCC :: PatternSetCharacterClass -> String
newtype PatternSetCollatingElement
PatternSetCollatingElement :: String -> PatternSetCollatingElement
unSCE :: PatternSetCollatingElement -> String
newtype PatternSetEquivalenceClass
PatternSetEquivalenceClass :: String -> PatternSetEquivalenceClass
unSEC :: PatternSetEquivalenceClass -> String
-- | GroupIndex is for indexing submatches from capturing parenthesized
-- groups (PGroup/Group)
type GroupIndex = Int
-- | Used to track elements of the pattern that accept characters or are
-- anchors
newtype DoPa
DoPa :: Int -> DoPa
dopaIndex :: DoPa -> Int
-- | I have not been checking, but this should have the property that
-- parsing the resulting string should result in an identical Pattern.
-- This is not true if starTrans has created PNonCapture and PNonEmpty
-- values or a (PStar False). The contents of a "[ ]" grouping are always
-- shown in a sorted canonical order.
showPattern :: Pattern -> String
-- | Do the transformation and simplification in a single traversal. This
-- removes the PPlus, PQuest, and PBound values, changing to POr and
-- PEmpty and PStar True/False. For some PBound values it adds PNonEmpty
-- and PNonCapture semantic marker. It also simplifies to flatten out
-- nested POr and PConcat instances and eliminate some unneeded PEmpty
-- values.
starTrans :: Pattern -> Pattern
starTrans' :: Pattern -> Pattern
-- | Function to transform a pattern into an equivalent, but less redundant
-- form. Nested POr and PConcat are flattened. PEmpty is
-- propagated.
simplify' :: Pattern -> Pattern
-- | Apply a Pattern transfomation function depth first
dfsPattern :: (Pattern -> Pattern) -> Pattern -> Pattern
instance Eq PatternSetCharacterClass
instance Ord PatternSetCharacterClass
instance Eq PatternSetCollatingElement
instance Ord PatternSetCollatingElement
instance Eq PatternSetEquivalenceClass
instance Ord PatternSetEquivalenceClass
instance Eq PatternSet
instance Eq Pattern
instance Show Pattern
instance Show PatternSetEquivalenceClass
instance Show PatternSetCollatingElement
instance Show PatternSetCharacterClass
instance Show PatternSet
-- | This is a POSIX version of parseRegex that allows NUL characters.
-- LazyPossessiveBackrefs are not recognized. Anchors ^ and $ are
-- recognized.
--
-- The PGroup returned always have (Maybe GroupIndex) set to (Just _) and
-- never to Nothing.
module Text.Regex.TDFA.ReadRegex
-- | Return either an error message or a tuple of the Pattern and the
-- largest group index and the largest DoPa index (both have smallest
-- index of 1). Since the regular expression is supplied as [Char] it
-- automatically supports unicode and '\NUL' characters.
parseRegex :: String -> Either ParseError (Pattern, (GroupIndex, DoPa))
-- | The CorePattern module deconstructs the Pattern tree created by
-- ReadRegex.parseRegex and returns a simpler Q/P tree with annotations
-- at each Q node. This will be converted by the TNFA module into a QNFA
-- finite automata.
--
-- Of particular note, this Pattern to Q/P conversion creates and assigns
-- all the internal Tags that will be used during the matching process,
-- and associates the captures groups with the tags that represent their
-- starting and ending locations and with their immediate parent group.
--
-- Each Maximize and Minimize tag is held as either a preTag or a postTag
-- by one and only one location in the Q/P tree. The Orbit tags are each
-- held by one and only one Star node. Tags that stop a Group are also
-- held in perhaps numerous preReset lists.
--
-- The additional nullQ::nullView field of Q records the potentially
-- complex information about what tests and tags must be used if the
-- pattern unQ::P matches 0 zero characters. There can be redundancy in
-- nullView, which is eliminated by cleanNullView.
--
-- Uses recursive do notation.
--
-- 2009 XXX TODO: we can avoid needing tags in the part of the pattern
-- after the last capturing group (when right-associative). This is
-- flipped for left-associative where the front of the pattern before the
-- first capturing group needs no tags. The edge of these regions is
-- subtle: both case needs a Maximize tag. One ought to be able to check
-- the Pattern: if the root is PConcat then a scan from the end (start)
-- looking for the first with an embedded PGroup can be found and the
-- PGroup free elements can be wrapped in some new PNOTAG semantic
-- indicator.
module Text.Regex.TDFA.CorePattern
data Q
Q :: NullView -> (Position, Maybe Position) -> [Tag] -> [Tag] -> Maybe Tag -> Maybe Tag -> Bool -> Bool -> Wanted -> P -> Q
nullQ :: Q -> NullView
takes :: Q -> (Position, Maybe Position)
preReset :: Q -> [Tag]
postSet :: Q -> [Tag]
preTag :: Q -> Maybe Tag
postTag :: Q -> Maybe Tag
tagged :: Q -> Bool
childGroups :: Q -> Bool
wants :: Q -> Wanted
unQ :: Q -> P
data P
Empty :: P
Or :: [Q] -> P
Seq :: Q -> Q -> P
Star :: Maybe Tag -> [Tag] -> Bool -> Q -> P
getOrbit :: P -> Maybe Tag
resetOrbits :: P -> [Tag]
firstNull :: P -> Bool
unStar :: P -> Q
Test :: TestInfo -> P
OneChar :: Pattern -> P
NonEmpty :: Q -> P
-- | Known predicates, just Beginning of Line (^) and End of Line ($). Also
-- support for GNU extensions is being added: ` beginning of buffer, '
-- end of buffer, < and > for begin and end of words, b and B for
-- word boundary and not word boundary.
data WhichTest
Test_BOL :: WhichTest
Test_EOL :: WhichTest
Test_BOB :: WhichTest
Test_EOB :: WhichTest
Test_BOW :: WhichTest
Test_EOW :: WhichTest
Test_EdgeWord :: WhichTest
Test_NotEdgeWord :: WhichTest
data Wanted
WantsQNFA :: Wanted
WantsQT :: Wanted
WantsBoth :: Wanted
WantsEither :: Wanted
type TestInfo = (WhichTest, DoPa)
-- | Internal use to indicate type of tag and preference for larger or
-- smaller Positions
data OP
Maximize :: OP
Minimize :: OP
Orbit :: OP
Ignore :: OP
newtype SetTestInfo
SetTestInfo :: EnumMap WhichTest (EnumSet DoPa) -> SetTestInfo
getTests :: SetTestInfo -> EnumMap WhichTest (EnumSet DoPa)
type NullView = [(SetTestInfo, TagList)]
patternToQ :: CompOption -> (Pattern, (GroupIndex, DoPa)) -> (Q, Array Tag OP, Array GroupIndex [GroupInfo])
cleanNullView :: NullView -> NullView
cannotAccept :: Q -> Bool
mustAccept :: Q -> Bool
instance Eq SetTestInfo
instance Show HandleTag
instance Eq Wanted
instance Show Wanted
instance Eq Q
instance Show P
instance Eq P
instance Show Q
instance Show SetTestInfo
instance Monoid SetTestInfo
module Text.Regex.TDFA.NewDFA.MakeTest
test_singleline :: Uncons text => WhichTest -> Index -> Char -> text -> Bool
test_multiline :: Uncons text => WhichTest -> Index -> Char -> text -> Bool
-- | Like Engine, but merely checks to see whether any match at all is
-- found.
module Text.Regex.TDFA.NewDFA.Tester
matchTest :: Uncons text => Regex -> text -> Bool
-- | This is the code for the main engine. This captures the posix
-- subexpressions. There is also a non-capturing engine, and a testing
-- engine.
--
-- It is polymorphic over the internal Uncons type class, and specialized
-- to produce the needed variants.
module Text.Regex.TDFA.NewDFA.Engine_FA
execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray]
-- | This is the non-capturing form of Text.Regex.TDFA.NewDFA.String
module Text.Regex.TDFA.NewDFA.Engine_NC
execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray]
instance Show WScratch
-- | This is the non-capturing form of Text.Regex.TDFA.NewDFA.String
module Text.Regex.TDFA.NewDFA.Engine_NC_FA
execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray]
-- | This is the code for the main engine. This captures the posix
-- subexpressions. This execMatch also dispatches to
-- Engine_NC, Engine_FA, and Engine_FC_NA
--
-- It is polymorphic over the internal Uncons type class, and specialized
-- to produce the needed variants.
module Text.Regex.TDFA.NewDFA.Engine
execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray]
-- | Text.Regex.TDFA.TNFA converts the CorePattern Q/P data (and its
-- Pattern leafs) to a QNFA tagged non-deterministic finite automata.
--
-- This holds every possible way to follow one state by another, while in
-- the DFA these will be reduced by picking a single best transition for
-- each (soure,destination) pair. The transitions are heavily and often
-- redundantly annotated with tasks to perform, and this redundancy is
-- reduced when picking the best transition. So far, keeping all this
-- information has helped fix bugs in both the design and implementation.
--
-- The QNFA for a Pattern with a starTraned Q/P form with N one character
-- accepting leaves has at most N+1 nodes. These nodes repesent the
-- future choices after accepting a leaf. The processing of Or nodes
-- often reduces this number by sharing at the end of the different
-- paths. Turning off capturing while compiling the pattern may (future
-- extension) reduce this further for some patterns by processing Star
-- with optimizations. This compact design also means that tags are
-- assigned not just to be updated before taking a transition (PreUpdate)
-- but also after the transition (PostUpdate).
--
-- Uses recursive do notation.
module Text.Regex.TDFA.TNFA
patternToNFA :: CompOption -> (Pattern, (GroupIndex, DoPa)) -> ((Index, Array Index QNFA), Array Tag OP, Array GroupIndex [GroupInfo])
-- | Internal NFA node type
data QNFA
QNFA :: Index -> QT -> QNFA
q_id :: QNFA -> Index
q_qt :: QNFA -> QT
-- | Internal to QNFA type.
data QT
Simple :: WinTags -> CharMap QTrans -> QTrans -> QT
-- | empty transitions to the virtual winning state
qt_win :: QT -> WinTags
-- | all ways to leave this QNFA to other or the same QNFA
qt_trans :: QT -> CharMap QTrans
-- | default ways to leave this QNFA to other or the same QNFA
qt_other :: QT -> QTrans
Testing :: WhichTest -> EnumSet DoPa -> QT -> QT -> QT
-- | The test to perform
qt_test :: QT -> WhichTest
-- | location(s) of the anchor(s) in the original regexp
qt_dopas :: QT -> EnumSet DoPa
-- | use qt_a if test is True, else use qt_b
qt_a :: QT -> QT
-- | use qt_a if test is True, else use qt_b
qt_b :: QT -> QT
-- | Internal type to represent the tagged transition from one QNFA to
-- another (or itself). The key is the Index of the destination QNFA.
type QTrans = IntMap [TagCommand]
-- | When attached to a QTrans the TagTask can be done before or after
-- accepting the character.
data TagUpdate
PreUpdate :: TagTask -> TagUpdate
PostUpdate :: TagTask -> TagUpdate
-- | Text.Regex.TDFA.TDFA converts the QNFA from TNFA into the DFA.
-- A DFA state corresponds to a Set of QNFA states, repesented as list of
-- Index which are used to lookup the DFA state in a lazy Trie which
-- holds all possible subsets of QNFA states.
module Text.Regex.TDFA.TDFA
patternToRegex :: (Pattern, (GroupIndex, DoPa)) -> CompOption -> ExecOption -> Regex
-- | Internal DFA node, identified by the Set of indices of the QNFA nodes
-- it represents.
data DFA
DFA :: SetIndex -> DT -> DFA
d_id :: DFA -> SetIndex
d_dt :: DFA -> DT
-- | Internal to the DFA node
data DT
Simple' :: IntMap Instructions -> CharMap Transition -> Transition -> DT
-- | Actions to perform to win
dt_win :: DT -> IntMap Instructions
-- | Transition to accept Char
dt_trans :: DT -> CharMap Transition
-- | default accepting transition
dt_other :: DT -> Transition
Testing' :: WhichTest -> EnumSet DoPa -> DT -> DT -> DT
-- | The test to perform
dt_test :: DT -> WhichTest
-- | location(s) of the anchor(s) in the original regexp
dt_dopas :: DT -> EnumSet DoPa
-- | use dt_a if test is True else use dt_b
dt_a :: DT -> DT
-- | use dt_a if test is True else use dt_b
dt_b :: DT -> DT
examineDFA :: Regex -> String
nfaToDFA :: ((Index, Array Index QNFA), Array Tag OP, Array GroupIndex [GroupInfo]) -> CompOption -> ExecOption -> Regex
dfaMap :: DFA -> Map SetIndex DFA
instance Show AlterOrbit
-- | This modules provides RegexMaker and RegexLike instances
-- for using String with the TDFA backend.
--
-- This exports instances of the high level API and the medium level API
-- of compile,execute, and regexec.
module Text.Regex.TDFA.String
-- | The TDFA backend specific Regex type, used by this module's
-- RegexOptions and RegexMaker
data Regex
-- | 0 based index from start of source, or (-1) for unused
type MatchOffset = Int
-- | non-negative length of a match
type MatchLength = Int
-- | Control whether the pattern is multiline or case-sensitive like
-- Text.Regex and whether to capture the subgroups (1, 2, etc). Controls
-- enabling extra anchor syntax.
data CompOption
data ExecOption
compile :: CompOption -> ExecOption -> String -> Either String Regex
execute :: Regex -> String -> Either String (Maybe MatchArray)
regexec :: Regex -> String -> Either String (Maybe (String, String, String, [String]))
instance RegexContext Regex String String
instance RegexLike Regex String
instance RegexMaker Regex CompOption ExecOption String
-- | This modules provides RegexMaker and RegexLike instances
-- for using ByteString with the DFA backend
-- (Text.Regex.Lib.WrapDFAEngine and
-- Text.Regex.Lazy.DFAEngineFPS). This module is usually used via
-- import Text.Regex.TDFA.
--
-- This exports instances of the high level API and the medium level API
-- of compile,execute, and regexec.
module Text.Regex.TDFA.ByteString
-- | The TDFA backend specific Regex type, used by this module's
-- RegexOptions and RegexMaker
data Regex
-- | Control whether the pattern is multiline or case-sensitive like
-- Text.Regex and whether to capture the subgroups (1, 2, etc). Controls
-- enabling extra anchor syntax.
data CompOption
data ExecOption
compile :: CompOption -> ExecOption -> ByteString -> Either String Regex
execute :: Regex -> ByteString -> Either String (Maybe MatchArray)
regexec :: Regex -> ByteString -> Either String (Maybe (ByteString, ByteString, ByteString, [ByteString]))
instance RegexLike Regex ByteString
instance RegexMaker Regex CompOption ExecOption ByteString
instance RegexContext Regex ByteString ByteString
-- | This modules provides RegexMaker and RegexLike instances
-- for using ByteString with the DFA backend
-- (Text.Regex.Lib.WrapDFAEngine and
-- Text.Regex.Lazy.DFAEngineFPS). This module is usually used via
-- import Text.Regex.TDFA.
--
-- This exports instances of the high level API and the medium level API
-- of compile,execute, and regexec.
module Text.Regex.TDFA.ByteString.Lazy
-- | The TDFA backend specific Regex type, used by this module's
-- RegexOptions and RegexMaker
data Regex
-- | Control whether the pattern is multiline or case-sensitive like
-- Text.Regex and whether to capture the subgroups (1, 2, etc). Controls
-- enabling extra anchor syntax.
data CompOption
data ExecOption
compile :: CompOption -> ExecOption -> ByteString -> Either String Regex
execute :: Regex -> ByteString -> Either String (Maybe MatchArray)
regexec :: Regex -> ByteString -> Either String (Maybe (ByteString, ByteString, ByteString, [ByteString]))
instance RegexLike Regex ByteString
instance RegexMaker Regex CompOption ExecOption ByteString
instance RegexContext Regex ByteString ByteString
-- | This modules provides RegexMaker and RegexLike instances
-- for using ByteString with the DFA backend
-- (Text.Regex.Lib.WrapDFAEngine and
-- Text.Regex.Lazy.DFAEngineFPS). This module is usually used via
-- import Text.Regex.TDFA.
--
-- This exports instances of the high level API and the medium level API
-- of compile,execute, and regexec.
module Text.Regex.TDFA.Sequence
-- | The TDFA backend specific Regex type, used by this module's
-- RegexOptions and RegexMaker
data Regex
-- | Control whether the pattern is multiline or case-sensitive like
-- Text.Regex and whether to capture the subgroups (1, 2, etc). Controls
-- enabling extra anchor syntax.
data CompOption
data ExecOption
compile :: CompOption -> ExecOption -> (Seq Char) -> Either String Regex
execute :: Regex -> (Seq Char) -> Either String (Maybe MatchArray)
regexec :: Regex -> (Seq Char) -> Either String (Maybe (Seq Char, Seq Char, Seq Char, [(Seq Char)]))
instance RegexLike Regex (Seq Char)
instance RegexMaker Regex CompOption ExecOption (Seq Char)
instance RegexContext Regex (Seq Char) (Seq Char)
-- | The Text.Regex.TDFA module provides a backend for regular
-- expressions. It provides instances for the classes defined and
-- documented in Text.Regex.Base and re-exported by this module.
-- If you import this along with other backends then you should do so
-- with qualified imports (with renaming for convenience).
--
-- This regex-tdfa package implements, correctly, POSIX extended regular
-- expressions. It is highly unlikely that the regex-posix package on
-- your operating system is correct, see
-- http:/www.haskell.orghaskellwiki/Regex_Posix for examples of
-- your OS's bugs.
--
-- This package does provide captured parenthesized subexpressions.
--
-- Depending on the text being searched this package supports Unicode.
-- The [Char] and (Seq Char) text types support Unicode. The ByteString
-- and ByteString.Lazy text types only support ASCII. It is possible to
-- support utf8 encoded ByteString.Lazy by using regex-tdfa and
-- regex-tdfa-utf8 packages together (required the utf8-string package).
--
-- As of version 1.1.1 the following GNU extensions are recognized, all
-- anchors:
--
-- \` at beginning of entire text
--
-- \' at end of entire text
--
-- \< at beginning of word
--
-- \> at end of word
--
-- \b at either beginning or end of word
--
-- \B at neither beginning nor end of word
--
-- The above are controlled by the newSyntax Bool in
-- CompOption.
--
-- Where the "word" boundaries means between characters that are and are
-- not in the [:word:] character class which contains [a-zA-Z0-9_]. Note
-- that < and b may match before the entire text and > and b may
-- match at the end of the entire text.
--
-- There is no locale support, so collating elements like [.ch.] are
-- simply ignored and equivalence classes like [=a=] are converted to
-- just [a]. The character classes like [:alnum:] are supported over
-- ASCII only, valid classes are alnum, digit, punct, alpha, graph,
-- space, blank, lower, upper, cntrl, print, xdigit, word.
--
-- This package does not provide "basic" regular expressions. This
-- package does not provide back references inside regular expressions.
--
-- The package does not provide Perl style regular expressions. Please
-- look at the regex-pcre and pcre-light packages instead.
module Text.Regex.TDFA
getVersion_Text_Regex_TDFA :: Version
-- | This is the pure functional matching operator. If the target cannot be
-- produced then some empty result will be returned. If there is an error
-- in processing, then error will be called.
(=~) :: (RegexMaker Regex CompOption ExecOption source, RegexContext Regex source1 target) => source1 -> source -> target
-- | This is the monadic matching operator. If a single match fails, then
-- fail will be called.
(=~~) :: (RegexMaker Regex CompOption ExecOption source, RegexContext Regex source1 target, Monad m) => source1 -> source -> m target