regex-tdfa-1.2.2: Replaces/Enhances Text.Regex

Safe HaskellNone



Common provides simple functions to the backend. It defines most of the data types. All modules should call error via the common_error function below.



look :: Int -> IntMap a -> a Source

on :: (t1 -> t1 -> t2) -> (t -> t1) -> t -> t -> t2 Source

norep :: Eq a => [a] -> [a] Source

after sort or sortBy the use of 'nub'\/'nubBy' can be replaced by 'norep'\/'norepBy'

norepBy :: (a -> a -> Bool) -> [a] -> [a] Source

after sort or sortBy the use of 'nub'\/'nubBy' can be replaced by 'norep'\/'norepBy'

mapFst :: Functor f => (t -> t2) -> f (t, t1) -> f (t2, t1) Source

mapSnd :: Functor f => (t1 -> t2) -> f (t, t1) -> f (t, t2) Source

fst3 :: (a, b, c) -> a Source

snd3 :: (a, b, c) -> b Source

thd3 :: (a, b, c) -> c Source

newtype DoPa Source

Used to track elements of the pattern that accept characters or are anchors




dopaIndex :: Int

data CompOption Source

Control whether the pattern is multiline or case-sensitive like Text.Regex and whether to capture the subgroups (\1, \2, etc). Controls enabling extra anchor syntax.




caseSensitive :: Bool

True in blankCompOpt and defaultCompOpt

multiline :: Bool

False in blankCompOpt, True in defaultCompOpt. Compile for newline-sensitive matching. "By default, newline is a completely ordinary character with no special meaning in either REs or strings. With this flag, inverted bracket expressions and . never match newline, a ^ anchor matches the null string after any newline in the string in addition to its normal function, and the $ anchor matches the null string before any newline in the string in addition to its normal function."

rightAssoc :: Bool

True (and therefore Right associative) in blankCompOpt and defaultCompOpt

newSyntax :: Bool

False in blankCompOpt, True in defaultCompOpt. Add the extended non-POSIX syntax described in Text.Regex.TDFA haddock documentation.

lastStarGreedy :: Bool

False by default. This is POSIX correct but it takes space and is slower. Setting this to true will improve performance, and should be done if you plan to set the captureGroups execoption to False.

data ExecOption Source




captureGroups :: Bool

True by default. Set to False to improve speed (and space).

type Tag = Int Source

Used by implementation to name certain Postions during matching. Identity of Position tag to set during a transition

data OP Source

Internal use to indicate type of tag and preference for larger or smaller Positions




type Index = Int Source

Internal NFA node identity number

type SetIndex = IntSet Source

Internal DFA identity is this Set of NFA Index

type Position = Int Source

Index into the text being searched

type GroupIndex = Int Source

GroupIndex is for indexing submatches from capturing parenthesized groups (PGroup/Group)

data GroupInfo Source

GroupInfo collects the parent and tag information for an instance of a group


data Regex Source

The TDFA backend specific Regex type, used by this module's RegexOptions and RegexMaker




regex_dfa :: DFA

starting DFA state

regex_init :: Index

index of starting state

regex_b_index :: (Index, Index)

indexes of smallest and largest states

regex_b_tags :: (Tag, Tag)

indexes of smallest and largest tags

regex_trie :: TrieSet DFA

All DFA states

regex_tags :: Array Tag OP

information about each tag

regex_groups :: Array GroupIndex [GroupInfo]

information about each group

regex_isFrontAnchored :: Bool

used for optimizing execution

regex_compOptions :: CompOption
regex_execOptions :: ExecOption

data QNFA Source

Internal NFA node type




q_id :: Index
q_qt :: QT


data QT Source

Internal to QNFA type.




qt_win :: WinTags

empty transitions to the virtual winning state

qt_trans :: CharMap QTrans

all ways to leave this QNFA to other or the same QNFA

qt_other :: QTrans

default ways to leave this QNFA to other or the same QNFA



qt_test :: WhichTest

The test to perform

qt_dopas :: EnumSet DoPa

location(s) of the anchor(s) in the original regexp

qt_a, qt_b :: QT

use qt_a if test is True, else use qt_b


type QTrans = IntMap [TagCommand] Source

Internal type to represent the tagged transition from one QNFA to another (or itself). The key is the Index of the destination QNFA.

data WhichTest Source

Known predicates, just Beginning of Line (^) and End of Line ($). Also support for GNU extensions is being added: \` beginning of buffer, \' end of buffer, \< and \> for begin and end of words, \b and \B for word boundary and not word boundary.

data TagTask Source

The things that can be done with a Tag. TagTask and ResetGroupStopTask are for tags with Maximize or Minimize OP values. ResetOrbitTask and EnterOrbitTask and LeaveOrbitTask are for tags with Orbit OP value.

type TagTasks = [(Tag, TagTask)] Source

Ordered list of tags and their associated Task

data TagUpdate Source

When attached to a QTrans the TagTask can be done before or after accepting the character.

type TagList = [(Tag, TagUpdate)] Source

Ordered list of tags and their associated update operation.

type TagCommand = (DoPa, TagList) Source

A TagList and the location of the item in the original pattern that is being accepted.

type WinTags = TagList Source

Ordered list of tags and their associated update operation to perform on an empty transition to the virtual winning state.

data DFA Source

Internal DFA node, identified by the Set of indices of the QNFA nodes it represents.




d_id :: SetIndex
d_dt :: DT


data Transition Source




trans_many :: DFA

where to go (maximal), including respawning

trans_single :: DFA

where to go, not including respawning

trans_how :: DTrans

how to go, including respawning

data DT Source

Internal to the DFA node




dt_win :: IntMap Instructions

Actions to perform to win

dt_trans :: CharMap Transition

Transition to accept Char

dt_other :: Transition

default accepting transition



dt_test :: WhichTest

The test to perform

dt_dopas :: EnumSet DoPa

location(s) of the anchor(s) in the original regexp

dt_a, dt_b :: DT

use dt_a if test is True else use dt_b


type DTrans = IntMap (IntMap (DoPa, Instructions)) Source

Internal type to repesent the commands for the tagged transition. The outer IntMap is for the destination Index and the inner IntMap is for the Source Index. This is convenient since all runtime data going to the same destination must be compared to find the best.

A Destination IntMap entry may have an empty Source IntMap if and only if the destination is the starting index and the NFA/DFA. This instructs the matching engine to spawn a new entry starting at the post-update position.

type DTrans' = [(Index, [(Index, (DoPa, ([(Tag, (Position, Bool))], [String])))])] Source

Internal convenience type for the text display code

data Orbits Source

Positions for which a * was re-started while looping. Need to append locations at back but compare starting with front, so use Seq as a Queue. The initial position is saved in basePos (and a Maximize Tag), the middle positions in the Seq, and the final position is NOT saved in the Orbits (only in a Maximize Tag).

The orderinal code is being written XXX TODO document it.




data Instructions Source

The newPos and newFlags lists in Instructions are sorted by, and unique in, the Tag values