-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Replaces/Enhances Text.Regex -- @package regex-tdfa @version 1.2.1 module Text.Regex.TDFA.NewDFA.Uncons class Uncons a uncons :: Uncons a => a -> Maybe (Char, a) instance Uncons ByteString instance Uncons ByteString instance Uncons (Seq Char) instance Uncons [Char] -- | This creates a lazy Trie based on a finite range of Ints and is used -- to memorize a function over the subsets of this range. -- -- To create a Trie you need two supply 2 things * Range of keys to bound -- * A function or functions used to construct the value for a subset of -- keys -- -- The Trie uses the Array type internally. module Text.Regex.TDFA.IntArrTrieSet data TrieSet v TrieSet :: v -> Array Int (TrieSet v) -> TrieSet v value :: TrieSet v -> v next :: TrieSet v -> Array Int (TrieSet v) -- | This is the accessor for the Trie. The list of keys should be sorted. lookupAsc :: TrieSet v -> [Int] -> v -- | This is a Trie constructor for a complete range of keys. fromBounds :: (Int, Int) -> ([Int] -> v) -> TrieSet v -- | This is a Trie constructor for a complete range of keys that uses a -- function from single values and a merge operation on values to fill -- the Trie. fromSinglesMerge :: v -> (v -> v -> v) -> (Int, Int) -> (Int -> v) -> TrieSet v -- | This is a Trie constructor for a complete range of keys that uses a -- function from single values and a sum operation of values to fill the -- Trie. fromSinglesSum :: ([v] -> v) -> (Int, Int) -> (Int -> v) -> TrieSet v module Data.IntSet.EnumSet2 newtype EnumSet e EnumSet :: IntSet -> EnumSet e unEnumSet :: EnumSet e -> IntSet (\\) :: Enum e => EnumSet e -> EnumSet e -> EnumSet e null :: Enum e => EnumSet e -> Bool size :: Enum e => EnumSet e -> Int member :: Enum e => e -> EnumSet e -> Bool notMember :: Enum e => Int -> EnumSet e -> Bool isSubsetOf :: Enum e => EnumSet e -> EnumSet e -> Bool isProperSubsetOf :: Enum e => EnumSet e -> EnumSet e -> Bool empty :: Enum e => EnumSet e singleton :: Enum e => e -> EnumSet e insert :: Enum e => e -> EnumSet e -> EnumSet e delete :: Enum e => e -> EnumSet e -> EnumSet e union :: Enum e => EnumSet e -> EnumSet e -> EnumSet e unions :: Enum e => [EnumSet e] -> EnumSet e difference :: Enum e => EnumSet e -> EnumSet e -> EnumSet e intersection :: Enum e => EnumSet e -> EnumSet e -> EnumSet e filter :: Enum e => (e -> Bool) -> EnumSet e -> EnumSet e partition :: Enum e => (e -> Bool) -> EnumSet e -> (EnumSet e, EnumSet e) split :: Enum e => e -> EnumSet e -> (EnumSet e, EnumSet e) splitMember :: Enum e => e -> EnumSet e -> (EnumSet e, Bool, EnumSet e) map :: Enum e => (e -> e) -> EnumSet e -> EnumSet e fold :: Enum e => (e -> b -> b) -> b -> EnumSet e -> b elems :: Enum e => EnumSet e -> [e] toList :: Enum e => EnumSet e -> [e] fromList :: Enum e => [e] -> EnumSet e toAscList :: Enum e => EnumSet e -> [e] fromAscList :: Enum e => [e] -> EnumSet e fromDistinctAscList :: Enum e => [e] -> EnumSet e showTree :: Enum e => EnumSet e -> String showTreeWith :: Enum e => Bool -> Bool -> EnumSet e -> String instance Eq (EnumSet e) instance Ord (EnumSet e) instance Read (EnumSet e) instance Show (EnumSet e) instance Monoid (EnumSet e) module Data.IntMap.EnumMap2 newtype EnumMap k a EnumMap :: IntMap a -> EnumMap k a unEnumMap :: EnumMap k a -> IntMap a (!) :: Enum key => EnumMap key a -> key -> a (\\) :: Enum key => EnumMap key a -> EnumMap key b -> EnumMap key a null :: Enum key => EnumMap key a -> Bool size :: Enum key => EnumMap key a -> Int member :: Enum key => key -> EnumMap key a -> Bool notMember :: Enum key => key -> EnumMap key a -> Bool lookup :: Enum key => key -> EnumMap key a -> Maybe a findWithDefault :: Enum key => a -> key -> EnumMap key a -> a empty :: Enum key => EnumMap key a singleton :: Enum key => key -> a -> EnumMap key a insert :: Enum key => key -> a -> EnumMap key a -> EnumMap key a insertWith :: Enum key => (a -> a -> a) -> key -> a -> EnumMap key a -> EnumMap key a insertWithKey :: Enum key => (key -> a -> a -> a) -> key -> a -> EnumMap key a -> EnumMap key a insertLookupWithKey :: Enum key => (key -> a -> a -> a) -> key -> a -> EnumMap key a -> (Maybe a, EnumMap key a) delete :: Enum key => key -> EnumMap key a -> EnumMap key a adjust :: Enum key => (a -> a) -> key -> EnumMap key a -> EnumMap key a adjustWithKey :: Enum key => (key -> a -> a) -> key -> EnumMap key a -> EnumMap key a update :: Enum key => (a -> Maybe a) -> key -> EnumMap key a -> EnumMap key a updateWithKey :: Enum key => (key -> a -> Maybe a) -> key -> EnumMap key a -> EnumMap key a updateLookupWithKey :: Enum key => (key -> a -> Maybe a) -> key -> EnumMap key a -> (Maybe a, EnumMap key a) union :: Enum key => EnumMap key a -> EnumMap key a -> EnumMap key a unionWith :: Enum key => (a -> a -> a) -> EnumMap key a -> EnumMap key a -> EnumMap key a unionWithKey :: Enum key => (key -> a -> a -> a) -> EnumMap key a -> EnumMap key a -> EnumMap key a unions :: Enum key => [EnumMap key a] -> EnumMap key a unionsWith :: Enum key => (a -> a -> a) -> [EnumMap key a] -> EnumMap key a difference :: Enum key => EnumMap key a -> EnumMap key b -> EnumMap key a differenceWith :: Enum key => (a -> b -> Maybe a) -> EnumMap key a -> EnumMap key b -> EnumMap key a differenceWithKey :: Enum key => (key -> a -> b -> Maybe a) -> EnumMap key a -> EnumMap key b -> EnumMap key a intersection :: Enum key => EnumMap key a -> EnumMap key b -> EnumMap key a intersectionWith :: Enum key => (a -> b -> a) -> EnumMap key a -> EnumMap key b -> EnumMap key a intersectionWithKey :: Enum key => (key -> a -> b -> a) -> EnumMap key a -> EnumMap key b -> EnumMap key a map :: Enum key => (a -> b) -> EnumMap key a -> EnumMap key b mapWithKey :: Enum key => (key -> a -> b) -> EnumMap key a -> EnumMap key b mapAccum :: Enum key => (a -> b -> (a, c)) -> a -> EnumMap key b -> (a, EnumMap key c) mapAccumWithKey :: Enum key => (a -> key -> b -> (a, c)) -> a -> EnumMap key b -> (a, EnumMap key c) fold :: Enum key => (a -> b -> b) -> b -> EnumMap key a -> b foldWithKey :: Enum key => (key -> a -> b -> b) -> b -> EnumMap key a -> b elems :: Enum key => EnumMap key a -> [a] keys :: Enum key => EnumMap key a -> [key] keysSet :: Enum key => EnumMap key a -> EnumSet key assocs :: Enum key => EnumMap key a -> [(key, a)] toList :: Enum key => EnumMap key a -> [(key, a)] fromList :: Enum key => [(key, a)] -> EnumMap key a fromListWith :: Enum key => (a -> a -> a) -> [(key, a)] -> EnumMap key a fromListWithKey :: Enum key => (key -> a -> a -> a) -> [(key, a)] -> EnumMap key a toAscList :: Enum key => EnumMap key a -> [(key, a)] fromAscList :: Enum key => [(key, a)] -> EnumMap key a fromAscListWith :: Enum key => (a -> a -> a) -> [(key, a)] -> EnumMap key a fromAscListWithKey :: Enum key => (key -> a -> a -> a) -> [(key, a)] -> EnumMap key a fromDistinctAscList :: Enum key => [(key, a)] -> EnumMap key a filter :: Enum key => (a -> Bool) -> EnumMap key a -> EnumMap key a filterWithKey :: Enum key => (key -> a -> Bool) -> EnumMap key a -> EnumMap key a partition :: Enum key => (a -> Bool) -> EnumMap key a -> (EnumMap key a, EnumMap key a) partitionWithKey :: Enum key => (key -> a -> Bool) -> EnumMap key a -> (EnumMap key a, EnumMap key a) mapMaybe :: Enum key => (a -> Maybe b) -> EnumMap key a -> EnumMap key b mapMaybeWithKey :: Enum key => (key -> a -> Maybe b) -> EnumMap key a -> EnumMap key b mapEither :: Enum key => (a -> Either b c) -> EnumMap key a -> (EnumMap key b, EnumMap key c) mapEitherWithKey :: Enum key => (key -> a -> Either b c) -> EnumMap key a -> (EnumMap key b, EnumMap key c) split :: Enum key => key -> EnumMap key a -> (EnumMap key a, EnumMap key a) splitLookup :: Enum key => key -> EnumMap key a -> (EnumMap key a, Maybe a, EnumMap key a) isSubmapOf :: (Enum key, Eq a) => EnumMap key a -> EnumMap key a -> Bool isSubmapOfBy :: Enum key => (a -> b -> Bool) -> EnumMap key a -> EnumMap key b -> Bool isProperSubmapOf :: (Enum key, Eq a) => EnumMap key a -> EnumMap key a -> Bool isProperSubmapOfBy :: Enum key => (a -> b -> Bool) -> EnumMap key a -> EnumMap key b -> Bool showTree :: (Enum key, Show a) => EnumMap key a -> String showTreeWith :: (Enum key, Show a) => Bool -> Bool -> EnumMap key a -> String instance Eq a => Eq (EnumMap k a) instance Ord a => Ord (EnumMap k a) instance Read a => Read (EnumMap k a) instance Show a => Show (EnumMap k a) instance Ord k => Foldable (EnumMap k) instance Ord k => Functor (EnumMap k) instance Ord k => Monoid (EnumMap k a) module Data.IntMap.CharMap2 newtype CharMap a CharMap :: IntMap a -> CharMap a unCharMap :: CharMap a -> IntMap a type Key = Char (!) :: CharMap a -> Key -> a (\\) :: CharMap a -> CharMap b -> CharMap a null :: CharMap a -> Bool size :: CharMap a -> Int member :: Key -> CharMap a -> Bool notMember :: Key -> CharMap a -> Bool lookup :: Key -> CharMap a -> Maybe a findWithDefault :: a -> Key -> CharMap a -> a empty :: CharMap a singleton :: Key -> a -> CharMap a insert :: Key -> a -> CharMap a -> CharMap a insertWith :: (a -> a -> a) -> Key -> a -> CharMap a -> CharMap a insertWithKey :: (Key -> a -> a -> a) -> Key -> a -> CharMap a -> CharMap a insertLookupWithKey :: (Key -> a -> a -> a) -> Key -> a -> CharMap a -> (Maybe a, CharMap a) delete :: Key -> CharMap a -> CharMap a adjust :: (a -> a) -> Key -> CharMap a -> CharMap a adjustWithKey :: (Key -> a -> a) -> Key -> CharMap a -> CharMap a update :: (a -> Maybe a) -> Key -> CharMap a -> CharMap a updateWithKey :: (Key -> a -> Maybe a) -> Key -> CharMap a -> CharMap a updateLookupWithKey :: (Key -> a -> Maybe a) -> Key -> CharMap a -> (Maybe a, CharMap a) union :: CharMap a -> CharMap a -> CharMap a unionWith :: (a -> a -> a) -> CharMap a -> CharMap a -> CharMap a unionWithKey :: (Key -> a -> a -> a) -> CharMap a -> CharMap a -> CharMap a unions :: [CharMap a] -> CharMap a unionsWith :: (a -> a -> a) -> [CharMap a] -> CharMap a difference :: CharMap a -> CharMap b -> CharMap a differenceWith :: (a -> b -> Maybe a) -> CharMap a -> CharMap b -> CharMap a differenceWithKey :: (Key -> a -> b -> Maybe a) -> CharMap a -> CharMap b -> CharMap a intersection :: CharMap a -> CharMap b -> CharMap a intersectionWith :: (a -> b -> a) -> CharMap a -> CharMap b -> CharMap a intersectionWithKey :: (Key -> a -> b -> a) -> CharMap a -> CharMap b -> CharMap a map :: (a -> b) -> CharMap a -> CharMap b mapWithKey :: (Key -> a -> b) -> CharMap a -> CharMap b mapAccum :: (a -> b -> (a, c)) -> a -> CharMap b -> (a, CharMap c) mapAccumWithKey :: (a -> Key -> b -> (a, c)) -> a -> CharMap b -> (a, CharMap c) fold :: (a -> b -> b) -> b -> CharMap a -> b foldWithKey :: (Key -> a -> b -> b) -> b -> CharMap a -> b elems :: CharMap a -> [a] keys :: CharMap a -> [Key] keysSet :: CharMap a -> IntSet assocs :: CharMap a -> [(Key, a)] toList :: CharMap a -> [(Key, a)] fromList :: [(Key, a)] -> CharMap a fromListWith :: (a -> a -> a) -> [(Key, a)] -> CharMap a fromListWithKey :: (Key -> a -> a -> a) -> [(Key, a)] -> CharMap a toAscList :: CharMap a -> [(Key, a)] fromAscList :: [(Key, a)] -> CharMap a fromAscListWith :: (a -> a -> a) -> [(Key, a)] -> CharMap a fromAscListWithKey :: (Key -> a -> a -> a) -> [(Key, a)] -> CharMap a fromDistinctAscList :: [(Key, a)] -> CharMap a filter :: (a -> Bool) -> CharMap a -> CharMap a filterWithKey :: (Key -> a -> Bool) -> CharMap a -> CharMap a partition :: (a -> Bool) -> CharMap a -> (CharMap a, CharMap a) partitionWithKey :: (Key -> a -> Bool) -> CharMap a -> (CharMap a, CharMap a) mapMaybe :: (a -> Maybe b) -> CharMap a -> CharMap b mapMaybeWithKey :: (Key -> a -> Maybe b) -> CharMap a -> CharMap b mapEither :: (a -> Either b c) -> CharMap a -> (CharMap b, CharMap c) mapEitherWithKey :: (Key -> a -> Either b c) -> CharMap a -> (CharMap b, CharMap c) split :: Key -> CharMap a -> (CharMap a, CharMap a) splitLookup :: Key -> CharMap a -> (CharMap a, Maybe a, CharMap a) isSubmapOf :: Eq a => CharMap a -> CharMap a -> Bool isSubmapOfBy :: (a -> b -> Bool) -> CharMap a -> CharMap b -> Bool isProperSubmapOf :: Eq a => CharMap a -> CharMap a -> Bool isProperSubmapOfBy :: (a -> b -> Bool) -> CharMap a -> CharMap b -> Bool showTree :: Show a => CharMap a -> String showTreeWith :: Show a => Bool -> Bool -> CharMap a -> String instance Eq a => Eq (CharMap a) instance Ord a => Ord (CharMap a) instance Read a => Read (CharMap a) instance Show a => Show (CharMap a) instance Functor CharMap instance Monoid (CharMap a) -- | Common provides simple functions to the backend. It defines most of -- the data types. All modules should call error via the common_error -- function below. module Text.Regex.TDFA.Common look :: Int -> IntMap a -> a common_error :: String -> String -> a on :: (t1 -> t1 -> t2) -> (t -> t1) -> t -> t -> t2 -- | after sort or sortBy the use of nubnubBy can be replaced by -- norepnorepBy norep :: Eq a => [a] -> [a] -- | after sort or sortBy the use of nubnubBy can be replaced by -- norepnorepBy norepBy :: (a -> a -> Bool) -> [a] -> [a] mapFst :: Functor f => (t -> t2) -> f (t, t1) -> f (t2, t1) mapSnd :: Functor f => (t1 -> t2) -> f (t, t1) -> f (t, t2) fst3 :: (a, b, c) -> a snd3 :: (a, b, c) -> b thd3 :: (a, b, c) -> c flipOrder :: Ordering -> Ordering noWin :: WinTags -> Bool -- | Used to track elements of the pattern that accept characters or are -- anchors newtype DoPa DoPa :: Int -> DoPa dopaIndex :: DoPa -> Int -- | Control whether the pattern is multiline or case-sensitive like -- Text.Regex and whether to capture the subgroups (1, 2, etc). Controls -- enabling extra anchor syntax. data CompOption CompOption :: Bool -> Bool -> Bool -> Bool -> Bool -> CompOption -- | True in blankCompOpt and defaultCompOpt caseSensitive :: CompOption -> Bool -- | False in blankCompOpt, True in defaultCompOpt. Compile for -- newline-sensitive matching. "By default, newline is a completely -- ordinary character with no special meaning in either REs or strings. -- With this flag, inverted bracket expressions and . never match -- newline, a ^ anchor matches the null string after any newline in the -- string in addition to its normal function, and the $ anchor matches -- the null string before any newline in the string in addition to its -- normal function." multiline :: CompOption -> Bool -- | True (and therefore Right associative) in blankCompOpt and -- defaultCompOpt rightAssoc :: CompOption -> Bool -- | False in blankCompOpt, True in defaultCompOpt. Add the extended -- non-POSIX syntax described in Text.Regex.TDFA haddock -- documentation. newSyntax :: CompOption -> Bool -- | False by default. This is POSIX correct but it takes space and is -- slower. Setting this to true will improve performance, and should be -- done if you plan to set the captureGroups execoption to False. lastStarGreedy :: CompOption -> Bool data ExecOption ExecOption :: Bool -> ExecOption -- | True by default. Set to False to improve speed (and space). captureGroups :: ExecOption -> Bool -- | Used by implementation to name certain Postions during matching. -- Identity of Position tag to set during a transition type Tag = Int -- | Internal use to indicate type of tag and preference for larger or -- smaller Positions data OP Maximize :: OP Minimize :: OP Orbit :: OP Ignore :: OP -- | Internal NFA node identity number type Index = Int -- | Internal DFA identity is this Set of NFA Index type SetIndex = IntSet -- | Index into the text being searched type Position = Int -- | GroupIndex is for indexing submatches from capturing parenthesized -- groups (PGroup/Group) type GroupIndex = Int -- | GroupInfo collects the parent and tag information for an instance of a -- group data GroupInfo GroupInfo :: GroupIndex -> GroupIndex -> Tag -> Tag -> Tag -> GroupInfo thisIndex :: GroupInfo -> GroupIndex parentIndex :: GroupInfo -> GroupIndex startTag :: GroupInfo -> Tag stopTag :: GroupInfo -> Tag flagTag :: GroupInfo -> Tag -- | The TDFA backend specific Regex type, used by this module's -- RegexOptions and RegexMaker data Regex Regex :: DFA -> Index -> (Index, Index) -> (Tag, Tag) -> TrieSet DFA -> Array Tag OP -> Array GroupIndex [GroupInfo] -> Bool -> CompOption -> ExecOption -> Regex -- | starting DFA state regex_dfa :: Regex -> DFA -- | index of starting state regex_init :: Regex -> Index -- | indexes of smallest and largest states regex_b_index :: Regex -> (Index, Index) -- | indexes of smallest and largest tags regex_b_tags :: Regex -> (Tag, Tag) -- | All DFA states regex_trie :: Regex -> TrieSet DFA -- | information about each tag regex_tags :: Regex -> Array Tag OP -- | information about each group regex_groups :: Regex -> Array GroupIndex [GroupInfo] -- | used for optimizing execution regex_isFrontAnchored :: Regex -> Bool regex_compOptions :: Regex -> CompOption regex_execOptions :: Regex -> ExecOption data WinEmpty WinEmpty :: Instructions -> WinEmpty WinTest :: WhichTest -> (Maybe WinEmpty) -> (Maybe WinEmpty) -> WinEmpty -- | Internal NFA node type data QNFA QNFA :: Index -> QT -> QNFA q_id :: QNFA -> Index q_qt :: QNFA -> QT -- | Internal to QNFA type. data QT Simple :: WinTags -> CharMap QTrans -> QTrans -> QT -- | empty transitions to the virtual winning state qt_win :: QT -> WinTags -- | all ways to leave this QNFA to other or the same QNFA qt_trans :: QT -> CharMap QTrans -- | default ways to leave this QNFA to other or the same QNFA qt_other :: QT -> QTrans Testing :: WhichTest -> EnumSet DoPa -> QT -> QT -> QT -- | The test to perform qt_test :: QT -> WhichTest -- | location(s) of the anchor(s) in the original regexp qt_dopas :: QT -> EnumSet DoPa -- | use qt_a if test is True, else use qt_b qt_a :: QT -> QT -- | use qt_a if test is True, else use qt_b qt_b :: QT -> QT -- | Internal type to represent the tagged transition from one QNFA to -- another (or itself). The key is the Index of the destination QNFA. type QTrans = IntMap [TagCommand] -- | Known predicates, just Beginning of Line (^) and End of Line ($). Also -- support for GNU extensions is being added: ` beginning of buffer, ' -- end of buffer, < and > for begin and end of words, b and B for -- word boundary and not word boundary. data WhichTest Test_BOL :: WhichTest Test_EOL :: WhichTest Test_BOB :: WhichTest Test_EOB :: WhichTest Test_BOW :: WhichTest Test_EOW :: WhichTest Test_EdgeWord :: WhichTest Test_NotEdgeWord :: WhichTest -- | The things that can be done with a Tag. TagTask and ResetGroupStopTask -- are for tags with Maximize or Minimize OP values. ResetOrbitTask and -- EnterOrbitTask and LeaveOrbitTask are for tags with Orbit OP value. data TagTask TagTask :: TagTask ResetGroupStopTask :: TagTask SetGroupStopTask :: TagTask ResetOrbitTask :: TagTask EnterOrbitTask :: TagTask LeaveOrbitTask :: TagTask -- | Ordered list of tags and their associated Task type TagTasks = [(Tag, TagTask)] -- | When attached to a QTrans the TagTask can be done before or after -- accepting the character. data TagUpdate PreUpdate :: TagTask -> TagUpdate PostUpdate :: TagTask -> TagUpdate -- | Ordered list of tags and their associated update operation. type TagList = [(Tag, TagUpdate)] -- | A TagList and the location of the item in the original pattern that is -- being accepted. type TagCommand = (DoPa, TagList) -- | Ordered list of tags and their associated update operation to perform -- on an empty transition to the virtual winning state. type WinTags = TagList -- | Internal DFA node, identified by the Set of indices of the QNFA nodes -- it represents. data DFA DFA :: SetIndex -> DT -> DFA d_id :: DFA -> SetIndex d_dt :: DFA -> DT data Transition Transition :: DFA -> DFA -> DTrans -> Transition -- | where to go (maximal), including respawning trans_many :: Transition -> DFA -- | where to go, not including respawning trans_single :: Transition -> DFA -- | how to go, including respawning trans_how :: Transition -> DTrans -- | Internal to the DFA node data DT Simple' :: IntMap Instructions -> CharMap Transition -> Transition -> DT -- | Actions to perform to win dt_win :: DT -> IntMap Instructions -- | Transition to accept Char dt_trans :: DT -> CharMap Transition -- | default accepting transition dt_other :: DT -> Transition Testing' :: WhichTest -> EnumSet DoPa -> DT -> DT -> DT -- | The test to perform dt_test :: DT -> WhichTest -- | location(s) of the anchor(s) in the original regexp dt_dopas :: DT -> EnumSet DoPa -- | use dt_a if test is True else use dt_b dt_a :: DT -> DT -- | use dt_a if test is True else use dt_b dt_b :: DT -> DT -- | Internal type to repesent the commands for the tagged transition. The -- outer IntMap is for the destination Index and the inner IntMap is for -- the Source Index. This is convenient since all runtime data going to -- the same destination must be compared to find the best. -- -- A Destination IntMap entry may have an empty Source IntMap if and only -- if the destination is the starting index and the NFA/DFA. This -- instructs the matching engine to spawn a new entry starting at the -- post-update position. type DTrans = IntMap (IntMap (DoPa, Instructions)) -- | Internal convenience type for the text display code type DTrans' = [(Index, [(Index, (DoPa, ([(Tag, (Position, Bool))], [String])))])] -- | Positions for which a * was re-started while looping. Need to append -- locations at back but compare starting with front, so use Seq as a -- Queue. The initial position is saved in basePos (and a Maximize Tag), -- the middle positions in the Seq, and the final position is NOT saved -- in the Orbits (only in a Maximize Tag). -- -- The orderinal code is being written XXX TODO document it. data Orbits Orbits :: !Bool -> Position -> (Maybe Int) -> !(Seq Position) -> Orbits inOrbit :: Orbits -> !Bool basePos :: Orbits -> Position ordinal :: Orbits -> (Maybe Int) getOrbits :: Orbits -> !(Seq Position) -- | The newPos and newFlags lists in Instructions are -- sorted by, and unique in, the Tag values data Instructions Instructions :: ![(Tag, Action)] -> !(Maybe (Position -> OrbitTransformer)) -> Instructions newPos :: Instructions -> ![(Tag, Action)] newOrbits :: Instructions -> !(Maybe (Position -> OrbitTransformer)) data Action SetPre :: Action SetPost :: Action SetVal :: Int -> Action type OrbitTransformer = OrbitLog -> OrbitLog type OrbitLog = IntMap Orbits showQT :: QT -> String indent :: [String] -> String showDT :: DT -> String seeDTrans :: DTrans -> String instance Eq DoPa instance Ord DoPa instance Read CompOption instance Show CompOption instance Read ExecOption instance Show ExecOption instance Eq OP instance Show OP instance Show GroupInfo instance Show WhichTest instance Eq WhichTest instance Ord WhichTest instance Enum WhichTest instance Show TagTask instance Eq TagTask instance Show TagUpdate instance Eq TagUpdate instance Show Orbits instance Show Action instance Eq Action instance Show Instructions instance Show DFA instance Show WinEmpty instance Eq QT instance Show DT instance Show QT instance Show QNFA instance RegexOptions Regex CompOption ExecOption instance Show DoPa instance Enum DoPa -- | This Text.Regex.TDFA.Pattern module provides the Pattern -- data type and its subtypes. This Pattern type is used to -- represent the parsed form of a Regular Expression. module Text.Regex.TDFA.Pattern -- | Pattern is the type returned by the regular expression parser. This is -- consumed by the CorePattern module and the tender leaves are nibbled -- by the TNFA module. data Pattern PEmpty :: Pattern PGroup :: (Maybe GroupIndex) -> Pattern -> Pattern POr :: [Pattern] -> Pattern PConcat :: [Pattern] -> Pattern PQuest :: Pattern -> Pattern PPlus :: Pattern -> Pattern PStar :: Bool -> Pattern -> Pattern PBound :: Int -> (Maybe Int) -> Pattern -> Pattern PCarat :: DoPa -> Pattern getDoPa :: Pattern -> DoPa PDollar :: DoPa -> Pattern getDoPa :: Pattern -> DoPa PDot :: DoPa -> Pattern getDoPa :: Pattern -> DoPa PAny :: DoPa -> PatternSet -> Pattern getDoPa :: Pattern -> DoPa getPatternSet :: Pattern -> PatternSet PAnyNot :: DoPa -> PatternSet -> Pattern getDoPa :: Pattern -> DoPa getPatternSet :: Pattern -> PatternSet PEscape :: DoPa -> Char -> Pattern getDoPa :: Pattern -> DoPa getPatternChar :: Pattern -> Char PChar :: DoPa -> Char -> Pattern getDoPa :: Pattern -> DoPa getPatternChar :: Pattern -> Char PNonCapture :: Pattern -> Pattern PNonEmpty :: Pattern -> Pattern data PatternSet PatternSet :: (Maybe (Set Char)) -> (Maybe (Set PatternSetCharacterClass)) -> (Maybe (Set PatternSetCollatingElement)) -> (Maybe (Set PatternSetEquivalenceClass)) -> PatternSet newtype PatternSetCharacterClass PatternSetCharacterClass :: String -> PatternSetCharacterClass unSCC :: PatternSetCharacterClass -> String newtype PatternSetCollatingElement PatternSetCollatingElement :: String -> PatternSetCollatingElement unSCE :: PatternSetCollatingElement -> String newtype PatternSetEquivalenceClass PatternSetEquivalenceClass :: String -> PatternSetEquivalenceClass unSEC :: PatternSetEquivalenceClass -> String -- | GroupIndex is for indexing submatches from capturing parenthesized -- groups (PGroup/Group) type GroupIndex = Int -- | Used to track elements of the pattern that accept characters or are -- anchors newtype DoPa DoPa :: Int -> DoPa dopaIndex :: DoPa -> Int -- | I have not been checking, but this should have the property that -- parsing the resulting string should result in an identical Pattern. -- This is not true if starTrans has created PNonCapture and PNonEmpty -- values or a (PStar False). The contents of a "[ ]" grouping are always -- shown in a sorted canonical order. showPattern :: Pattern -> String -- | Do the transformation and simplification in a single traversal. This -- removes the PPlus, PQuest, and PBound values, changing to POr and -- PEmpty and PStar True/False. For some PBound values it adds PNonEmpty -- and PNonCapture semantic marker. It also simplifies to flatten out -- nested POr and PConcat instances and eliminate some unneeded PEmpty -- values. starTrans :: Pattern -> Pattern starTrans' :: Pattern -> Pattern -- | Function to transform a pattern into an equivalent, but less redundant -- form. Nested POr and PConcat are flattened. PEmpty is -- propagated. simplify' :: Pattern -> Pattern -- | Apply a Pattern transfomation function depth first dfsPattern :: (Pattern -> Pattern) -> Pattern -> Pattern instance Eq PatternSetCharacterClass instance Ord PatternSetCharacterClass instance Eq PatternSetCollatingElement instance Ord PatternSetCollatingElement instance Eq PatternSetEquivalenceClass instance Ord PatternSetEquivalenceClass instance Eq PatternSet instance Eq Pattern instance Show Pattern instance Show PatternSetEquivalenceClass instance Show PatternSetCollatingElement instance Show PatternSetCharacterClass instance Show PatternSet -- | This is a POSIX version of parseRegex that allows NUL characters. -- LazyPossessiveBackrefs are not recognized. Anchors ^ and $ are -- recognized. -- -- The PGroup returned always have (Maybe GroupIndex) set to (Just _) and -- never to Nothing. module Text.Regex.TDFA.ReadRegex -- | Return either an error message or a tuple of the Pattern and the -- largest group index and the largest DoPa index (both have smallest -- index of 1). Since the regular expression is supplied as [Char] it -- automatically supports unicode and '\NUL' characters. parseRegex :: String -> Either ParseError (Pattern, (GroupIndex, DoPa)) -- | The CorePattern module deconstructs the Pattern tree created by -- ReadRegex.parseRegex and returns a simpler Q/P tree with annotations -- at each Q node. This will be converted by the TNFA module into a QNFA -- finite automata. -- -- Of particular note, this Pattern to Q/P conversion creates and assigns -- all the internal Tags that will be used during the matching process, -- and associates the captures groups with the tags that represent their -- starting and ending locations and with their immediate parent group. -- -- Each Maximize and Minimize tag is held as either a preTag or a postTag -- by one and only one location in the Q/P tree. The Orbit tags are each -- held by one and only one Star node. Tags that stop a Group are also -- held in perhaps numerous preReset lists. -- -- The additional nullQ::nullView field of Q records the potentially -- complex information about what tests and tags must be used if the -- pattern unQ::P matches 0 zero characters. There can be redundancy in -- nullView, which is eliminated by cleanNullView. -- -- Uses recursive do notation. -- -- 2009 XXX TODO: we can avoid needing tags in the part of the pattern -- after the last capturing group (when right-associative). This is -- flipped for left-associative where the front of the pattern before the -- first capturing group needs no tags. The edge of these regions is -- subtle: both case needs a Maximize tag. One ought to be able to check -- the Pattern: if the root is PConcat then a scan from the end (start) -- looking for the first with an embedded PGroup can be found and the -- PGroup free elements can be wrapped in some new PNOTAG semantic -- indicator. module Text.Regex.TDFA.CorePattern data Q Q :: NullView -> (Position, Maybe Position) -> [Tag] -> [Tag] -> Maybe Tag -> Maybe Tag -> Bool -> Bool -> Wanted -> P -> Q nullQ :: Q -> NullView takes :: Q -> (Position, Maybe Position) preReset :: Q -> [Tag] postSet :: Q -> [Tag] preTag :: Q -> Maybe Tag postTag :: Q -> Maybe Tag tagged :: Q -> Bool childGroups :: Q -> Bool wants :: Q -> Wanted unQ :: Q -> P data P Empty :: P Or :: [Q] -> P Seq :: Q -> Q -> P Star :: Maybe Tag -> [Tag] -> Bool -> Q -> P getOrbit :: P -> Maybe Tag resetOrbits :: P -> [Tag] firstNull :: P -> Bool unStar :: P -> Q Test :: TestInfo -> P OneChar :: Pattern -> P NonEmpty :: Q -> P -- | Known predicates, just Beginning of Line (^) and End of Line ($). Also -- support for GNU extensions is being added: ` beginning of buffer, ' -- end of buffer, < and > for begin and end of words, b and B for -- word boundary and not word boundary. data WhichTest Test_BOL :: WhichTest Test_EOL :: WhichTest Test_BOB :: WhichTest Test_EOB :: WhichTest Test_BOW :: WhichTest Test_EOW :: WhichTest Test_EdgeWord :: WhichTest Test_NotEdgeWord :: WhichTest data Wanted WantsQNFA :: Wanted WantsQT :: Wanted WantsBoth :: Wanted WantsEither :: Wanted type TestInfo = (WhichTest, DoPa) -- | Internal use to indicate type of tag and preference for larger or -- smaller Positions data OP Maximize :: OP Minimize :: OP Orbit :: OP Ignore :: OP newtype SetTestInfo SetTestInfo :: EnumMap WhichTest (EnumSet DoPa) -> SetTestInfo getTests :: SetTestInfo -> EnumMap WhichTest (EnumSet DoPa) type NullView = [(SetTestInfo, TagList)] patternToQ :: CompOption -> (Pattern, (GroupIndex, DoPa)) -> (Q, Array Tag OP, Array GroupIndex [GroupInfo]) cleanNullView :: NullView -> NullView cannotAccept :: Q -> Bool mustAccept :: Q -> Bool instance Eq SetTestInfo instance Show HandleTag instance Eq Wanted instance Show Wanted instance Eq Q instance Show P instance Eq P instance Show Q instance Show SetTestInfo instance Monoid SetTestInfo module Text.Regex.TDFA.NewDFA.MakeTest test_singleline :: Uncons text => WhichTest -> Index -> Char -> text -> Bool test_multiline :: Uncons text => WhichTest -> Index -> Char -> text -> Bool -- | Like Engine, but merely checks to see whether any match at all is -- found. module Text.Regex.TDFA.NewDFA.Tester matchTest :: Uncons text => Regex -> text -> Bool -- | This is the code for the main engine. This captures the posix -- subexpressions. There is also a non-capturing engine, and a testing -- engine. -- -- It is polymorphic over the internal Uncons type class, and specialized -- to produce the needed variants. module Text.Regex.TDFA.NewDFA.Engine_FA execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] -- | This is the non-capturing form of Text.Regex.TDFA.NewDFA.String module Text.Regex.TDFA.NewDFA.Engine_NC execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] instance Show WScratch -- | This is the non-capturing form of Text.Regex.TDFA.NewDFA.String module Text.Regex.TDFA.NewDFA.Engine_NC_FA execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] -- | This is the code for the main engine. This captures the posix -- subexpressions. This execMatch also dispatches to -- Engine_NC, Engine_FA, and Engine_FC_NA -- -- It is polymorphic over the internal Uncons type class, and specialized -- to produce the needed variants. module Text.Regex.TDFA.NewDFA.Engine execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] -- | Text.Regex.TDFA.TNFA converts the CorePattern Q/P data (and its -- Pattern leafs) to a QNFA tagged non-deterministic finite automata. -- -- This holds every possible way to follow one state by another, while in -- the DFA these will be reduced by picking a single best transition for -- each (soure,destination) pair. The transitions are heavily and often -- redundantly annotated with tasks to perform, and this redundancy is -- reduced when picking the best transition. So far, keeping all this -- information has helped fix bugs in both the design and implementation. -- -- The QNFA for a Pattern with a starTraned Q/P form with N one character -- accepting leaves has at most N+1 nodes. These nodes repesent the -- future choices after accepting a leaf. The processing of Or nodes -- often reduces this number by sharing at the end of the different -- paths. Turning off capturing while compiling the pattern may (future -- extension) reduce this further for some patterns by processing Star -- with optimizations. This compact design also means that tags are -- assigned not just to be updated before taking a transition (PreUpdate) -- but also after the transition (PostUpdate). -- -- Uses recursive do notation. module Text.Regex.TDFA.TNFA patternToNFA :: CompOption -> (Pattern, (GroupIndex, DoPa)) -> ((Index, Array Index QNFA), Array Tag OP, Array GroupIndex [GroupInfo]) -- | Internal NFA node type data QNFA QNFA :: Index -> QT -> QNFA q_id :: QNFA -> Index q_qt :: QNFA -> QT -- | Internal to QNFA type. data QT Simple :: WinTags -> CharMap QTrans -> QTrans -> QT -- | empty transitions to the virtual winning state qt_win :: QT -> WinTags -- | all ways to leave this QNFA to other or the same QNFA qt_trans :: QT -> CharMap QTrans -- | default ways to leave this QNFA to other or the same QNFA qt_other :: QT -> QTrans Testing :: WhichTest -> EnumSet DoPa -> QT -> QT -> QT -- | The test to perform qt_test :: QT -> WhichTest -- | location(s) of the anchor(s) in the original regexp qt_dopas :: QT -> EnumSet DoPa -- | use qt_a if test is True, else use qt_b qt_a :: QT -> QT -- | use qt_a if test is True, else use qt_b qt_b :: QT -> QT -- | Internal type to represent the tagged transition from one QNFA to -- another (or itself). The key is the Index of the destination QNFA. type QTrans = IntMap [TagCommand] -- | When attached to a QTrans the TagTask can be done before or after -- accepting the character. data TagUpdate PreUpdate :: TagTask -> TagUpdate PostUpdate :: TagTask -> TagUpdate -- | Text.Regex.TDFA.TDFA converts the QNFA from TNFA into the DFA. -- A DFA state corresponds to a Set of QNFA states, repesented as list of -- Index which are used to lookup the DFA state in a lazy Trie which -- holds all possible subsets of QNFA states. module Text.Regex.TDFA.TDFA patternToRegex :: (Pattern, (GroupIndex, DoPa)) -> CompOption -> ExecOption -> Regex -- | Internal DFA node, identified by the Set of indices of the QNFA nodes -- it represents. data DFA DFA :: SetIndex -> DT -> DFA d_id :: DFA -> SetIndex d_dt :: DFA -> DT -- | Internal to the DFA node data DT Simple' :: IntMap Instructions -> CharMap Transition -> Transition -> DT -- | Actions to perform to win dt_win :: DT -> IntMap Instructions -- | Transition to accept Char dt_trans :: DT -> CharMap Transition -- | default accepting transition dt_other :: DT -> Transition Testing' :: WhichTest -> EnumSet DoPa -> DT -> DT -> DT -- | The test to perform dt_test :: DT -> WhichTest -- | location(s) of the anchor(s) in the original regexp dt_dopas :: DT -> EnumSet DoPa -- | use dt_a if test is True else use dt_b dt_a :: DT -> DT -- | use dt_a if test is True else use dt_b dt_b :: DT -> DT examineDFA :: Regex -> String nfaToDFA :: ((Index, Array Index QNFA), Array Tag OP, Array GroupIndex [GroupInfo]) -> CompOption -> ExecOption -> Regex dfaMap :: DFA -> Map SetIndex DFA instance Show AlterOrbit -- | This modules provides RegexMaker and RegexLike instances -- for using String with the TDFA backend. -- -- This exports instances of the high level API and the medium level API -- of compile,execute, and regexec. module Text.Regex.TDFA.String -- | The TDFA backend specific Regex type, used by this module's -- RegexOptions and RegexMaker data Regex -- | 0 based index from start of source, or (-1) for unused type MatchOffset = Int -- | non-negative length of a match type MatchLength = Int -- | Control whether the pattern is multiline or case-sensitive like -- Text.Regex and whether to capture the subgroups (1, 2, etc). Controls -- enabling extra anchor syntax. data CompOption data ExecOption compile :: CompOption -> ExecOption -> String -> Either String Regex execute :: Regex -> String -> Either String (Maybe MatchArray) regexec :: Regex -> String -> Either String (Maybe (String, String, String, [String])) instance RegexContext Regex String String instance RegexLike Regex String instance RegexMaker Regex CompOption ExecOption String -- | This modules provides RegexMaker and RegexLike instances -- for using ByteString with the DFA backend -- (Text.Regex.Lib.WrapDFAEngine and -- Text.Regex.Lazy.DFAEngineFPS). This module is usually used via -- import Text.Regex.TDFA. -- -- This exports instances of the high level API and the medium level API -- of compile,execute, and regexec. module Text.Regex.TDFA.ByteString -- | The TDFA backend specific Regex type, used by this module's -- RegexOptions and RegexMaker data Regex -- | Control whether the pattern is multiline or case-sensitive like -- Text.Regex and whether to capture the subgroups (1, 2, etc). Controls -- enabling extra anchor syntax. data CompOption data ExecOption compile :: CompOption -> ExecOption -> ByteString -> Either String Regex execute :: Regex -> ByteString -> Either String (Maybe MatchArray) regexec :: Regex -> ByteString -> Either String (Maybe (ByteString, ByteString, ByteString, [ByteString])) instance RegexLike Regex ByteString instance RegexMaker Regex CompOption ExecOption ByteString instance RegexContext Regex ByteString ByteString -- | This modules provides RegexMaker and RegexLike instances -- for using ByteString with the DFA backend -- (Text.Regex.Lib.WrapDFAEngine and -- Text.Regex.Lazy.DFAEngineFPS). This module is usually used via -- import Text.Regex.TDFA. -- -- This exports instances of the high level API and the medium level API -- of compile,execute, and regexec. module Text.Regex.TDFA.ByteString.Lazy -- | The TDFA backend specific Regex type, used by this module's -- RegexOptions and RegexMaker data Regex -- | Control whether the pattern is multiline or case-sensitive like -- Text.Regex and whether to capture the subgroups (1, 2, etc). Controls -- enabling extra anchor syntax. data CompOption data ExecOption compile :: CompOption -> ExecOption -> ByteString -> Either String Regex execute :: Regex -> ByteString -> Either String (Maybe MatchArray) regexec :: Regex -> ByteString -> Either String (Maybe (ByteString, ByteString, ByteString, [ByteString])) instance RegexLike Regex ByteString instance RegexMaker Regex CompOption ExecOption ByteString instance RegexContext Regex ByteString ByteString -- | This modules provides RegexMaker and RegexLike instances -- for using ByteString with the DFA backend -- (Text.Regex.Lib.WrapDFAEngine and -- Text.Regex.Lazy.DFAEngineFPS). This module is usually used via -- import Text.Regex.TDFA. -- -- This exports instances of the high level API and the medium level API -- of compile,execute, and regexec. module Text.Regex.TDFA.Sequence -- | The TDFA backend specific Regex type, used by this module's -- RegexOptions and RegexMaker data Regex -- | Control whether the pattern is multiline or case-sensitive like -- Text.Regex and whether to capture the subgroups (1, 2, etc). Controls -- enabling extra anchor syntax. data CompOption data ExecOption compile :: CompOption -> ExecOption -> (Seq Char) -> Either String Regex execute :: Regex -> (Seq Char) -> Either String (Maybe MatchArray) regexec :: Regex -> (Seq Char) -> Either String (Maybe (Seq Char, Seq Char, Seq Char, [(Seq Char)])) instance RegexLike Regex (Seq Char) instance RegexMaker Regex CompOption ExecOption (Seq Char) instance RegexContext Regex (Seq Char) (Seq Char) -- | The Text.Regex.TDFA module provides a backend for regular -- expressions. It provides instances for the classes defined and -- documented in Text.Regex.Base and re-exported by this module. -- If you import this along with other backends then you should do so -- with qualified imports (with renaming for convenience). -- -- This regex-tdfa package implements, correctly, POSIX extended regular -- expressions. It is highly unlikely that the regex-posix package on -- your operating system is correct, see -- http:/www.haskell.orghaskellwiki/Regex_Posix for examples of -- your OS's bugs. -- -- This package does provide captured parenthesized subexpressions. -- -- Depending on the text being searched this package supports Unicode. -- The [Char] and (Seq Char) text types support Unicode. The ByteString -- and ByteString.Lazy text types only support ASCII. It is possible to -- support utf8 encoded ByteString.Lazy by using regex-tdfa and -- regex-tdfa-utf8 packages together (required the utf8-string package). -- -- As of version 1.1.1 the following GNU extensions are recognized, all -- anchors: -- -- \` at beginning of entire text -- -- \' at end of entire text -- -- \< at beginning of word -- -- \> at end of word -- -- \b at either beginning or end of word -- -- \B at neither beginning nor end of word -- -- The above are controlled by the newSyntax Bool in -- CompOption. -- -- Where the "word" boundaries means between characters that are and are -- not in the [:word:] character class which contains [a-zA-Z0-9_]. Note -- that < and b may match before the entire text and > and b may -- match at the end of the entire text. -- -- There is no locale support, so collating elements like [.ch.] are -- simply ignored and equivalence classes like [=a=] are converted to -- just [a]. The character classes like [:alnum:] are supported over -- ASCII only, valid classes are alnum, digit, punct, alpha, graph, -- space, blank, lower, upper, cntrl, print, xdigit, word. -- -- This package does not provide "basic" regular expressions. This -- package does not provide back references inside regular expressions. -- -- The package does not provide Perl style regular expressions. Please -- look at the regex-pcre and pcre-light packages instead. module Text.Regex.TDFA getVersion_Text_Regex_TDFA :: Version -- | This is the pure functional matching operator. If the target cannot be -- produced then some empty result will be returned. If there is an error -- in processing, then error will be called. (=~) :: (RegexMaker Regex CompOption ExecOption source, RegexContext Regex source1 target) => source1 -> source -> target -- | This is the monadic matching operator. If a single match fails, then -- fail will be called. (=~~) :: (RegexMaker Regex CompOption ExecOption source, RegexContext Regex source1 target, Monad m) => source1 -> source -> m target