-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Regex based parsers -- -- Regex based parsers. See -- --
-- {-# LANGUAGE OverloadedStrings #-}
--
-- import qualified Data.CharSet as CS
--
-- vowels :: CS.CharSet
-- vowels = "aeiou"
--
module Data.CharSet
-- | A set of Chars.
--
-- The members are stored as contiguous ranges of Chars. This is
-- efficient when the members form contiguous ranges since many
-- Chars can be represented with just one range.
data CharSet
-- | <math>. A set of one Char.
singleton :: Char -> CharSet
-- | <math>. A Char range (inclusive).
fromRange :: (Char, Char) -> CharSet
-- | <math>. Create a set from Chars in a list.
fromList :: [Char] -> CharSet
-- | <math>. Create a set from the given Char ranges
-- (inclusive).
fromRanges :: [(Char, Char)] -> CharSet
-- | <math>. Insert a Char into a set.
insert :: Char -> CharSet -> CharSet
-- | <math>. Insert all Chars in a range (inclusive) into a
-- set.
insertRange :: (Char, Char) -> CharSet -> CharSet
-- | <math>. Delete a Char from a set.
delete :: Char -> CharSet -> CharSet
-- | <math>. Delete a Char range (inclusive) from a set.
deleteRange :: (Char, Char) -> CharSet -> CharSet
-- | <math>. Map a function over all Chars in a set.
map :: (Char -> Char) -> CharSet -> CharSet
-- | <math>. The complement of a set.
not :: CharSet -> CharSet
-- | <math>. The union of two sets.
--
-- Prefer strict left-associative unions, since this is a strict
-- structure and the runtime is linear in the size of the second
-- argument.
union :: CharSet -> CharSet -> CharSet
-- | <math>. The difference of two sets.
difference :: CharSet -> CharSet -> CharSet
-- | <math>. The intersection of two sets.
intersection :: CharSet -> CharSet -> CharSet
-- | <math>. Whether a Char is in a set.
member :: Char -> CharSet -> Bool
-- | <math>. Whether a Char is not in a set.
notMember :: Char -> CharSet -> Bool
-- | <math>. The Chars in a set.
elems :: CharSet -> [Char]
-- | <math>. The contiguous ranges of Chars in a set.
ranges :: CharSet -> [(Char, Char)]
-- | The empty set.
empty :: CharSet
-- | ASCII digits. '0'..'9'. Agrees with isDigit.
digit :: CharSet
-- | ASCII alphabet, digits and underscore.
-- 'A'..'Z','a'..'z','0'..'9','_'.
word :: CharSet
-- | Unicode space characters and the control characters
-- '\t','\n','\r','\f','\v'. Agrees with isSpace.
space :: CharSet
-- | ASCII Chars. '\0'..'\127'. Agrees with
-- isAscii.
ascii :: CharSet
-- | ASCII alphabet. 'A'..'Z','a'..'z'.
asciiAlpha :: CharSet
-- | ASCII uppercase Chars. 'A'..'Z'. Agrees with
-- isAsciiUpper.
asciiUpper :: CharSet
-- | ASCII lowercase Chars. 'a'..'z'. Agrees with
-- isAsciiLower.
asciiLower :: CharSet
-- | Is the internal structure of the set valid?
valid :: CharSet -> Bool
-- | This is an internal module. You probably don't need to import this.
module Regex.Internal.Regex
-- | A regular expression. Operates on a sequence of elements of type
-- c and capable of parsing into an a.
--
-- A RE is a Functor, Applicative, and Alternative.
--
-- -- a <*> empty = empty -- empty <*> a = empty -- (a <|> b) <*> c = (a <*> c) <|> (b <*> c) -- a <*> (b <|> c) = (a <*> b) <|> (a <*> c) ---- -- Note that, because of bias, it is not true that a <|> -- b = b <|> a. -- -- Performance tip: Prefer the smaller of equivalent regexes, i.e. -- prefer (a <|> b) <*> c over (a <*> c) -- <|> (b <*> c). data RE c a [RToken] :: !c -> Maybe a -> RE c a [RFmap] :: !Strictness -> !a1 -> a -> !RE c a1 -> RE c a [RFmap_] :: a -> !RE c a1 -> RE c a [RPure] :: a -> RE c a [RLiftA2] :: !Strictness -> !a1 -> a2 -> a -> !RE c a1 -> !RE c a2 -> RE c a [REmpty] :: RE c a [RAlt] :: !RE c a -> !RE c a -> RE c a [RFold] :: !Strictness -> !Greediness -> !a -> a1 -> a -> a -> !RE c a1 -> RE c a [RMany] :: !a1 -> a -> !a2 -> a -> !a2 -> a1 -> a2 -> !a2 -> !RE c a1 -> RE c a data Strictness Strict :: Strictness NonStrict :: Strictness data Greediness Greedy :: Greediness Minimal :: Greediness -- | A repeating value or a finite list. data Many a -- | A single value repeating indefinitely Repeat :: a -> Many a -- | A finite list Finite :: [a] -> Many a -- | Parse a c into an a if the given function returns -- Just. token :: (c -> Maybe a) -> RE c a -- | Parse any c. anySingle :: RE c c -- | Parse the given c. single :: Eq c => c -> RE c c -- | Parse a c if it satisfies the given predicate. satisfy :: (c -> Bool) -> RE c c -- | Parse many occurences of the given RE. Biased towards -- matching more. -- -- Also see the section "Looping parsers". foldlMany :: (b -> a -> b) -> b -> RE c a -> RE c b -- | Parse many occurences of the given RE. Minimal, i.e. biased -- towards matching less. foldlManyMin :: (b -> a -> b) -> b -> RE c a -> RE c b -- | Zero or more. Biased towards matching more. -- -- Also see the section "Looping parsers". manyr :: RE c a -> RE c (Many a) -- | Zero or one. Minimal, i.e. biased towards zero. -- -- Use Control.Applicative.optional for the same but -- biased towards one. 
optionalMin :: RE c a -> RE c (Maybe a) -- | One or more. Minimal, i.e. biased towards matching less. someMin :: RE c a -> RE c [a] -- | Zero or more. Minimal, i.e. biased towards matching less. manyMin :: RE c a -> RE c [a] -- | At least n times. Biased towards matching more. atLeast :: Int -> RE c a -> RE c [a] -- | At most n times. Biased towards matching more. atMost :: Int -> RE c a -> RE c [a] -- | Between m and n times (inclusive). Biased towards matching more. betweenCount :: (Int, Int) -> RE c a -> RE c [a] -- | At least n times. Minimal, i.e. biased towards matching less. atLeastMin :: Int -> RE c a -> RE c [a] -- | At most n times. Minimal, i.e. biased towards matching less. atMostMin :: Int -> RE c a -> RE c [a] -- | Between m and n times (inclusive). Minimal, i.e. biased towards -- matching less. betweenCountMin :: (Int, Int) -> RE c a -> RE c [a] -- | r `sepBy` sep parses zero or more occurences of r, -- separated by sep. Biased towards matching more. sepBy :: RE c a -> RE c sep -> RE c [a] -- | r `sepBy1` sep parses one or more occurences of r, -- separated by sep. Biased towards matching more. sepBy1 :: RE c a -> RE c sep -> RE c [a] -- | r `endBy` sep parses zero or more occurences of r, -- separated and ended by sep. Biased towards matching more. endBy :: RE c a -> RE c sep -> RE c [a] -- | r `endBy1` sep parses one or more occurences of r, -- separated and ended by sep. Biased towards matching more. endBy1 :: RE c a -> RE c sep -> RE c [a] -- | r `sepEndBy` sep parses zero or more occurences of -- r, separated and optionally ended by sep. Biased -- towards matching more. sepEndBy :: RE c a -> RE c sep -> RE c [a] -- | r `sepEndBy1` sep parses one or more occurences of -- r, separated and optionally ended by sep. Biased -- towards matching more. sepEndBy1 :: RE c a -> RE c sep -> RE c [a] -- | chainl1 r op parses one or more occurences of r, -- separated by op. 
The result is obtained by left associative -- application of all functions returned by op to the values -- returned by p. Biased towards matching more. chainl1 :: RE c a -> RE c (a -> a -> a) -> RE c a -- | chainr1 r op parses one or more occurences of r, -- separated by op. The result is obtained by right associative -- application of all functions returned by op to the values -- returned by p. Biased towards matching more. chainr1 :: RE c a -> RE c (a -> a -> a) -> RE c a -- | Results in the first occurence of the given RE. Fails if no -- occurence is found. toFind :: RE c a -> RE c a -- | Results in all non-overlapping occurences of the given RE. -- Always succeeds. toFindMany :: RE c a -> RE c [a] fmap' :: (a -> b) -> RE c a -> RE c b liftA2' :: (a1 -> a2 -> b) -> RE c a1 -> RE c a2 -> RE c b foldlMany' :: (b -> a -> b) -> b -> RE c a -> RE c b foldlManyMin' :: (b -> a -> b) -> b -> RE c a -> RE c b instance GHC.Show.Show a => GHC.Show.Show (Regex.Internal.Regex.Many a) instance GHC.Classes.Eq a => GHC.Classes.Eq (Regex.Internal.Regex.Many a) instance GHC.Classes.Ord a => GHC.Classes.Ord (Regex.Internal.Regex.Many a) instance Data.Functor.Classes.Eq1 Regex.Internal.Regex.Many instance Data.Functor.Classes.Ord1 Regex.Internal.Regex.Many instance Data.Functor.Classes.Show1 Regex.Internal.Regex.Many instance GHC.Base.Functor Regex.Internal.Regex.Many instance Data.Foldable.Foldable Regex.Internal.Regex.Many instance Control.DeepSeq.NFData a => Control.DeepSeq.NFData (Regex.Internal.Regex.Many a) instance Control.DeepSeq.NFData1 Regex.Internal.Regex.Many instance GHC.Base.Functor (Regex.Internal.Regex.RE c) instance GHC.Base.Applicative (Regex.Internal.Regex.RE c) instance GHC.Base.Alternative (Regex.Internal.Regex.RE c) instance GHC.Base.Semigroup a => GHC.Base.Semigroup (Regex.Internal.Regex.RE c a) instance GHC.Base.Monoid a => GHC.Base.Monoid (Regex.Internal.Regex.RE c a) -- | This is an internal module. You probably don't need to import this. -- --
-- import qualified Data.Vector.Generic as VG -- from vector -- -- import Regex.Base (Parser) -- import qualified Regex.Base as R -- -- parseVector :: VG.Vector v c => Parser c a -> v c -> Maybe a -- parseVector p v = R.parseFoldr VG.foldr p v ---- --
-- >>> import Control.Applicative (many) -- -- >>> import qualified Data.Vector as V -- -- >>> import Regex.Base (Parser) -- -- >>> import qualified Regex.Base as R -- -- >>> -- -- >>> let p = R.compile $ many ((,) <$> R.satisfy even <*> R.satisfy odd) :: Parser Int [(Int, Int)] -- -- >>> parseVector p (V.fromList [0..5]) -- Just [(0,1),(2,3),(4,5)] -- -- >>> parseVector p (V.fromList [0,2..6]) -- Nothing --parseFoldr :: Foldr f c -> Parser c a -> f -> Maybe a -- | <math>. Run a parser given a "next" action. -- -- Calls next repeatedly to yield elements. A Nothing -- is interpreted as end-of-sequence. -- -- Parses the entire sequence, not just a prefix or a substring. Returns -- without exhausting the input on parse failure. -- --
-- import Conduit (ConduitT, await, sinkNull) -- from conduit -- -- import Regex.Base (Parser) -- import qualified Regex.Base as R -- -- parseConduit :: Monad m => Parser c a -> ConduitT c x m (Maybe a) -- parseConduit p = R.parseNext p await <* sinkNull ---- --
-- >>> import Control.Applicative (many) -- -- >>> import Conduit ((.|), iterMC, runConduit, yieldMany) -- -- >>> import Regex.Base (Parser) -- -- >>> import qualified Regex.Base as R -- -- >>> -- -- >>> let p = R.compile $ many ((,) <$> R.satisfy even <*> R.satisfy odd) :: Parser Int [(Int, Int)] -- -- >>> let printYieldMany xs = yieldMany xs .| iterMC print -- -- >>> runConduit $ printYieldMany [0..5] .| parseConduit p -- 0 -- 1 -- 2 -- 3 -- 4 -- 5 -- Just [(0,1),(2,3),(4,5)] -- -- >>> runConduit $ printYieldMany [0,2..6] .| parseConduit p -- 0 -- 2 -- 4 -- 6 -- Nothing --parseNext :: Monad m => Parser c a -> m (Maybe c) -> m (Maybe a) -- | This is an internal module. You probably don't need to import this. -- Import Regex.Text instead. -- --
-- >>> find (text "meow") "homeowner" -- Just "meow" ---- -- To test whether a Text is present in another Text, -- like above, prefer Data.Text.isInfixOf. -- --
-- >>> find (textIgnoreCase "haskell") "Look I'm Haskelling!" -- Just "Haskell" -- -- >>> find (text "backtracking") "parser-regex" -- Nothing --find :: REText a -> Text -> Maybe a -- | <math>. Find all non-overlapping occurrences of the given -- RE in the Text. -- --
-- >>> findAll (text "ana") "banananana" -- ["ana","ana"] ---- --
-- data Roll = Roll -- Natural -- ^ Rolls -- Natural -- ^ Faces on the die -- deriving Show -- -- roll :: REText Roll -- roll = Roll <$> (naturalDec <|> pure 1) <* char 'd' <*> naturalDec ---- --
-- >>> findAll roll "3d6, d10, 2d10" -- [Roll 3 6,Roll 1 10,Roll 2 10] --findAll :: REText a -> Text -> [a] -- | <math>. Split a Text at occurrences of the given -- RE. -- --
-- >>> splitOn (char ' ') "Glasses are really versatile" -- ["Glasses","are","really","versatile"] ---- -- For simple splitting, like above, prefer -- Data.Text.words, Data.Text.lines, -- Data.Text.split or Data.Text.splitOn, -- whichever is applicable. -- --
-- >>> splitOn (char ' ' *> oneOf "+-=" *> char ' ') "3 - 1 + 1/2 - 2 = 0" -- ["3","1","1/2","2","0"] ---- -- If the Text starts or ends with a delimiter, the result will -- contain empty Texts at those positions. -- --
-- >>> splitOn (char 'a') "ayaya" -- ["","y","y",""] --splitOn :: REText a -> Text -> [Text] -- | <math>. Replace the first match of the given RE with -- its result. If there is no match, the result is Nothing. -- --
-- >>> replace ("world" <$ text "Haskell") "Hello, Haskell!"
-- Just "Hello, world!"
--
--
--
-- >>> replace ("," <$ some (char '.')) "one...two...ten"
-- Just "one,two...ten"
--
replace :: REText Text -> Text -> Maybe Text
-- | <math>. Replace all non-overlapping matches of the given
-- RE with their results.
--
--
-- >>> replaceAll (" and " <$ text ", ") "red, blue, green"
-- "red and blue and green"
--
--
-- For simple replacements like above, prefer
-- Data.Text.replace.
--
--
-- >>> replaceAll ("Fruit" <$ text "Time" <|> "a banana" <$ text "an arrow") "Time flies like an arrow"
-- "Fruit flies like a banana"
--
--
-- -- sep = oneOf "-./" -- digits n = toMatch (replicateM_ n (oneOf digit)) -- toYmd d m y = mconcat [y, "-", m, "-", d] -- date = toYmd <$> digits 2 <* sep -- <*> digits 2 <* sep -- <*> digits 4 ---- --
-- >>> replaceAll date "01/01/1970, 01-04-1990, 03.07.2011" -- "1970-01-01, 1990-04-01, 2011-07-03" --replaceAll :: REText Text -> Text -> Text instance GHC.Base.Functor Regex.Internal.Text.WithMatch instance GHC.Base.Applicative Regex.Internal.Text.WithMatch -- | This module exports base types and functions. You can use these to -- define functions to work on arbitrary sequence types. -- -- If you want to work with Text or String, import and -- use Regex.Text or Regex.List instead. module Regex.Base -- | A regular expression. Operates on a sequence of elements of type -- c and capable of parsing into an a. -- -- A RE is a Functor, Applicative, and Alternative. -- --
-- a <*> empty = empty -- empty <*> a = empty -- (a <|> b) <*> c = (a <*> c) <|> (b <*> c) -- a <*> (b <|> c) = (a <*> b) <|> (a <*> c) ---- -- Note that, because of bias, it is not true that a <|> -- b = b <|> a. -- -- Performance tip: Prefer the smaller of equivalent regexes, i.e. -- prefer (a <|> b) <*> c over (a <*> c) -- <|> (b <*> c). data RE c a -- | A parser compiled from a RE c a. data Parser c a -- | <math>. Compile a RE c a to a Parser c a. -- -- Note: compile does not limit the size of the RE. See -- compileBounded if you would like to limit the size. -- REs with size greater than (maxBound::Int) `div` 2 -- are not supported and the behavior of such a RE is undefined. compile :: RE c a -> Parser c a -- | <math>. Compile a RE c a to a Parser c a. -- -- Returns Nothing if the size of the RE is greater -- than the provided limit <math>. You may want to use this if you -- suspect that the RE may be too large, for instance if the -- regex is constructed from an untrusted source. -- -- While the exact size of a RE depends on an internal -- representation, it can be assumed to be in the same order as the -- length of a regex pattern corresponding to the RE. compileBounded :: Int -> RE c a -> Maybe (Parser c a) -- | The state maintained for parsing. data ParserState c a -- | <math>. Prepare a parser for input. -- -- Returns Nothing if parsing has failed regardless of further -- input. Otherwise, returns the initial ParserState. prepareParser :: Parser c a -> Maybe (ParserState c a) -- | <math>. Step a parser by feeding a single element c. -- -- Returns Nothing if parsing has failed regardless of further -- input. Otherwise, returns an updated ParserState. stepParser :: ParserState c a -> c -> Maybe (ParserState c a) -- | <math>. Get the parse result for the input fed into the parser -- so far. finishParser :: ParserState c a -> Maybe a -- | A fold function. type Foldr f a = forall b. (a -> b -> b) -> b -> f -> b -- | <math>. 
Run a parser given a sequence f and a fold -- function. -- -- Parses the entire sequence, not just a prefix or a substring. Returns -- early on parse failure, if the fold can short-circuit. -- --
-- import qualified Data.Vector.Generic as VG -- from vector -- -- import Regex.Base (Parser) -- import qualified Regex.Base as R -- -- parseVector :: VG.Vector v c => Parser c a -> v c -> Maybe a -- parseVector p v = R.parseFoldr VG.foldr p v ---- --
-- >>> import Control.Applicative (many) -- -- >>> import qualified Data.Vector as V -- -- >>> import Regex.Base (Parser) -- -- >>> import qualified Regex.Base as R -- -- >>> -- -- >>> let p = R.compile $ many ((,) <$> R.satisfy even <*> R.satisfy odd) :: Parser Int [(Int, Int)] -- -- >>> parseVector p (V.fromList [0..5]) -- Just [(0,1),(2,3),(4,5)] -- -- >>> parseVector p (V.fromList [0,2..6]) -- Nothing --parseFoldr :: Foldr f c -> Parser c a -> f -> Maybe a -- | <math>. Run a parser given a "next" action. -- -- Calls next repeatedly to yield elements. A Nothing -- is interpreted as end-of-sequence. -- -- Parses the entire sequence, not just a prefix or a substring. Returns -- without exhausting the input on parse failure. -- --
-- import Conduit (ConduitT, await, sinkNull) -- from conduit -- -- import Regex.Base (Parser) -- import qualified Regex.Base as R -- -- parseConduit :: Monad m => Parser c a -> ConduitT c x m (Maybe a) -- parseConduit p = R.parseNext p await <* sinkNull ---- --
-- >>> import Control.Applicative (many) -- -- >>> import Conduit ((.|), iterMC, runConduit, yieldMany) -- -- >>> import Regex.Base (Parser) -- -- >>> import qualified Regex.Base as R -- -- >>> -- -- >>> let p = R.compile $ many ((,) <$> R.satisfy even <*> R.satisfy odd) :: Parser Int [(Int, Int)] -- -- >>> let printYieldMany xs = yieldMany xs .| iterMC print -- -- >>> runConduit $ printYieldMany [0..5] .| parseConduit p -- 0 -- 1 -- 2 -- 3 -- 4 -- 5 -- Just [(0,1),(2,3),(4,5)] -- -- >>> runConduit $ printYieldMany [0,2..6] .| parseConduit p -- 0 -- 2 -- 4 -- 6 -- Nothing --parseNext :: Monad m => Parser c a -> m (Maybe c) -> m (Maybe a) -- | Parse a c into an a if the given function returns -- Just. token :: (c -> Maybe a) -> RE c a -- | Parse any c. anySingle :: RE c c -- | Parse the given c. single :: Eq c => c -> RE c c -- | Parse a c if it satisfies the given predicate. satisfy :: (c -> Bool) -> RE c c -- | Parse many occurences of the given RE. Biased towards -- matching more. -- -- Also see the section "Looping parsers". foldlMany :: (b -> a -> b) -> b -> RE c a -> RE c b -- | Parse many occurences of the given RE. Minimal, i.e. biased -- towards matching less. foldlManyMin :: (b -> a -> b) -> b -> RE c a -> RE c b -- | A repeating value or a finite list. data Many a -- | A single value repeating indefinitely Repeat :: a -> Many a -- | A finite list Finite :: [a] -> Many a -- | Zero or more. Biased towards matching more. -- -- Also see the section "Looping parsers". manyr :: RE c a -> RE c (Many a) -- | Zero or one. Minimal, i.e. biased towards zero. -- -- Use Control.Applicative.optional for the same but -- biased towards one. optionalMin :: RE c a -> RE c (Maybe a) -- | One or more. Minimal, i.e. biased towards matching less. someMin :: RE c a -> RE c [a] -- | Zero or more. Minimal, i.e. biased towards matching less. manyMin :: RE c a -> RE c [a] -- | At least n times. Biased towards matching more. atLeast :: Int -> RE c a -> RE c [a] -- | At most n times. 
Biased towards matching more. atMost :: Int -> RE c a -> RE c [a] -- | Between m and n times (inclusive). Biased towards matching more. betweenCount :: (Int, Int) -> RE c a -> RE c [a] -- | At least n times. Minimal, i.e. biased towards matching less. atLeastMin :: Int -> RE c a -> RE c [a] -- | At most n times. Minimal, i.e. biased towards matching less. atMostMin :: Int -> RE c a -> RE c [a] -- | Between m and n times (inclusive). Minimal, i.e. biased towards -- matching less. betweenCountMin :: (Int, Int) -> RE c a -> RE c [a] -- | r `sepBy` sep parses zero or more occurences of r, -- separated by sep. Biased towards matching more. sepBy :: RE c a -> RE c sep -> RE c [a] -- | r `sepBy1` sep parses one or more occurences of r, -- separated by sep. Biased towards matching more. sepBy1 :: RE c a -> RE c sep -> RE c [a] -- | r `endBy` sep parses zero or more occurences of r, -- separated and ended by sep. Biased towards matching more. endBy :: RE c a -> RE c sep -> RE c [a] -- | r `endBy1` sep parses one or more occurences of r, -- separated and ended by sep. Biased towards matching more. endBy1 :: RE c a -> RE c sep -> RE c [a] -- | r `sepEndBy` sep parses zero or more occurences of -- r, separated and optionally ended by sep. Biased -- towards matching more. sepEndBy :: RE c a -> RE c sep -> RE c [a] -- | r `sepEndBy1` sep parses one or more occurences of -- r, separated and optionally ended by sep. Biased -- towards matching more. sepEndBy1 :: RE c a -> RE c sep -> RE c [a] -- | chainl1 r op parses one or more occurences of r, -- separated by op. The result is obtained by left associative -- application of all functions returned by op to the values -- returned by p. Biased towards matching more. chainl1 :: RE c a -> RE c (a -> a -> a) -> RE c a -- | chainr1 r op parses one or more occurences of r, -- separated by op. The result is obtained by right associative -- application of all functions returned by op to the values -- returned by p. 
Biased towards matching more. chainr1 :: RE c a -> RE c (a -> a -> a) -> RE c a -- | Results in the first occurrence of the given RE. Fails if no -- occurrence is found. toFind :: RE c a -> RE c a -- | Results in all non-overlapping occurrences of the given RE. -- Always succeeds. toFindMany :: RE c a -> RE c [a] fmap' :: (a -> b) -> RE c a -> RE c b liftA2' :: (a1 -> a2 -> b) -> RE c a1 -> RE c a2 -> RE c b foldlMany' :: (b -> a -> b) -> b -> RE c a -> RE c b foldlManyMin' :: (b -> a -> b) -> b -> RE c a -> RE c b -- | This module provides functions for visualizing REs and -- Parsers. See here for some examples. module Regex.Internal.Debug -- | Generate a Graphviz DOT visualization of a RE. -- Optionally takes an alphabet [c], which will be tested -- against the token functions in the RE and the accepted -- characters displayed. reToDot :: forall c a. Maybe ([c], [c] -> String) -> RE c a -> String -- | Generate a Graphviz DOT visualization of a Parser. -- Optionally takes an alphabet [c], which will be tested -- against the token functions in the Parser and the -- accepted characters displayed. parserToDot :: forall c a. Maybe ([c], [c] -> String) -> Parser c a -> String -- |
-- >>> dispCharRanges "abc012def"
-- "[('0','2'),('a','f')]"
--
dispCharRanges :: [Char] -> String
instance Data.String.IsString Regex.Internal.Debug.Str
instance GHC.Base.Semigroup Regex.Internal.Debug.Str
instance GHC.Base.Monoid Regex.Internal.Debug.Str
-- | This module offers regexes, combinators, and operations to work with
-- the list type ([]), and also specifically Strings,
-- which are lists of Chars.
module Regex.List
-- | A regular expression. Operates on a sequence of elements of type
-- c and is capable of parsing into an a.
--
-- A RE is a Functor, Applicative, and Alternative.
--
-- -- a <*> empty = empty -- empty <*> a = empty -- (a <|> b) <*> c = (a <*> c) <|> (b <*> c) -- a <*> (b <|> c) = (a <*> b) <|> (a <*> c) ---- -- Note that, because of bias, it is not true that a <|> -- b = b <|> a. -- -- Performance tip: Prefer the smaller of equivalent regexes, i.e. -- prefer (a <|> b) <*> c over (a <*> c) -- <|> (b <*> c). data RE c a -- | Parse a c into an a if the given function returns -- Just. token :: (c -> Maybe a) -> RE c a -- | Parse a c if it satisfies the given predicate. satisfy :: (c -> Bool) -> RE c c -- | Parse the given c. single :: Eq c => c -> RE c c -- | Parse any c. anySingle :: RE c c -- | Parse the given list. list :: Eq c => [c] -> RE c [c] -- | Parse any list. Biased towards matching more. manyList :: RE c [c] -- | Parse any non-empty list. Biased towards matching more. someList :: RE c [c] -- | Parse any list. Minimal, i.e. biased towards matching less. manyListMin :: RE c [c] -- | Parse any non-empty String. Minimal, i.e. biased towards -- matching less. someListMin :: RE c [c] -- | Parse the given Char, ignoring case. -- -- Comparisons are performed after applying simple case folding as -- described by the Unicode standard. charIgnoreCase :: Char -> RE Char Char -- | Parse a Char if it is a member of the CharSet. oneOfChar :: CharSet -> RE Char Char -- | Parse the given String, ignoring case. -- -- Comparisons are performed after applying simple case folding as -- described by the Unicode standard. stringIgnoreCase :: String -> RE Char String -- | Parse any String containing members of the CharSet. -- Biased towards matching more. manyStringOf :: CharSet -> RE Char String -- | Parse any non-empty String containing members of the -- CharSet. Biased towards matching more. someStringOf :: CharSet -> RE Char String -- | Parse any String containing members of the CharSet. -- Minimal, i.e. biased towards matching less. 
manyStringOfMin :: CharSet -> RE Char String -- | Parse any non-empty String containing members of the -- CharSet. Minimal, i.e. biased towards matching less. someStringOfMin :: CharSet -> RE Char String -- | Parse a decimal Natural. Leading zeros are not accepted. -- Biased towards matching more. naturalDec :: RE Char Natural -- | Parse a decimal Integer. Parse an optional sign, '-' -- or '+', followed by the given RE, followed by the -- absolute value of the integer. Leading zeros are not accepted. Biased -- towards matching more. integerDec :: RE Char a -> RE Char Integer -- | Parse a hexadecimal Natural. Both uppercase 'A'..'F' -- and lowercase 'a'..'f' are accepted. Leading zeros are not -- accepted. Biased towards matching more. naturalHex :: RE Char Natural -- | Parse a hexadecimal Integer. Parse an optional sign, -- '-' or '+', followed by the given RE, -- followed by the absolute value of the integer. Both uppercase -- 'A'..'F' and lowercase 'a'..'f' are accepted. -- Leading zeros are not accepted. Biased towards matching more. integerHex :: RE Char a -> RE Char Integer -- | Parse a decimal Word in the range [low..high]. -- Leading zeros are not accepted. Biased towards matching more. wordRangeDec :: (Word, Word) -> RE Char Word -- | Parse a decimal Int in the range [low..high]. Parse -- an optional sign, '-' or '+', followed by the given -- RE, followed by the absolute value of the integer. Leading -- zeros are not accepted. Biased towards matching more. intRangeDec :: RE Char a -> (Int, Int) -> RE Char Int -- | Parse a hexadecimal Word in the range [low..high]. -- Both uppercase 'A'..'F' and lowercase 'a'..'f' are -- accepted. Leading zeros are not accepted. Biased towards matching -- more. wordRangeHex :: (Word, Word) -> RE Char Word -- | Parse a hexadecimal Int in the range [low..high]. -- Parse an optional sign, '-' or '+', followed by the -- given RE, followed by the absolute value of the integer. 
Both -- uppercase 'A'..'F' and lowercase 'a'..'f' are -- accepted. Leading zeros are not accepted. Biased towards matching -- more. intRangeHex :: RE Char a -> (Int, Int) -> RE Char Int -- | Parse a Word of exactly n decimal digits, including any -- leading zeros. Will not parse values that do not fit in a -- Word. Biased towards matching more. wordDecN :: Int -> RE Char Word -- | Parse a Word of exactly n hexadecimal digits, including any -- leading zeros. Both uppercase 'A'..'F' and lowercase -- 'a'..'f' are accepted. Will not parse values that do not fit -- in a Word. Biased towards matching more. wordHexN :: Int -> RE Char Word -- | Parse many occurences of the given RE. Biased towards -- matching more. -- -- Also see the section "Looping parsers". foldlMany :: (b -> a -> b) -> b -> RE c a -> RE c b -- | Parse many occurences of the given RE. Minimal, i.e. biased -- towards matching less. foldlManyMin :: (b -> a -> b) -> b -> RE c a -> RE c b -- | Rebuild the RE such that the result is the matched section of -- the list instead. toMatch :: RE c a -> RE c [c] -- | Rebuild the RE to include the matched section of the list -- alongside the result. withMatch :: RE c a -> RE c ([c], a) -- | A repeating value or a finite list. data Many a -- | A single value repeating indefinitely Repeat :: a -> Many a -- | A finite list Finite :: [a] -> Many a -- | Zero or more. Biased towards matching more. -- -- Also see the section "Looping parsers". manyr :: RE c a -> RE c (Many a) -- | Zero or one. Minimal, i.e. biased towards zero. -- -- Use Control.Applicative.optional for the same but -- biased towards one. optionalMin :: RE c a -> RE c (Maybe a) -- | One or more. Minimal, i.e. biased towards matching less. someMin :: RE c a -> RE c [a] -- | Zero or more. Minimal, i.e. biased towards matching less. manyMin :: RE c a -> RE c [a] -- | At least n times. Biased towards matching more. atLeast :: Int -> RE c a -> RE c [a] -- | At most n times. Biased towards matching more. 
atMost :: Int -> RE c a -> RE c [a] -- | Between m and n times (inclusive). Biased towards matching more. betweenCount :: (Int, Int) -> RE c a -> RE c [a] -- | At least n times. Minimal, i.e. biased towards matching less. atLeastMin :: Int -> RE c a -> RE c [a] -- | At most n times. Minimal, i.e. biased towards matching less. atMostMin :: Int -> RE c a -> RE c [a] -- | Between m and n times (inclusive). Minimal, i.e. biased towards -- matching less. betweenCountMin :: (Int, Int) -> RE c a -> RE c [a] -- | r `sepBy` sep parses zero or more occurences of r, -- separated by sep. Biased towards matching more. sepBy :: RE c a -> RE c sep -> RE c [a] -- | r `sepBy1` sep parses one or more occurences of r, -- separated by sep. Biased towards matching more. sepBy1 :: RE c a -> RE c sep -> RE c [a] -- | r `endBy` sep parses zero or more occurences of r, -- separated and ended by sep. Biased towards matching more. endBy :: RE c a -> RE c sep -> RE c [a] -- | r `endBy1` sep parses one or more occurences of r, -- separated and ended by sep. Biased towards matching more. endBy1 :: RE c a -> RE c sep -> RE c [a] -- | r `sepEndBy` sep parses zero or more occurences of -- r, separated and optionally ended by sep. Biased -- towards matching more. sepEndBy :: RE c a -> RE c sep -> RE c [a] -- | r `sepEndBy1` sep parses one or more occurences of -- r, separated and optionally ended by sep. Biased -- towards matching more. sepEndBy1 :: RE c a -> RE c sep -> RE c [a] -- | chainl1 r op parses one or more occurences of r, -- separated by op. The result is obtained by left associative -- application of all functions returned by op to the values -- returned by p. Biased towards matching more. chainl1 :: RE c a -> RE c (a -> a -> a) -> RE c a -- | chainr1 r op parses one or more occurences of r, -- separated by op. The result is obtained by right associative -- application of all functions returned by op to the values -- returned by p. Biased towards matching more. 
chainr1 :: RE c a -> RE c (a -> a -> a) -> RE c a -- | <math>. Parse a list with a RE. -- -- Parses the entire list, not just a prefix or a substring. Returns -- early without demanding the entire list on parse failure. -- -- Uses compile, see the note there. -- -- If parsing multiple lists using the same RE, it is wasteful -- to compile the RE every time. So, prefer to -- --
-- >>> find (list "meow") "homeowner" -- Just "meow" ---- -- To test whether a list is present in another list, like above, prefer -- Data.List.isInfixOf. -- --
-- >>> find (stringIgnoreCase "haskell") "Look I'm Haskelling!" -- Just "Haskell" -- -- >>> find (list "backtracking") "parser-regex" -- Nothing --find :: RE c a -> [c] -> Maybe a -- | <math>. Find all non-overlapping occurrences of the given -- RE in the list. -- --
-- >>> findAll (list "ana") "banananana" -- ["ana","ana"] ---- --
-- data Roll = Roll -- Natural -- ^ Rolls -- Natural -- ^ Faces on the die -- deriving Show -- -- roll :: RE Char Roll -- roll = Roll <$> (naturalDec <|> pure 1) <* single 'd' <*> naturalDec ---- --
-- >>> findAll roll "3d6, d10, 2d10" -- [Roll 3 6,Roll 1 10,Roll 2 10] --findAll :: RE c a -> [c] -> [a] -- | <math>. Split a list at occurrences of the given RE. -- --
-- >>> splitOn (single ' ') "Glasses are really versatile" -- ["Glasses","are","really","versatile"] ---- -- In cases like above, prefer using words or lines -- instead, if applicable. -- --
-- >>> splitOn (single ' ' *> oneOfChar "+-=" *> single ' ') "3 - 1 + 1/2 - 2 = 0" -- ["3","1","1/2","2","0"] ---- -- If the list starts or ends with a delimiter, the result will contain -- empty lists at those positions. -- --
-- >>> splitOn (single 'a') "ayaya" -- ["","y","y",""] --splitOn :: RE c a -> [c] -> [[c]] -- | <math>. Replace the first match of the given RE with -- its result. If there is no match, the result is Nothing. -- --
-- >>> replace ("world" <$ list "Haskell") "Hello, Haskell!"
-- Just "Hello, world!"
--
--
--
-- >>> replace ("," <$ some (single '.')) "one...two...ten"
-- Just "one,two...ten"
--
replace :: RE c [c] -> [c] -> Maybe [c]
-- | <math>. Replace all non-overlapping matches of the given
-- RE with their results.
--
--
-- >>> replaceAll (" and " <$ list ", ") "red, blue, green"
-- "red and blue and green"
--
--
--
-- >>> replaceAll ("Fruit" <$ list "Time" <|> "a banana" <$ list "an arrow") "Time flies like an arrow"
-- "Fruit flies like a banana"
--
--
-- -- sep = oneOfChar "-./" -- digits n = replicateM n (oneOfChar digit) -- toYmd d m y = concat [y, "-", m, "-", d] -- date = toYmd <$> digits 2 <* sep -- <*> digits 2 <* sep -- <*> digits 4 ---- --
-- >>> replaceAll date "01/01/1970, 01-04-1990, 03.07.2011" -- "1970-01-01, 1990-04-01, 2011-07-03" --replaceAll :: RE c [c] -> [c] -> [c] -- | This module offers regexes, combinators, and operations to work with -- the Text type from the text package. module Regex.Text -- | A regular expression. Operates on a sequence of elements of type -- c and is capable of parsing into an a. -- -- A RE is a Functor, Applicative, and Alternative. -- --
-- a <*> empty = empty -- empty <*> a = empty -- (a <|> b) <*> c = (a <*> c) <|> (b <*> c) -- a <*> (b <|> c) = (a <*> b) <|> (a <*> c) ---- -- Note that, because of bias, it is not true that a <|> -- b = b <|> a. -- -- Performance tip: Prefer the smaller of equivalent regexes, i.e. -- prefer (a <|> b) <*> c over (a <*> c) -- <|> (b <*> c). data RE c a -- | The token type used for parsing Text. data TextToken -- | A type alias for convenience. -- -- A function which accepts a RE c a will accept a REText -- a. type REText = RE TextToken -- | Parse a Char into an a if the given function returns -- Just. token :: (Char -> Maybe a) -> REText a -- | Parse a Char if it satisfies the given predicate. satisfy :: (Char -> Bool) -> REText Char -- | Parse the given Char. char :: Char -> REText Char -- | Parse the given Char, ignoring case. -- -- Comparisons are performed after applying simple case folding as -- described by the Unicode standard. charIgnoreCase :: Char -> REText Char -- | Parse any Char. anyChar :: REText Char -- | Parse a Char if it is a member of the CharSet. oneOf :: CharSet -> REText Char -- | Parse the given Text. text :: Text -> REText Text -- | Parse the given Text, ignoring case. -- -- Comparisons are performed after applying simple case folding as -- described by the Unicode standard. textIgnoreCase :: Text -> REText Text -- | Parse any Text. Biased towards matching more. manyText :: REText Text -- | Parse any non-empty Text. Biased towards matching more. someText :: REText Text -- | Parse any Text. Minimal, i.e. biased towards matching less. manyTextMin :: REText Text -- | Parse any non-empty Text. Minimal, i.e. biased towards -- matching less. someTextMin :: REText Text -- | Parse any Text containing members of the CharSet. -- Biased towards matching more. manyTextOf :: CharSet -> REText Text -- | Parse any non-empty Text containing members of the -- CharSet. Biased towards matching more. 
someTextOf :: CharSet -> REText Text -- | Parse any Text containing members of the CharSet. -- Minimal, i.e. biased towards matching less. manyTextOfMin :: CharSet -> REText Text -- | Parse any non-empty Text containing members of the -- CharSet. Minimal, i.e. biased towards matching less. someTextOfMin :: CharSet -> REText Text -- | Parse a decimal Natural. Leading zeros are not accepted. -- Biased towards matching more. naturalDec :: REText Natural -- | Parse a decimal Integer. Parse an optional sign, '-' -- or '+', followed by the given RE, followed by the -- absolute value of the integer. Leading zeros are not accepted. Biased -- towards matching more. integerDec :: REText a -> REText Integer -- | Parse a hexadecimal Natural. Both uppercase 'A'..'F' -- and lowercase 'a'..'f' are accepted. Leading zeros are not -- accepted. Biased towards matching more. naturalHex :: REText Natural -- | Parse a hexadecimal Integer. Parse an optional sign, -- '-' or '+', followed by the given RE, -- followed by the absolute value of the integer. Both uppercase -- 'A'..'F' and lowercase 'a'..'f' are accepted. -- Leading zeros are not accepted. Biased towards matching more. integerHex :: REText a -> REText Integer -- | Parse a decimal Word in the range [low..high]. -- Leading zeros are not accepted. Biased towards matching more. wordRangeDec :: (Word, Word) -> REText Word -- | Parse a decimal Int in the range [low..high]. Parse -- an optional sign, '-' or '+', followed by the given -- RE, followed by the absolute value of the integer. Leading -- zeros are not accepted. Biased towards matching more. intRangeDec :: REText a -> (Int, Int) -> REText Int -- | Parse a hexadecimal Word in the range [low..high]. -- Both uppercase 'A'..'F' and lowercase 'a'..'f' are -- accepted. Leading zeros are not accepted. Biased towards matching -- more. wordRangeHex :: (Word, Word) -> REText Word -- | Parse a hexadecimal Int in the range [low..high]. 
-- Parse an optional sign, '-' or '+', followed by the -- given RE, followed by the absolute value of the integer. Both -- uppercase 'A'..'F' and lowercase 'a'..'f' are -- accepted. Leading zeros are not accepted. Biased towards matching -- more. intRangeHex :: REText a -> (Int, Int) -> REText Int -- | Parse a Word of exactly n decimal digits, including any -- leading zeros. Will not parse values that do not fit in a -- Word. Biased towards matching more. wordDecN :: Int -> REText Word -- | Parse a Word of exactly n hexadecimal digits, including any -- leading zeros. Both uppercase 'A'..'F' and lowercase -- 'a'..'f' are accepted. Will not parse values that do not fit -- in a Word. Biased towards matching more. wordHexN :: Int -> REText Word -- | Parse many occurences of the given RE. Biased towards -- matching more. -- -- Also see the section "Looping parsers". foldlMany :: (b -> a -> b) -> b -> RE c a -> RE c b -- | Parse many occurences of the given RE. Minimal, i.e. biased -- towards matching less. foldlManyMin :: (b -> a -> b) -> b -> RE c a -> RE c b -- | Rebuild the RE such that the result is the matched -- Text instead. toMatch :: REText a -> REText Text -- | Rebuild the RE to include the matched Text alongside -- the result. withMatch :: REText a -> REText (Text, a) -- | A repeating value or a finite list. data Many a -- | A single value repeating indefinitely Repeat :: a -> Many a -- | A finite list Finite :: [a] -> Many a -- | Zero or more. Biased towards matching more. -- -- Also see the section "Looping parsers". manyr :: RE c a -> RE c (Many a) -- | Zero or one. Minimal, i.e. biased towards zero. -- -- Use Control.Applicative.optional for the same but -- biased towards one. optionalMin :: RE c a -> RE c (Maybe a) -- | One or more. Minimal, i.e. biased towards matching less. someMin :: RE c a -> RE c [a] -- | Zero or more. Minimal, i.e. biased towards matching less. manyMin :: RE c a -> RE c [a] -- | At least n times. Biased towards matching more. 
atLeast :: Int -> RE c a -> RE c [a] -- | At most n times. Biased towards matching more. atMost :: Int -> RE c a -> RE c [a] -- | Between m and n times (inclusive). Biased towards matching more. betweenCount :: (Int, Int) -> RE c a -> RE c [a] -- | At least n times. Minimal, i.e. biased towards matching less. atLeastMin :: Int -> RE c a -> RE c [a] -- | At most n times. Minimal, i.e. biased towards matching less. atMostMin :: Int -> RE c a -> RE c [a] -- | Between m and n times (inclusive). Minimal, i.e. biased towards -- matching less. betweenCountMin :: (Int, Int) -> RE c a -> RE c [a] -- | r `sepBy` sep parses zero or more occurences of r, -- separated by sep. Biased towards matching more. sepBy :: RE c a -> RE c sep -> RE c [a] -- | r `sepBy1` sep parses one or more occurences of r, -- separated by sep. Biased towards matching more. sepBy1 :: RE c a -> RE c sep -> RE c [a] -- | r `endBy` sep parses zero or more occurences of r, -- separated and ended by sep. Biased towards matching more. endBy :: RE c a -> RE c sep -> RE c [a] -- | r `endBy1` sep parses one or more occurences of r, -- separated and ended by sep. Biased towards matching more. endBy1 :: RE c a -> RE c sep -> RE c [a] -- | r `sepEndBy` sep parses zero or more occurences of -- r, separated and optionally ended by sep. Biased -- towards matching more. sepEndBy :: RE c a -> RE c sep -> RE c [a] -- | r `sepEndBy1` sep parses one or more occurences of -- r, separated and optionally ended by sep. Biased -- towards matching more. sepEndBy1 :: RE c a -> RE c sep -> RE c [a] -- | chainl1 r op parses one or more occurences of r, -- separated by op. The result is obtained by left associative -- application of all functions returned by op to the values -- returned by p. Biased towards matching more. chainl1 :: RE c a -> RE c (a -> a -> a) -> RE c a -- | chainr1 r op parses one or more occurences of r, -- separated by op. 
The result is obtained by right associative -- application of all functions returned by op to the values -- returned by p. Biased towards matching more. chainr1 :: RE c a -> RE c (a -> a -> a) -> RE c a -- | <math>. Parse a Text with a REText. -- -- Parses the entire Text, not just a prefix or a substring. -- -- Uses compile, see the note there. -- -- If parsing multiple Texts using the same RE, it is -- wasteful to compile the RE every time. So, prefer to -- --
-- >>> find (text "meow") "homeowner" -- Just "meow" ---- -- To test whether a Text is present in another Text, -- like above, prefer Data.Text.isInfixOf. -- --
-- >>> find (textIgnoreCase "haskell") "Look I'm Haskelling!" -- Just "Haskell" -- -- >>> find (text "backtracking") "parser-regex" -- Nothing --find :: REText a -> Text -> Maybe a -- | <math>. Find all non-overlapping occurrences of the given -- RE in the Text. -- --
-- >>> findAll (text "ana") "banananana" -- ["ana","ana"] ---- --
-- data Roll = Roll -- Natural -- ^ Rolls -- Natural -- ^ Faces on the die -- deriving Show -- -- roll :: REText Roll -- roll = Roll <$> (naturalDec <|> pure 1) <* char 'd' <*> naturalDec ---- --
-- >>> findAll roll "3d6, d10, 2d10" -- [Roll 3 6,Roll 1 10,Roll 2 10] --findAll :: REText a -> Text -> [a] -- | <math>. Split a Text at occurrences of the given -- RE. -- --
-- >>> splitOn (char ' ') "Glasses are really versatile" -- ["Glasses","are","really","versatile"] ---- -- For simple splitting, like above, prefer -- Data.Text.words, Data.Text.lines, -- Data.Text.split or Data.Text.splitOn, -- whichever is applicable. -- --
-- >>> splitOn (char ' ' *> oneOf "+-=" *> char ' ') "3 - 1 + 1/2 - 2 = 0" -- ["3","1","1/2","2","0"] ---- -- If the Text starts or ends with a delimiter, the result will -- contain empty Texts at those positions. -- --
-- >>> splitOn (char 'a') "ayaya" -- ["","y","y",""] --splitOn :: REText a -> Text -> [Text] -- | <math>. Replace the first match of the given RE with -- its result. If there is no match, the result is Nothing. -- --
-- >>> replace ("world" <$ text "Haskell") "Hello, Haskell!"
-- Just "Hello, world!"
--
--
--
-- >>> replace ("," <$ some (char '.')) "one...two...ten"
-- Just "one,two...ten"
--
replace :: REText Text -> Text -> Maybe Text
-- | <math>. Replace all non-overlapping matches of the given
-- RE with their results.
--
--
-- >>> replaceAll (" and " <$ text ", ") "red, blue, green"
-- "red and blue and green"
--
--
-- For simple replacements like above, prefer
-- Data.Text.replace.
--
--
-- >>> replaceAll ("Fruit" <$ text "Time" <|> "a banana" <$ text "an arrow") "Time flies like an arrow"
-- "Fruit flies like a banana"
--
--
-- -- sep = oneOf "-./" -- digits n = toMatch (replicateM_ n (oneOf digit)) -- toYmd d m y = mconcat [y, "-", m, "-", d] -- date = toYmd <$> digits 2 <* sep -- <*> digits 2 <* sep -- <*> digits 4 ---- --
-- >>> replaceAll date "01/01/1970, 01-04-1990, 03.07.2011" -- "1970-01-01, 1990-04-01, 2011-07-03" --replaceAll :: REText Text -> Text -> Text