-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | A quasi-quoter for typeful results of regex captures. -- -- Provides a quasi-quoter for regular expressions which yields a tuple, -- of appropriate arity and types, representing the results of the -- captures. Allows the user to specify parsers for captures as inline -- Haskell. Can also be used to provide typeful pattern matching in -- function definitions and pattern matches. Also, it precompiles the -- regular expressions into a PCRE compiled byte-array representation, at -- compile time. @package rex @version 0.4.1 -- | This module provides support for accessing the compiled representation -- of PCRE regular expressions. This byte array encodes a lookup-table -- based representation for the regular expression, and can be safely -- written out and read back by compatible PCRE versions. module Text.Regex.PCRE.Precompile -- | A synonym indicating which ByteStrings represent PCRE-format compiled -- data. type CompiledBytes = ByteString -- | Compiles the given regular expression, and assuming nothing bad -- happens, yields the bytestring filled with PCRE's compiled -- representation. precompile :: ByteString -> [PCREOption] -> IO (Maybe CompiledBytes) -- | Takes a compiled regular expression, and if successful, yields the -- compiled representation. regexToTable :: Regex -> IO (Maybe CompiledBytes) -- | Creates a regular expression from the compiled representation. regexFromTable :: CompiledBytes -> IO Regex -- | This module provides a template Haskell quasiquoter for regular -- expressions, which provides the following features: -- -- 1) Compile-time checking that the regular expression is valid. -- -- 2) Arity of resulting tuple based on the number of selected capture -- patterns in the regular expression. -- -- 3) Allows for the inline interpolation of mapping functions :: String -- -> a. -- -- 4) Precompiles the regular expression at compile time, by calling into -- the PCRE library and storing a ByteString literal -- representation of its state. -- -- NOTE: for some unknown reason this feature is currently broken, and so -- off by default. -- -- 5) Compile-time configurable to use different PCRE options, turn off -- precompilation, use ByteStrings, or set a default mapping -- expression. -- -- Since this is a quasiquoter library that generates code using view -- patterns, the following extensions are required: -- --
-- {-# LANGUAGE TemplateHaskell, QuasiQuotes, ViewPatterns #-}
--
--
-- Here's a silly example which parses peano numbers of the form Z, S Z,
-- S S Z, etc. The s+ means that it is not sensitive to the quantity or
-- type of seperating whitespace. (these examples can also be found in
-- Test.hs)
--
--
-- peano :: String -> Maybe Int
-- peano = [rex|^(?{ length . filter (=='S') } \s* (?:S\s+)*Z)\s*$|]
--
--
-- -- *Main> peano "Z" -- Just 0 -- *Main> peano "S Z" -- Just 1 -- *Main> peano "S S Z" -- Just 2 -- *Main> peano "S S S Z" -- Just 3 -- *Main> peano "invalid" -- Nothing ---- -- The token "(?{" introduces a capture group which has a mapping applied -- to the -- result - in this case "length . filter (==S)". If -- the ?{ ... } are omitted, then the capture group is not taken as part -- of the results of the match. If the contents of the ?{ ... } is -- omitted, then id is assumed: -- --
-- parsePair :: String -> Maybe (String, String)
-- parsePair = [rex|^<\s* (?{ }[^\s,>]+) \s*,\s* (?{ }[^\s,>]+) \s*>$|]
--
--
-- The following example is derived from
-- http:www.regular-expressions.info/dates.html
--
--
-- parseDate :: String -> Maybe (Int, Int, Int)
-- parseDate [rex|^(?{ read -> y }(?:19|20)\d\d)[- /.]
-- (?{ read -> m }0[1-9]|1[012])[- /.]
-- (?{ read -> d }0[1-9]|[12][0-9]|3[01])$|]
-- | (d > 30 && (m `elem` [4, 6, 9, 11]))
-- || (m == 2 &&
-- (d == 29 && not (mod y 4 == 0 && (mod y 100 /= 0 || mod y 400 == 0)))
-- || (d > 29)) = Nothing
-- | otherwise = Just (y, m, d)
-- parseDate _ = Nothing
--
--
-- The above example makes use of the regex quasi-quoter as a pattern
-- matcher. The interpolated Haskell patterns are used to construct an
-- implicit view pattern out of the inlined ones. The above pattern is
-- expanded to the equivalent:
--
--
-- parseDate ([rex|^(?{ read }(?:19|20)\d\d)[- /.]
-- (?{ read }0[1-9]|1[012])[- /.]
-- (?{ read }0[1-9]|[12][0-9]|3[01])$|]
-- -> Just (y, m, d))
--
--
-- Caveat: Since haskell-src-exts does not support parsing view-patterns,
-- the above is implemented as a relatively naive split on "->". It
-- presumes that the last "->" in the interpolated pattern seperates
-- the pattern from an expression on the left. This allows for lambdas to
-- be present in the expression, but prevents nesting view patterns.
--
-- There are also a few other inelegances:
--
-- 1) PCRE captures, unlike .NET regular expressions, yield the last
-- capture made by a particular pattern. So, for example, (...)*, will
-- only yield one match for .... Ideally these would be detected
-- and yield an implicit [a].
--
-- 2) Patterns with disjunction between captures ((?{f}a) | (?{g}b)) will
-- provide the empty string to one of f / g. In the case of pattern
-- expressions, it would be convenient to be able to map multiple
-- captures into a single variable / pattern, preferring the first
-- non-empty option. The general logic for this is a bit complicated, and
-- postponed for a later release.
--
-- Since pcre-light is a wrapper over a C API, the most efficient
-- interface is ByteStrings, as it does not natively speak Haskell lists.
-- The [rex| ... ] quasiquoter implicitely packs the input into a
-- bystestring, and unpacks the results to strings before providing them
-- to your mappers. The brex QuasiQuoter is provided for
-- this purpose. You can also define your own QuasiQuoter - the
-- definitions of the default configurations are as follows:
--
--
-- rex = rexWithConf $ defaultRexConf
-- brex = rexWithConf $ defaultRexConf { rexByteString = True }
--
-- defaultRexConf = RexConf False True "id" [PCRE.extended] []
--
--
-- The first False specifies to use String rather than
-- ByteString. The True argument specifies to use
-- precompilation. -- The string following is the default mapping
-- expression, used when omitted. Due to GHC staging restrictions, your
-- configuration will need to be in a different module than its usage.
--
-- Inspired by Matt Morrow's regexqq package:
-- http://hackage.haskell.org/packages/archive/regexqq/latest/doc/html/src/Text-Regex-PCRE-QQ.html
--
-- And code from Erik Charlebois's interpolatedstring-qq package:
-- http://hackage.haskell.org/packages/archive/interpolatedstring-qq/latest/doc/html/Text-InterpolatedString-QQ.html
module Text.Regex.PCRE.Rex
-- | Default regular expression quasiquoter for Strings and
-- ByteStrings, respectively.
rex :: QuasiQuoter
-- | Default regular expression quasiquoter for Strings and
-- ByteStrings, respectively.
brex :: QuasiQuoter
-- | A configureable regular-expression QuasiQuoter. Takes the options to
-- pass to the PCRE engine, along with Bools to flag
-- ByteString usage and non-compilation respecively. The
-- provided String indicates which mapping function to use, when
-- one is omitted - "(?{} ...)".
rexWithConf :: RexConf -> QuasiQuoter
data RexConf
RexConf :: Bool -> Bool -> String -> [PCREOption] -> [PCREExecOption] -> RexConf
rexByteString :: RexConf -> Bool
rexCompiled :: RexConf -> Bool
rexView :: RexConf -> String
rexPCREOpts :: RexConf -> [PCREOption]
rexPCREExecOpts :: RexConf -> [PCREExecOption]
-- | Default rex configuration, which specifies that the regexes operate on
-- strings, don't postprocess the matched patterns, and use
-- extended. This setting causes whitespace to be nonsemantic, and
-- ignores # comments.
defaultRexConf :: RexConf
-- | This is a QuasiQuoter transformer, which allows for a
-- whitespace-sensitive quasi-quoter to be broken over multiple lines.
-- The default rex and brex functions do not need this as
-- they are already whitespace insensitive. However, if you create your
-- own configuration, which omits the extended parameter, then
-- this could be useful. The leading space of each line is ignored, and
-- all newlines removed.
makeQuasiMultiline :: QuasiQuoter -> QuasiQuoter
-- | A possibly useful utility function - yields Just x when there
-- is a valid parse, and Nothing otherwise.
maybeRead :: Read a => String -> Maybe a
-- | Given a desired list-length, if the passed list is too short, it is
-- padded with the given element. Otherwise, it trims.
padRight :: a -> Int -> [a] -> [a]