{-# LANGUAGE BangPatterns #-}

-- ------------------------------------------------------------

{- |
   Module     : Text.Regex.XMLSchema.Generic.Matching
   Copyright  : Copyright (C) 2014- Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt <uwe@fh-wedel.de>
   Stability  : stable
   Portability: portable

   Convenient functions for W3C XML Schema Regular Expression Matcher.
   For internals see "Text.Regex.XMLSchema.Generic.Regex"

   Grammar can be found under <http://www.w3.org/TR/xmlschema11-2/#regexs>

-}

-- ------------------------------------------------------------

module Text.Regex.XMLSchema.Generic.Matching
    ( grep
    , grepExt
    , grepRE
    , grepREwithLineNum

    , match
    , matchExt
    , matchSubex

    , sed
    , sedExt

    , split
    , splitExt
    , splitSubex

    , tokenize
    , tokenizeExt
    , tokenize'
    , tokenizeExt'
    , tokenizeSubex

    , matchRE
    , matchSubexRE
    , sedRE
    , splitRE
    , splitSubexRE
    , tokenizeRE
    , tokenizeRE'
    , tokenizeSubexRE
    )
where

import           Control.Arrow

import           Data.Maybe

import           Text.Regex.XMLSchema.Generic.Regex
import           Text.Regex.XMLSchema.Generic.RegexParser
import           Text.Regex.XMLSchema.Generic.StringLike

{-
import Debug.Trace      (traceShow)

trc :: Show a => String -> a -> a
trc msg x = traceShow (msg, x) x

-- -}
-- ------------------------------------------------------------

-- | split a string by taking the longest prefix matching a regular expression
--
-- @Nothing@ is returned in case there is no matching prefix,
-- else the pair of prefix and rest is returned

splitRE         :: StringLike s => GenRegex s -> s -> Maybe (s, s)
splitRE re input
                = do
                  (sms, rest) <- splitWithRegex re input
                  -- the head entry of the submatch list carries the matched prefix
                  return (snd . head $ sms, rest)

-- | convenient function for 'splitRE'
--
-- The regular expression is given as a string and parsed with 'parseRegex'.
-- If there is no matching prefix, the pair of the empty string and the
-- unchanged input is returned.
--
-- examples:
--
-- > split "a*b" "abc" = ("ab","c")
-- > split "a*"  "bc"  = ("", "bc")    -- "a*" matches ""
-- > split "a+"  "bc"  = ("", "bc")    -- "a+" does not match, no split
-- > split "["   "abc" = ("", "abc")   -- "["  syntax error, no split

split           :: StringLike s => s -> s -> (s, s)
split           = split' parseRegex

-- | split with extended syntax
--
-- Same as 'split', but the regular expression is parsed with 'parseRegexExt'.

splitExt        :: StringLike s => s -> s -> (s, s)
splitExt        = split' parseRegexExt

-- | common worker for 'split' and 'splitExt'
--
-- The 1. argument is the regex parser to be used. When 'splitRE' finds
-- no matching prefix, the pair of the empty string and the unchanged input
-- is returned.

split'           :: StringLike s => (s -> GenRegex s) -> s -> s -> (s, s)
split' parseRe re input
                 = fromMaybe (emptyS, input)
                   $ splitRE (parseRe re) input

-- ------------------------------------------------------------

-- | split a string by removing the longest prefix matching a regular expression
-- and then return the list of subexpressions found in the matching part
--
-- @Nothing@ is returned in case of no matching prefix,
-- else the list of pairs of labels and submatches and the
-- rest is returned

splitSubexRE    :: StringLike s => GenRegex s -> s -> Maybe ([(s, s)], s)
splitSubexRE re input
                = do
                  (sms, rest) <- splitWithRegex re input
                  -- the 1. entry (the match of the whole prefix) is dropped,
                  -- all remaining entries are expected to carry a label
                  return (map (first fromJust) . drop 1 $ sms, rest)

-- | convenient function for 'splitSubexRE', uses extended syntax
--
-- If there is no matching prefix, the empty list and the unchanged input
-- are returned.
--
-- examples:
--
-- > splitSubex "({1}a*)b"  "abc" = ([("1","a")],"c")
-- > splitSubex "({2}a*)"   "bc"  = ([("2","")], "bc")
-- > splitSubex "({1}a|b)+" "abc" = ([("1","a"),("1","b")],"c")        -- subex 1 matches 2 times
-- >
-- > splitSubex ".*({x}a*)" "aa"  = ([("x",""),("x","a"),("x","aa")],"")
-- >                                                                   -- nondeterminism: 3 matches for a*
-- >
-- > splitSubex "({1}do)|({2}[a-z]+)" "do you know"
-- >                                = ([("1","do"),("2","do")]," you know")
-- >                                                                   -- nondeterminism: 2 matches for do
-- >
-- > splitSubex "({1}do){|}({2}[a-z]+)" "do you know"
-- >                                = ([("1","do")]," you know")
-- >                                                                   -- no nondeterminism with {|}: 1. match for do
-- >
-- > splitSubex "({1}a+)"   "bcd" = ([], "bcd")                        -- no match
-- > splitSubex "["         "abc" = ([], "abc")                        -- syntax error


splitSubex      :: StringLike s => s -> s -> ([(s, s)], s)
splitSubex re inp
                = fromMaybe ([], inp)
                  $ splitSubexRE (parseRegexExt re) inp

-- ------------------------------------------------------------

-- | The function, that does the real work for 'tokenize'
--
-- The input is scanned from left to right. Where the regex matches a prefix,
-- that prefix becomes a token; where it does not match, one char is
-- discarded and scanning continues.

tokenizeRE      :: StringLike s => GenRegex s -> s -> [s]
tokenizeRE re
    = token''
    where
    fcs         = firstChars re           -- computed once, shared by both scanners
    re1         = mkDiff re mkUnit        -- used after a real token: next token must not be empty
    token''     = token' re  fcs
    token1''    = token' re1 fcs

    -- token'   :: StringLike s => GenRegex s -> CharSet -> s -> [s]
    token' re' fcs' inp
      | nullS inp  = []
      | otherwise  = evalRes . splitWithRegexCS re' fcs' $ inp
      where
        evalRes Nothing
          = token'' (dropS 1 inp)         -- re does not match any prefix

        evalRes (Just (toks, rest))
          | nullS tok  = tok : token'' (dropS 1 rest) -- re is nullable and only the empty prefix matches
                                                      -- discard one char and try again
          | otherwise = tok : token1'' rest           -- real token found, next token must not be empty
          where
            tok = snd . head $ toks                   -- head entry carries the matched prefix

-- | split a string into tokens (words) by giving a regular expression
-- which all tokens must match.
--
-- Convenient function for 'tokenizeRE'
--
-- This can be used for simple tokenizers.
-- It is recommended to use regular expressions where the empty word does not match.
-- Else there will appear a lot of probably useless empty tokens in the output.
-- All non-matching chars are discarded. The result is always a list:
-- a regex that never matches (e.g. because of a syntax error, compare the
-- examples for 'match') leads to the empty list.
--
-- examples:
--
-- > tokenize "a" "aabba"      = ["a","a","a"]
-- > tokenize "a*" "aaaba"     = ["aaa","a"]
-- > tokenize "a*" "bbb"       = ["","",""]
-- > tokenize "a+" "bbb"       = []
-- >
-- > tokenize "a*b" ""         = []
-- > tokenize "a*b" "abc"      = ["ab"]
-- > tokenize "a*b" "abaab ab" = ["ab","aab","ab"]
-- >
-- > tokenize "[a-z]{2,}|[0-9]{2,}|[0-9]+[.][0-9]+" "ab123 456.7abc"
-- >                           = ["ab","123","456.7","abc"]
-- >
-- > tokenize "[a-z]*|[0-9]{2,}|[0-9]+[.][0-9]+" "cab123 456.7abc"
-- >                           = ["cab","123","456.7","abc"]
-- >
-- > tokenize "[^ \t\n\r]*" "abc def\t\n\rxyz"
-- >                           = ["abc","def","xyz"]
-- >
-- > tokenize ".*"   "\nabc\n123\n\nxyz\n"
-- >                           = ["","abc","123","","xyz"]
-- >
-- > tokenize ".*"             = lines
-- >
-- > tokenize "[^ \t\n\r]*"    = words

tokenize        :: StringLike s => s -> s -> [s]
tokenize        = tokenizeRE . parseRegex

-- | tokenize with extended syntax
--
-- Same as 'tokenize', but the regular expression is parsed with 'parseRegexExt'.

tokenizeExt     :: StringLike s => s -> s -> [s]
tokenizeExt     = tokenizeRE . parseRegexExt

-- ------------------------------------------------------------

-- | split a string into tokens and delimiters by giving a regular expression
-- which all tokens must match
--
-- This is a generalisation of the above 'tokenizeRE' functions.
-- The non-matching char sequences are marked with @Left@, the matching ones are marked with @Right@
--
-- The result is always a list. With a regex that never matches,
-- a non-empty input is returned unchanged as a single @Left@ element.
--
-- The following Law holds:
--
-- > concat . map (either id id) . tokenizeRE' re == id

tokenizeRE'     :: StringLike s => GenRegex s -> s -> [Either s s]
tokenizeRE' re inp0
    = token'' (inp0, 0) inp0
    where
    fcs         = firstChars re           -- computed once, shared by both scanners
    re1         = mkDiff re mkUnit        -- used after a real token: next token must not be empty
    token''     = token' re  fcs
    token1''    = token' re1 fcs

    -- token'   :: StringLike s => GenRegex s -> CharSet -> (s, Int) -> s -> [Either s s]
    --
    -- the pair (uns, n) describes the pending unmatched chars:
    -- the first n chars of uns have been skipped but not yet emitted
    token' re' fcs' (uns, !n) inp
      | nullS inp     = addUnmatched []
      | otherwise     = evalRes . splitWithRegexCS re' fcs' $ inp
      where
        -- emit the pending unmatched chars (if any) as a Left element
        addUnmatched
          | n == 0     = id
          | otherwise  = ((Left $ takeS n uns) :)

        -- emit the pending unmatched chars followed by the token t
        addMatched t
          = addUnmatched . ((Right t) :)

        evalRes Nothing
          = token'' (uns, n + 1) (dropS 1 inp)       -- re does not match any prefix

        evalRes (Just (toks, rest))
            | nullS tok = addMatched tok           -- re is nullable and only the empty prefix matches
                          $ token'' (rest, 1)
                                    (dropS 1 rest) -- discard one char and try again

            | otherwise = addMatched tok
                          $ token1'' (rest, 0) rest -- real token found, next token must not be empty
          where
            tok = snd . head $ toks                 -- head entry carries the matched prefix

-- | convenient function for 'tokenizeRE''
--
-- The regular expression is given as a string and parsed with 'parseRegex'.
--
-- When the regular expression parses as Zero, @[Left input]@ is returned, that means no tokens are found

tokenize'       :: StringLike s => s -> s -> [Either s s]
tokenize'       = tokenizeRE' . parseRegex

-- | convenient function for 'tokenizeRE'' with extended syntax
--
-- Same as 'tokenize'', but the regular expression is parsed with 'parseRegexExt'.

tokenizeExt'    :: StringLike s => s -> s -> [Either s s]
tokenizeExt'    = tokenizeRE' . parseRegexExt

-- ------------------------------------------------------------

-- | split a string into tokens (pair of labels and words) by giving a regular expression
-- containing labeled subexpressions.
--
-- This function should not be called with regular expressions
-- without any labeled subexpressions. This does not make sense, because the result list
-- will always be empty.
--
-- Result is the list of matching subexpressions
-- This can be used for simple tokenizers.
-- At least one char is consumed by parsing a token.
-- The pairs in the result list contain the matching substrings.
-- All non-matching chars are discarded. The result is always a list:
-- a regex that never matches leads to the empty list.

tokenizeSubexRE :: StringLike s => GenRegex s -> s -> [(s, s)]
tokenizeSubexRE re
    = token''
    where
    fcs         = firstChars re           -- computed once, shared by both scanners
    re1         = mkDiff re mkUnit        -- used after a real token: next token must not be empty
    token''     = token' re  fcs
    token1''    = token' re1 fcs

    -- token'   :: StringLike s => GenRegex s -> CharSet -> s -> [(s, s)]
    token' re' fcs' inp
      | nullS inp      = []
      | otherwise     = evalRes . splitWithRegexCS re' fcs' $ inp
      where
        evalRes Nothing
          = token'' (dropS 1 inp)            -- re does not match any prefix

        evalRes (Just (toks, rest))
          | nullS tok = res ++ token'' (dropS 1 rest) -- re is nullable and only the empty prefix matches
          | otherwise = res ++ token1'' rest         -- token found, tokenize the rest
          where
            -- the labeled submatches follow the head entry;
            -- tok is forced by the guards before res is used,
            -- so drop 1 behaves like tail here
            res = map (first fromJust) . drop 1 $ toks
            tok = snd . head $ toks                  -- head entry carries the matched prefix

-- | convenient function for 'tokenizeSubexRE'
--
-- The regular expression is given as a string and parsed with 'parseRegexExt'.
--
-- examples:
--
-- > tokenizeSubex "({name}[a-z]+)|({num}[0-9]{2,})|({real}[0-9]+[.][0-9]+)"
-- >                 "cab123 456.7abc"
-- >                                  = [("name","cab")
-- >                                    ,("num","123")
-- >                                    ,("real","456.7")
-- >                                    ,("name","abc")]
-- >
-- > tokenizeSubex "({real}({n}[0-9]+)([.]({f}[0-9]+))?)"
-- >                 "12.34"          = [("real","12.34")
-- >                                    ,("n","12")
-- >                                    ,("f","34")]
-- >
-- > tokenizeSubex "({real}({n}[0-9]+)([.]({f}[0-9]+))?)"
-- >                  "12 34"         = [("real","12"),("n","12")
-- >                                    ,("real","34"),("n","34")]
-- >
-- > tokenizeSubex "({real}({n}[0-9]+)(([.]({f}[0-9]+))|({f})))"
-- >                  "12 34.56"      = [("real","12"),("n","12"),("f","")
-- >                                    ,("real","34.56"),("n","34"),("f","56")]

tokenizeSubex   :: StringLike s => s -> s -> [(s, s)]
tokenizeSubex   = tokenizeSubexRE . parseRegexExt

-- ------------------------------------------------------------

-- | sed like editing function
--
-- All matching tokens are edited by the 1. argument, the editing function,
-- all other chars remain as they are

sedRE           :: StringLike s => (s -> s) ->  GenRegex s -> s -> s
sedRE edit re   = concatS . map (either id edit) . tokenizeRE' re
                  -- Left parts (not matched) pass through, Right parts (tokens) are edited

-- | convenient function for 'sedRE'
--
-- The regular expression is given as a string and parsed with 'parseRegex'.
--
-- examples:
--
-- > sed (const "b") "a" "xaxax"       = "xbxbx"
-- > sed (\ x -> x ++ x) "a" "xax"     = "xaax"
-- > sed undefined       "[" "xxx"     = "xxx"

sed             :: StringLike s => (s -> s) -> s -> s -> s
sed edit        = sedRE edit . parseRegex

-- | sed with extended syntax
--
-- Same as 'sed', but the regular expression is parsed with 'parseRegexExt'.

sedExt          :: StringLike s => (s -> s) -> s -> s -> s
sedExt edit     = sedRE edit . parseRegexExt

-- ------------------------------------------------------------

-- | match a string with a regular expression
--
-- This is just another name for 'matchWithRegex'.

matchRE         :: StringLike s => GenRegex s -> s -> Bool
matchRE         = matchWithRegex

-- | convenient function for 'matchRE'
--
-- The regular expression is given as a string and parsed with 'parseRegex'.
--
-- Examples:
--
-- > match "x*" "xxx" = True
-- > match "x" "xxx"  = False
-- > match "[" "xxx"  = False

match           :: StringLike s => s -> s -> Bool
match           = matchWithRegex . parseRegex

-- | match with extended regular expressions
--
-- Same as 'match', but the regular expression is parsed with 'parseRegexExt'.

matchExt        :: StringLike s => s -> s -> Bool
matchExt        = matchWithRegex . parseRegexExt

-- ------------------------------------------------------------

-- | match a string with a regular expression
-- and extract subexpression matches
--
-- The result is the list of pairs of labels and submatches.
-- If the string does not match, the empty list is returned.

matchSubexRE            :: StringLike s => GenRegex s -> s -> [(s, s)]
matchSubexRE re         = map (first fromJust) . fromMaybe [] . matchWithRegex' re
                          -- all entries delivered by matchWithRegex' are expected to carry a label

-- | convenient function for 'matchSubexRE'
--
-- The regular expression is given as a string and parsed with 'parseRegexExt'.
--
-- Examples:
--
-- > matchSubex "({1}x*)"                 "xxx"      = [("1","xxx")]
-- > matchSubex "({1}x*)"                 "y"        = []
-- > matchSubex "({w}[0-9]+)x({h}[0-9]+)" "800x600"  = [("w","800"),("h","600")]
-- > matchSubex "[" "xxx"                            = []

matchSubex              :: StringLike s => s -> s -> [(s, s)]
matchSubex              = matchSubexRE . parseRegexExt

-- ------------------------------------------------------------

-- | grep like filter for lists of strings
--
-- The regular expression may be prefixed with the usual context spec \"^\" for start of string
-- and "\\<" for start of word,
-- and suffixed with \"$\" for end of text and "\\>" for end of word.
-- Word chars are defined by the multi char escape sequence "\\w"
--
-- Examples
--
-- > grep "a"    ["_a_", "_a", "a_", "a", "_"]      => ["_a_", "_a", "a_", "a"]
-- > grep "^a"   ["_a_", "_a", "a_", "a", "_"]      => ["a_", "a"]
-- > grep "a$"   ["_a_", "_a", "a_", "a", "_"]      => ["_a", "a"]
-- > grep "^a$"  ["_a_", "_a", "a_", "a", "_"]      => ["a"]
-- > grep "\\<a" ["x a b", " ax ", " xa ", "xab"]   => ["x a b", " ax "]
-- > grep "a\\>" ["x a b", " ax ", " xa ", "xab"]   => ["x a b", " xa "]

grep                    :: StringLike s => s -> [s] -> [s]
grep                    = grep' parseRegex'

-- | grep with extended regular expressions
--
-- Same as 'grep', but the regular expression is parsed with 'parseRegexExt''.

grepExt                 :: StringLike s => s -> [s] -> [s]
grepExt                 = grep' parseRegexExt'

-- | common worker for 'grep' and 'grepExt'
--
-- The 1. argument is the regex parser to be used. It is handed to
-- 'parseContextRegex', the resulting regex is used for filtering with 'grepRE'.

grep'                   :: StringLike s => (String -> GenRegex s) -> s -> [s] -> [s]
grep' parseRe           = grepRE . parseContextRegex parseRe

-- | grep with already prepared Regex (usually with 'parseContextRegex')
--
-- Only the strings matching the regex (see 'matchRE') are kept.

grepRE                  :: StringLike s => GenRegex s-> [s] -> [s]
grepRE re               = filter (matchRE re)

-- | grep with Regex and line numbers
--
-- The strings are numbered starting with 1, only the pairs with
-- a string matching the regex (see 'matchRE') are kept.

grepREwithLineNum       :: StringLike s => GenRegex s -> [s] -> [(Int, s)]
grepREwithLineNum re     = filter (matchRE re . snd) . zip [(1::Int)..]

-- ------------------------------------------------------------