{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Hledger.Utils.Regex (
Regexp(reString)
,toRegex
,toRegexCI
,toRegex'
,toRegexCI'
,Replacement
,RegexError
,regexMatch
,regexMatchText
,regexReplace
,regexReplaceUnmemo
,regexReplaceAllBy
)
where
import Control.Monad (foldM)
import Data.Aeson (ToJSON(..), Value(String))
import Data.Array ((!), elems, indices)
import Data.Char (isDigit)
import Data.List (foldl')
import Data.MemoUgly (memo)
import Data.Text (Text)
import qualified Data.Text as T
import Text.Regex.TDFA (
Regex, CompOption(..), defaultCompOpt, defaultExecOpt,
makeRegexOptsM, AllMatches(getAllMatches), match, MatchText,
RegexLike(..), RegexMaker(..), RegexOptions(..), RegexContext(..)
)
data Regexp
= Regexp { Regexp -> Text
reString :: Text, Regexp -> Regex
reCompiled :: Regex }
| RegexpCI { reString :: Text, reCompiled :: Regex }
instance Eq Regexp where
Regexp Text
s1 Regex
_ == :: Regexp -> Regexp -> Bool
== Regexp Text
s2 Regex
_ = Text
s1 forall a. Eq a => a -> a -> Bool
== Text
s2
RegexpCI Text
s1 Regex
_ == RegexpCI Text
s2 Regex
_ = Text
s1 forall a. Eq a => a -> a -> Bool
== Text
s2
Regexp
_ == Regexp
_ = Bool
False
instance Ord Regexp where
Regexp Text
s1 Regex
_ compare :: Regexp -> Regexp -> Ordering
`compare` Regexp Text
s2 Regex
_ = Text
s1 forall a. Ord a => a -> a -> Ordering
`compare` Text
s2
RegexpCI Text
s1 Regex
_ `compare` RegexpCI Text
s2 Regex
_ = Text
s1 forall a. Ord a => a -> a -> Ordering
`compare` Text
s2
Regexp Text
_ Regex
_ `compare` RegexpCI Text
_ Regex
_ = Ordering
LT
RegexpCI Text
_ Regex
_ `compare` Regexp Text
_ Regex
_ = Ordering
GT
instance Show Regexp where
showsPrec :: Int -> Regexp -> ShowS
showsPrec Int
d Regexp
r = Bool -> ShowS -> ShowS
showParen (Int
d forall a. Ord a => a -> a -> Bool
> Int
app_prec) forall a b. (a -> b) -> a -> b
$ ShowS
reCons forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => Int -> a -> ShowS
showsPrec (Int
app_precforall a. Num a => a -> a -> a
+Int
1) (Text -> String
T.unpack forall a b. (a -> b) -> a -> b
$ Regexp -> Text
reString Regexp
r)
where app_prec :: Int
app_prec = Int
10
reCons :: ShowS
reCons = case Regexp
r of Regexp Text
_ Regex
_ -> String -> ShowS
showString String
"Regexp "
RegexpCI Text
_ Regex
_ -> String -> ShowS
showString String
"RegexpCI "
instance Read Regexp where
readsPrec :: Int -> ReadS Regexp
readsPrec Int
d String
r = forall a. Bool -> ReadS a -> ReadS a
readParen (Int
d forall a. Ord a => a -> a -> Bool
> Int
app_prec) (\String
r' -> [(Text -> Regexp
toRegexCI' Text
m,String
t) |
(String
"RegexCI",String
s) <- ReadS String
lex String
r',
(Text
m,String
t) <- forall a. Read a => Int -> ReadS a
readsPrec (Int
app_precforall a. Num a => a -> a -> a
+Int
1) String
s]) String
r
forall a. [a] -> [a] -> [a]
++ forall a. Bool -> ReadS a -> ReadS a
readParen (Int
d forall a. Ord a => a -> a -> Bool
> Int
app_prec) (\String
r' -> [(Text -> Regexp
toRegex' Text
m, String
t) |
(String
"Regex",String
s) <- ReadS String
lex String
r',
(Text
m,String
t) <- forall a. Read a => Int -> ReadS a
readsPrec (Int
app_precforall a. Num a => a -> a -> a
+Int
1) String
s]) String
r
where app_prec :: Int
app_prec = Int
10
instance ToJSON Regexp where
toJSON :: Regexp -> Value
toJSON (Regexp Text
s Regex
_) = Text -> Value
String forall a b. (a -> b) -> a -> b
$ Text
"Regexp " forall a. Semigroup a => a -> a -> a
<> Text
s
toJSON (RegexpCI Text
s Regex
_) = Text -> Value
String forall a b. (a -> b) -> a -> b
$ Text
"RegexpCI " forall a. Semigroup a => a -> a -> a
<> Text
s
instance RegexLike Regexp String where
matchOnce :: Regexp -> String -> Maybe MatchArray
matchOnce = forall regex source.
RegexLike regex source =>
regex -> source -> Maybe MatchArray
matchOnce forall b c a. (b -> c) -> (a -> b) -> a -> c
. Regexp -> Regex
reCompiled
matchAll :: Regexp -> String -> [MatchArray]
matchAll = forall regex source.
RegexLike regex source =>
regex -> source -> [MatchArray]
matchAll forall b c a. (b -> c) -> (a -> b) -> a -> c
. Regexp -> Regex
reCompiled
matchCount :: Regexp -> String -> Int
matchCount = forall regex source.
RegexLike regex source =>
regex -> source -> Int
matchCount forall b c a. (b -> c) -> (a -> b) -> a -> c
. Regexp -> Regex
reCompiled
matchTest :: Regexp -> String -> Bool
matchTest = forall regex source.
RegexLike regex source =>
regex -> source -> Bool
matchTest forall b c a. (b -> c) -> (a -> b) -> a -> c
. Regexp -> Regex
reCompiled
matchAllText :: Regexp -> String -> [MatchText String]
matchAllText = forall regex source.
RegexLike regex source =>
regex -> source -> [MatchText source]
matchAllText forall b c a. (b -> c) -> (a -> b) -> a -> c
. Regexp -> Regex
reCompiled
matchOnceText :: Regexp -> String -> Maybe (String, MatchText String, String)
matchOnceText = forall regex source.
RegexLike regex source =>
regex -> source -> Maybe (source, MatchText source, source)
matchOnceText forall b c a. (b -> c) -> (a -> b) -> a -> c
. Regexp -> Regex
reCompiled
instance RegexContext Regexp String String where
match :: Regexp -> ShowS
match = forall regex source target.
RegexContext regex source target =>
regex -> source -> target
match forall b c a. (b -> c) -> (a -> b) -> a -> c
. Regexp -> Regex
reCompiled
matchM :: forall (m :: * -> *). MonadFail m => Regexp -> String -> m String
matchM = forall regex source target (m :: * -> *).
(RegexContext regex source target, MonadFail m) =>
regex -> source -> m target
matchM forall b c a. (b -> c) -> (a -> b) -> a -> c
. Regexp -> Regex
reCompiled
toRegex :: Text -> Either RegexError Regexp
toRegex :: Text -> Either String Regexp
toRegex = forall a b. Ord a => (a -> b) -> a -> b
memo forall a b. (a -> b) -> a -> b
$ \Text
s -> forall a. Text -> Maybe a -> Either String a
mkRegexErr Text
s (Text -> Regex -> Regexp
Regexp Text
s forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall regex compOpt execOpt source (m :: * -> *).
(RegexMaker regex compOpt execOpt source, MonadFail m) =>
source -> m regex
makeRegexM (Text -> String
T.unpack Text
s))
toRegexCI :: Text -> Either RegexError Regexp
toRegexCI :: Text -> Either String Regexp
toRegexCI = forall a b. Ord a => (a -> b) -> a -> b
memo forall a b. (a -> b) -> a -> b
$ \Text
s -> forall a. Text -> Maybe a -> Either String a
mkRegexErr Text
s (Text -> Regex -> Regexp
RegexpCI Text
s forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall regex compOpt execOpt source (m :: * -> *).
(RegexMaker regex compOpt execOpt source, MonadFail m) =>
compOpt -> execOpt -> source -> m regex
makeRegexOptsM forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
compOpt
defaultCompOpt{caseSensitive :: Bool
caseSensitive=Bool
False} forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
execOpt
defaultExecOpt (Text -> String
T.unpack Text
s))
mkRegexErr :: Text -> Maybe a -> Either RegexError a
mkRegexErr :: forall a. Text -> Maybe a -> Either String a
mkRegexErr Text
s = forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall a b. a -> Either a b
Left String
errmsg) forall a b. b -> Either a b
Right
where errmsg :: String
errmsg = Text -> String
T.unpack forall a b. (a -> b) -> a -> b
$ Text
"This regular expression is malformed, please correct it:\n" forall a. Semigroup a => a -> a -> a
<> Text
s
toRegex' :: Text -> Regexp
toRegex' :: Text -> Regexp
toRegex' = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall a. String -> a
errorWithoutStackTrace forall a. a -> a
id forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Either String Regexp
toRegex
toRegexCI' :: Text -> Regexp
toRegexCI' :: Text -> Regexp
toRegexCI' = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall a. String -> a
errorWithoutStackTrace forall a. a -> a
id forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Either String Regexp
toRegexCI
type Replacement = String
type RegexError = String
regexMatch :: Regexp -> String -> Bool
regexMatch :: Regexp -> String -> Bool
regexMatch = forall regex source.
RegexLike regex source =>
regex -> source -> Bool
matchTest
regexMatchText :: Regexp -> Text -> Bool
regexMatchText :: Regexp -> Text -> Bool
regexMatchText Regexp
r = forall regex source.
RegexLike regex source =>
regex -> source -> Bool
matchTest Regexp
r forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack
regexReplace :: Regexp -> Replacement -> String -> Either RegexError String
regexReplace :: Regexp -> String -> String -> Either String String
regexReplace Regexp
re String
repl = forall a b. Ord a => (a -> b) -> a -> b
memo forall a b. (a -> b) -> a -> b
$ Regexp -> String -> String -> Either String String
regexReplaceUnmemo Regexp
re String
repl
regexReplaceUnmemo :: Regexp -> Replacement -> String -> Either RegexError String
regexReplaceUnmemo :: Regexp -> String -> String -> Either String String
regexReplaceUnmemo Regexp
re String
repl String
str = forall (t :: * -> *) (m :: * -> *) b a.
(Foldable t, Monad m) =>
(b -> a -> m b) -> b -> t a -> m b
foldM (String -> String -> MatchText String -> Either String String
replaceMatch String
repl) String
str (forall a. [a] -> [a]
reverse forall a b. (a -> b) -> a -> b
$ forall regex source target.
RegexContext regex source target =>
regex -> source -> target
match (Regexp -> Regex
reCompiled Regexp
re) String
str :: [MatchText String])
where
replaceMatch :: Replacement -> String -> MatchText String -> Either RegexError String
replaceMatch :: String -> String -> MatchText String -> Either String String
replaceMatch String
replpat String
s MatchText String
matchgroups =
case forall i e. Array i e -> [e]
elems MatchText String
matchgroups of
[] -> forall a b. b -> Either a b
Right String
s
((String
_,(Int
off,Int
len)):[(String, (Int, Int))]
_) ->
Either String String
erpl forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \String
rpl -> forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ String
pre forall a. [a] -> [a] -> [a]
++ String
rpl forall a. [a] -> [a] -> [a]
++ String
post
where
(String
pre, String
post') = forall a. Int -> [a] -> ([a], [a])
splitAt Int
off String
s
post :: String
post = forall a. Int -> [a] -> [a]
drop Int
len String
post'
erpl :: Either String String
erpl = forall (m :: * -> *).
Monad m =>
Regexp -> (String -> m String) -> String -> m String
regexReplaceAllByM Regexp
backrefRegex (MatchText String -> String -> Either String String
lookupMatchGroup MatchText String
matchgroups) String
replpat
where
lookupMatchGroup :: MatchText String -> String -> Either RegexError String
lookupMatchGroup :: MatchText String -> String -> Either String String
lookupMatchGroup MatchText String
grps (Char
'\\':s2 :: String
s2@(Char
_:String
_)) | forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isDigit String
s2 =
case forall a. Read a => String -> a
read String
s2 of Int
n | Int
n forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` forall i e. Ix i => Array i e -> [i]
indices MatchText String
grps -> forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a b. (a, b) -> a
fst (MatchText String
grps forall i e. Ix i => Array i e -> i -> e
! Int
n)
Int
_ -> forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ String
"no match group exists for backreference \"\\"forall a. [a] -> [a] -> [a]
++String
sforall a. [a] -> [a] -> [a]
++String
"\""
lookupMatchGroup MatchText String
_ String
s2 = forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ String
"lookupMatchGroup called on non-numeric-backreference \""forall a. [a] -> [a] -> [a]
++String
s2forall a. [a] -> [a] -> [a]
++String
"\", shouldn't happen"
backrefRegex :: Regexp
backrefRegex = Text -> Regexp
toRegex' Text
"\\\\[0-9]+"
regexReplaceAllBy :: Regexp -> (String -> String) -> String -> String
regexReplaceAllBy :: Regexp -> ShowS -> ShowS
regexReplaceAllBy Regexp
re ShowS
transform String
s = ShowS
prependdone String
rest
where
(Int
_, String
rest, ShowS
prependdone) = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (Int, String, ShowS) -> (Int, Int) -> (Int, String, ShowS)
go (Int
0, String
s, forall a. a -> a
id) [(Int, Int)]
matches
where
matches :: [(Int, Int)]
matches = forall (f :: * -> *) b. AllMatches f b -> f b
getAllMatches forall a b. (a -> b) -> a -> b
$ forall regex source target.
RegexContext regex source target =>
regex -> source -> target
match (Regexp -> Regex
reCompiled Regexp
re) String
s :: [(Int, Int)]
go :: (Int,String,String->String) -> (Int,Int) -> (Int,String,String->String)
go :: (Int, String, ShowS) -> (Int, Int) -> (Int, String, ShowS)
go (Int
pos,String
todo,ShowS
prepend) (Int
off,Int
len) =
let (String
prematch, String
matchandrest) = forall a. Int -> [a] -> ([a], [a])
splitAt (Int
off forall a. Num a => a -> a -> a
- Int
pos) String
todo
(String
matched, String
rest2) = forall a. Int -> [a] -> ([a], [a])
splitAt Int
len String
matchandrest
in (Int
off forall a. Num a => a -> a -> a
+ Int
len, String
rest2, ShowS
prepend forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String
prematchforall a. [a] -> [a] -> [a]
++) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ShowS
transform String
matched forall a. [a] -> [a] -> [a]
++))
regexReplaceAllByM :: forall m. Monad m => Regexp -> (String -> m String) -> String -> m String
regexReplaceAllByM :: forall (m :: * -> *).
Monad m =>
Regexp -> (String -> m String) -> String -> m String
regexReplaceAllByM Regexp
re String -> m String
transform String
s =
forall (t :: * -> *) (m :: * -> *) b a.
(Foldable t, Monad m) =>
(b -> a -> m b) -> b -> t a -> m b
foldM (Int, String, ShowS) -> (Int, Int) -> m (Int, String, ShowS)
go (Int
0, String
s, forall a. a -> a
id) [(Int, Int)]
matches forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \(Int
_, String
rest, ShowS
prependdone) -> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ ShowS
prependdone String
rest
where
matches :: [(Int, Int)]
matches = forall (f :: * -> *) b. AllMatches f b -> f b
getAllMatches forall a b. (a -> b) -> a -> b
$ forall regex source target.
RegexContext regex source target =>
regex -> source -> target
match (Regexp -> Regex
reCompiled Regexp
re) String
s :: [(Int, Int)]
go :: (Int,String,String->String) -> (Int,Int) -> m (Int,String,String->String)
go :: (Int, String, ShowS) -> (Int, Int) -> m (Int, String, ShowS)
go (Int
pos,String
todo,ShowS
prepend) (Int
off,Int
len) =
let (String
prematch, String
matchandrest) = forall a. Int -> [a] -> ([a], [a])
splitAt (Int
off forall a. Num a => a -> a -> a
- Int
pos) String
todo
(String
matched, String
rest) = forall a. Int -> [a] -> ([a], [a])
splitAt Int
len String
matchandrest
in String -> m String
transform String
matched forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \String
matched' -> forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
off forall a. Num a => a -> a -> a
+ Int
len, String
rest, ShowS
prepend forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String
prematchforall a. [a] -> [a] -> [a]
++) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String
matched' forall a. [a] -> [a] -> [a]
++))