{-# LANGUAGE RecordWildCards #-}

module Text.Regex.Anagram.Parse
  ( AnaPattern(..)
  , parseAnaPattern
  ) where

import           Data.CaseInsensitive (FoldCase(..))
import qualified Data.IntSet as S
import qualified Data.Set as Set
import           Data.String (IsString(..))
import qualified Text.Regex.TDFA.Pattern as R
import qualified Text.Regex.TDFA.ReadRegex as R

import Text.Regex.Anagram.Types
import Text.Regex.Anagram.Util

-- |A parsed intermediate representation of regular expression pattern to match anagrams.
-- This is exposed mainly to make case-insensitive matches more efficient, so that 'foldCase' can be performed on the 'AnaPattern' to avoid unnecessary compilation.
-- Represented as an (expanded) list of alternative 'PatChars'.
newtype AnaPattern = AnaPattern [PatChars]
  deriving (Int -> AnaPattern -> ShowS
[AnaPattern] -> ShowS
AnaPattern -> String
(Int -> AnaPattern -> ShowS)
-> (AnaPattern -> String)
-> ([AnaPattern] -> ShowS)
-> Show AnaPattern
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [AnaPattern] -> ShowS
$cshowList :: [AnaPattern] -> ShowS
show :: AnaPattern -> String
$cshow :: AnaPattern -> String
showsPrec :: Int -> AnaPattern -> ShowS
$cshowsPrec :: Int -> AnaPattern -> ShowS
Show)

chrSet :: Set.Set Char -> ChrSet
chrSet :: Set Char -> ChrSet
chrSet = [Int] -> ChrSet
S.fromAscList ([Int] -> ChrSet) -> (Set Char -> [Int]) -> Set Char -> ChrSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Int) -> String -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map Char -> Int
forall a. Enum a => a -> Int
fromEnum (String -> [Int]) -> (Set Char -> String) -> Set Char -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Set Char -> String
forall a. Set a -> [a]
Set.toAscList

charPat :: PatChar -> PatChars
charPat :: PatChar -> PatChars
charPat PatChar
p = PatChars
forall a. Monoid a => a
mempty{ patReqs :: [PatChar]
patReqs = [PatChar
p] }

makeChar :: R.Pattern -> Maybe PatChar
makeChar :: Pattern -> Maybe PatChar
makeChar R.PDot{} = PatChar -> Maybe PatChar
forall (m :: * -> *) a. Monad m => a -> m a
return (PatChar -> Maybe PatChar) -> PatChar -> Maybe PatChar
forall a b. (a -> b) -> a -> b
$ ChrSet -> PatChar
PatNot ChrSet
S.empty
makeChar R.PChar{ getPatternChar :: Pattern -> Char
R.getPatternChar = Char
c } = PatChar -> Maybe PatChar
forall (m :: * -> *) a. Monad m => a -> m a
return (PatChar -> Maybe PatChar) -> PatChar -> Maybe PatChar
forall a b. (a -> b) -> a -> b
$ Int -> PatChar
PatChr (Int -> PatChar) -> Int -> PatChar
forall a b. (a -> b) -> a -> b
$ Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c
makeChar R.PEscape{ getPatternChar :: Pattern -> Char
R.getPatternChar = Char
c }
  | Char
c Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` String
"`'<>bB" = PatChar -> Maybe PatChar
forall (m :: * -> *) a. Monad m => a -> m a
return (PatChar -> Maybe PatChar) -> PatChar -> Maybe PatChar
forall a b. (a -> b) -> a -> b
$ Int -> PatChar
PatChr (Int -> PatChar) -> Int -> PatChar
forall a b. (a -> b) -> a -> b
$ Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c
-- TODO: use R.decodePatternSet
makeChar R.PAny{ getPatternSet :: Pattern -> PatternSet
R.getPatternSet = R.PatternSet (Just Set Char
s) Maybe (Set PatternSetCharacterClass)
Nothing Maybe (Set PatternSetCollatingElement)
Nothing Maybe (Set PatternSetEquivalenceClass)
Nothing } =
  PatChar -> Maybe PatChar
forall (m :: * -> *) a. Monad m => a -> m a
return (PatChar -> Maybe PatChar) -> PatChar -> Maybe PatChar
forall a b. (a -> b) -> a -> b
$ ChrSet -> PatChar
PatSet (ChrSet -> PatChar) -> ChrSet -> PatChar
forall a b. (a -> b) -> a -> b
$ Set Char -> ChrSet
chrSet Set Char
s
makeChar R.PAnyNot{ getPatternSet :: Pattern -> PatternSet
R.getPatternSet = R.PatternSet (Just Set Char
s) Maybe (Set PatternSetCharacterClass)
Nothing Maybe (Set PatternSetCollatingElement)
Nothing Maybe (Set PatternSetEquivalenceClass)
Nothing } =
  PatChar -> Maybe PatChar
forall (m :: * -> *) a. Monad m => a -> m a
return (PatChar -> Maybe PatChar) -> PatChar -> Maybe PatChar
forall a b. (a -> b) -> a -> b
$ ChrSet -> PatChar
PatNot (ChrSet -> PatChar) -> ChrSet -> PatChar
forall a b. (a -> b) -> a -> b
$ Set Char -> ChrSet
chrSet Set Char
s
makeChar Pattern
_ = Maybe PatChar
forall a. Maybe a
Nothing

makePattern :: R.Pattern -> Maybe PatChars
makePattern :: Pattern -> Maybe PatChars
makePattern (R.PGroup Maybe Int
_ Pattern
r) = Pattern -> Maybe PatChars
makePattern Pattern
r
makePattern (R.PNonCapture Pattern
r) = Pattern -> Maybe PatChars
makePattern Pattern
r
makePattern (R.POr [Pattern
r]) = Pattern -> Maybe PatChars
makePattern Pattern
r
makePattern (R.PConcat [Pattern]
l) = (Pattern -> Maybe PatChars) -> [Pattern] -> Maybe PatChars
forall (m :: * -> *) b a.
(Monad m, Monoid b) =>
(a -> m b) -> [a] -> m b
foldMapM Pattern -> Maybe PatChars
makePattern [Pattern]
l
makePattern (R.PQuest Pattern
r) = do
  PatChar
p <- Pattern -> Maybe PatChar
makeChar Pattern
r
  PatChars -> Maybe PatChars
forall (m :: * -> *) a. Monad m => a -> m a
return PatChars
forall a. Monoid a => a
mempty{ patOpts :: [PatChar]
patOpts = [PatChar
p] }
makePattern (R.PPlus Pattern
r) = do
  PatChar
p <- Pattern -> Maybe PatChar
makeChar Pattern
r
  PatChars -> Maybe PatChars
forall (m :: * -> *) a. Monad m => a -> m a
return PatChars
forall a. Monoid a => a
mempty{ patReqs :: [PatChar]
patReqs = [PatChar
p], patStar :: PatChar
patStar = PatChar
p }
makePattern (R.PStar Bool
_ Pattern
r) = do
  PatChar
p <- Pattern -> Maybe PatChar
makeChar Pattern
r
  PatChars -> Maybe PatChars
forall (m :: * -> *) a. Monad m => a -> m a
return PatChars
forall a. Monoid a => a
mempty{ patStar :: PatChar
patStar = PatChar
p }
makePattern (R.PBound Int
i Maybe Int
j' Pattern
r) = do
  PatChar
p <- Pattern -> Maybe PatChar
makeChar Pattern
r
  let ip :: PatChars
ip = PatChars
forall a. Monoid a => a
mempty{ patReqs :: [PatChar]
patReqs = Int -> PatChar -> [PatChar]
forall a. Int -> a -> [a]
replicate Int
i PatChar
p }
  PatChars -> Maybe PatChars
forall (m :: * -> *) a. Monad m => a -> m a
return (PatChars -> Maybe PatChars) -> PatChars -> Maybe PatChars
forall a b. (a -> b) -> a -> b
$ PatChars -> (Int -> PatChars) -> Maybe Int -> PatChars
forall b a. b -> (a -> b) -> Maybe a -> b
maybe
    PatChars
ip{ patStar :: PatChar
patStar = PatChar
p }
    (\Int
j -> PatChars
ip{ patOpts :: [PatChar]
patOpts = Int -> PatChar -> [PatChar]
forall a. Int -> a -> [a]
replicate (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i) PatChar
p })
    Maybe Int
j'
makePattern Pattern
R.PEmpty = PatChars -> Maybe PatChars
forall (m :: * -> *) a. Monad m => a -> m a
return PatChars
forall a. Monoid a => a
mempty
makePattern Pattern
r = PatChar -> PatChars
charPat (PatChar -> PatChars) -> Maybe PatChar -> Maybe PatChars
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Pattern -> Maybe PatChar
makeChar Pattern
r

makeAlts :: R.Pattern -> Maybe [PatChars]
makeAlts :: Pattern -> Maybe [PatChars]
makeAlts (R.PGroup Maybe Int
_ Pattern
r) = Pattern -> Maybe [PatChars]
makeAlts Pattern
r
makeAlts (R.PNonCapture Pattern
r) = Pattern -> Maybe [PatChars]
makeAlts Pattern
r
makeAlts (R.POr [Pattern]
o) = (Pattern -> Maybe [PatChars]) -> [Pattern] -> Maybe [PatChars]
forall (m :: * -> *) a b. Monad m => (a -> m [b]) -> [a] -> m [b]
concatMapM Pattern -> Maybe [PatChars]
makeAlts [Pattern]
o
makeAlts (R.PConcat [Pattern]
c) = [[PatChars]] -> [PatChars]
forall a. Monoid a => [[a]] -> [a]
cross ([[PatChars]] -> [PatChars])
-> Maybe [[PatChars]] -> Maybe [PatChars]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Pattern -> Maybe [PatChars]) -> [Pattern] -> Maybe [[PatChars]]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM Pattern -> Maybe [PatChars]
makeAlts [Pattern]
c where
  cross :: [[a]] -> [a]
cross [] = [a
forall a. Monoid a => a
mempty]
  cross ([a]
l:[[a]]
r) = do
    a
a <- [a]
l
    a
b <- [[a]] -> [a]
cross [[a]]
r
    a -> [a]
forall (m :: * -> *) a. Monad m => a -> m a
return (a
a a -> a -> a
forall a. Semigroup a => a -> a -> a
<> a
b)
makeAlts Pattern
r = PatChars -> [PatChars]
forall (m :: * -> *) a. Monad m => a -> m a
return (PatChars -> [PatChars]) -> Maybe PatChars -> Maybe [PatChars]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Pattern -> Maybe PatChars
makePattern Pattern
r

-- |Parse a string as a regular expression for matching anagrams, returning 'Left' error for invalid or unsupported regular expressions.  (Uses 'R.parseRegex'.)
parseAnaPattern :: String -> Either String AnaPattern
parseAnaPattern :: String -> Either String AnaPattern
parseAnaPattern String
r = case String -> Either ParseError (Pattern, (Int, DoPa))
R.parseRegex String
r of
  Left ParseError
e -> String -> Either String AnaPattern
forall a b. a -> Either a b
Left (ParseError -> String
forall a. Show a => a -> String
show ParseError
e)
  Right (Pattern
p, (Int, DoPa)
_) -> Either String AnaPattern
-> ([PatChars] -> Either String AnaPattern)
-> Maybe [PatChars]
-> Either String AnaPattern
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (String -> Either String AnaPattern
forall a b. a -> Either a b
Left String
"regexp contains features not supported for anagrams")
    (AnaPattern -> Either String AnaPattern
forall a b. b -> Either a b
Right (AnaPattern -> Either String AnaPattern)
-> ([PatChars] -> AnaPattern)
-> [PatChars]
-> Either String AnaPattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [PatChars] -> AnaPattern
AnaPattern) (Maybe [PatChars] -> Either String AnaPattern)
-> Maybe [PatChars] -> Either String AnaPattern
forall a b. (a -> b) -> a -> b
$ Pattern -> Maybe [PatChars]
makeAlts (Pattern -> Maybe [PatChars]) -> Pattern -> Maybe [PatChars]
forall a b. (a -> b) -> a -> b
$ (Pattern -> Pattern) -> Pattern -> Pattern
R.dfsPattern Pattern -> Pattern
R.simplify' Pattern
p

instance FoldCase AnaPattern where
  foldCase :: AnaPattern -> AnaPattern
foldCase (AnaPattern [PatChars]
l) = [PatChars] -> AnaPattern
AnaPattern ((PatChars -> PatChars) -> [PatChars] -> [PatChars]
forall a b. (a -> b) -> [a] -> [b]
map PatChars -> PatChars
forall s. FoldCase s => s -> s
foldCase [PatChars]
l)

instance IsString AnaPattern where
  fromString :: String -> AnaPattern
fromString = (String -> AnaPattern)
-> (AnaPattern -> AnaPattern)
-> Either String AnaPattern
-> AnaPattern
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> AnaPattern
forall a. HasCallStack => String -> a
error AnaPattern -> AnaPattern
forall a. a -> a
id (Either String AnaPattern -> AnaPattern)
-> (String -> Either String AnaPattern) -> String -> AnaPattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Either String AnaPattern
parseAnaPattern