{-# LANGUAGE RecordWildCards #-}

module Text.Regex.Anagram.Compile
  ( AnaPat(..)
  , Anagrex(..)
  , compileAnagrex
  , makeAnagrex
  ) where

import           Control.DeepSeq (NFData(..))
import           Control.Monad (mfilter)
import           Data.CaseInsensitive (FoldCase(..))
import           Data.Foldable (fold)
import           Data.Functor.Identity (Identity(Identity))
import qualified Data.IntSet as S
import           Data.List (sort)
import           Data.Maybe (mapMaybe)
import           Data.String (IsString(..))

import Text.Regex.Anagram.Types
import Text.Regex.Anagram.Util
import Text.Regex.Anagram.Parse

-- |Compiled matching pattern
data AnaPat = AnaPat
  { AnaPat -> PatChars
patUncompiled :: PatChars -- ^original, uncompiled pattern (only for CI)
  , AnaPat -> PatCharsOf RLE
patChars :: PatCharsOf RLE
  , AnaPat -> PatCharsOf Identity
patSets :: PatCharsOf Identity
  , AnaPat -> Int
patMin :: Int -- ^minimum length
  , AnaPat -> Inf Int
patMax :: Inf Int -- ^maximum length
  } deriving (Int -> AnaPat -> ShowS
[AnaPat] -> ShowS
AnaPat -> String
(Int -> AnaPat -> ShowS)
-> (AnaPat -> String) -> ([AnaPat] -> ShowS) -> Show AnaPat
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [AnaPat] -> ShowS
$cshowList :: [AnaPat] -> ShowS
show :: AnaPat -> String
$cshow :: AnaPat -> String
showsPrec :: Int -> AnaPat -> ShowS
$cshowsPrec :: Int -> AnaPat -> ShowS
Show)

-- |A compiled regular expression pattern to match anagrams.
-- Represented as an (expanded) list of alternative 'AnaPat's.
newtype Anagrex = Anagrex [AnaPat]
  deriving (Int -> Anagrex -> ShowS
[Anagrex] -> ShowS
Anagrex -> String
(Int -> Anagrex -> ShowS)
-> (Anagrex -> String) -> ([Anagrex] -> ShowS) -> Show Anagrex
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Anagrex] -> ShowS
$cshowList :: [Anagrex] -> ShowS
show :: Anagrex -> String
$cshow :: Anagrex -> String
showsPrec :: Int -> Anagrex -> ShowS
$cshowsPrec :: Int -> Anagrex -> ShowS
Show)

compilePat :: PatChars -> AnaPat
compilePat :: PatChars -> AnaPat
compilePat p :: PatChars
p@PatChars{[PatChar]
PatChar
patStar :: forall (f :: * -> *). PatCharsOf f -> PatChar
patOpts :: forall (f :: * -> *). PatCharsOf f -> f PatChar
patReqs :: forall (f :: * -> *). PatCharsOf f -> f PatChar
patStar :: PatChar
patOpts :: [PatChar]
patReqs :: [PatChar]
..} = AnaPat :: PatChars
-> PatCharsOf RLE
-> PatCharsOf Identity
-> Int
-> Inf Int
-> AnaPat
AnaPat
  { patUncompiled :: PatChars
patUncompiled = PatChars
p
  , patChars :: PatCharsOf RLE
patChars = PatChars :: forall (f :: * -> *).
f PatChar -> f PatChar -> PatChar -> PatCharsOf f
PatChars
    { patReqs :: RLE PatChar
patReqs = [PatChar] -> RLE PatChar
forall a. Eq a => [a] -> RLE a
rle ([PatChar] -> RLE PatChar) -> [PatChar] -> RLE PatChar
forall a b. (a -> b) -> a -> b
$ [PatChar] -> [PatChar]
forall a. Ord a => [a] -> [a]
sort [PatChar]
patReqs
    , patOpts :: RLE PatChar
patOpts = [PatChar] -> RLE PatChar
forall a. Eq a => [a] -> RLE a
rle ([PatChar] -> RLE PatChar) -> [PatChar] -> RLE PatChar
forall a b. (a -> b) -> a -> b
$ [PatChar] -> [PatChar]
forall a. Ord a => [a] -> [a]
sort [PatChar]
opts
    , patStar :: PatChar
patStar = PatChar
patStar
    }
  , patSets :: PatCharsOf Identity
patSets = PatChars :: forall (f :: * -> *).
f PatChar -> f PatChar -> PatChar -> PatCharsOf f
PatChars
    { patReqs :: Identity PatChar
patReqs = PatChar -> Identity PatChar
forall a. a -> Identity a
Identity PatChar
rs
    , patOpts :: Identity PatChar
patOpts = PatChar -> Identity PatChar
forall a. a -> Identity a
Identity PatChar
os
    , patStar :: PatChar
patStar = PatChar
os PatChar -> PatChar -> PatChar
forall a. Semigroup a => a -> a -> a
<> PatChar
patStar
    }
  , patMin :: Int
patMin = Int
rlen
  , patMax :: Inf Int
patMax = case PatChar
patStar of
      PatSet ChrSet
s | ChrSet -> Bool
S.null ChrSet
s -> Int -> Inf Int
forall a. a -> Inf a
Fin (Int -> Inf Int) -> Int -> Inf Int
forall a b. (a -> b) -> a -> b
$ Int
rlen Int -> Int -> Int
forall a. Num a => a -> a -> a
+ [PatChar] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [PatChar]
opts
      PatChar
_ -> Inf Int
forall a. Inf a
Inf
  }
  where
  rs :: PatChar
rs = [PatChar] -> PatChar
forall (t :: * -> *) m. (Foldable t, Monoid m) => t m -> m
fold [PatChar]
patReqs
  os :: PatChar
os = PatChar
rs PatChar -> PatChar -> PatChar
forall a. Semigroup a => a -> a -> a
<> [PatChar] -> PatChar
forall (t :: * -> *) m. (Foldable t, Monoid m) => t m -> m
fold [PatChar]
opts
  rlen :: Int
rlen = [PatChar] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [PatChar]
patReqs
  opts :: [PatChar]
opts = (PatChar -> Maybe PatChar) -> [PatChar] -> [PatChar]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe ((PatChar -> Bool) -> Maybe PatChar -> Maybe PatChar
forall (m :: * -> *) a. MonadPlus m => (a -> Bool) -> m a -> m a
mfilter (Bool -> Bool
not (Bool -> Bool) -> (PatChar -> Bool) -> PatChar -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PatChar -> Bool
nullChar) (Maybe PatChar -> Maybe PatChar)
-> (PatChar -> Maybe PatChar) -> PatChar -> Maybe PatChar
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PatChar -> Maybe PatChar
forall a. a -> Maybe a
Just (PatChar -> Maybe PatChar)
-> (PatChar -> PatChar) -> PatChar -> Maybe PatChar
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PatChar -> PatChar -> PatChar
intersectChar (PatChar -> PatChar
notChar PatChar
patStar)) [PatChar]
patOpts

compileAlts :: [PatChars] -> [AnaPat]
compileAlts :: [PatChars] -> [AnaPat]
compileAlts = (PatChars -> AnaPat) -> [PatChars] -> [AnaPat]
forall a b. (a -> b) -> [a] -> [b]
map PatChars -> AnaPat
compilePat

-- |Compile an already-parsed 'AnaPattern' into an 'Anagrex'.
compileAnagrex :: AnaPattern -> Anagrex
compileAnagrex :: AnaPattern -> Anagrex
compileAnagrex (AnaPattern [PatChars]
l) = [AnaPat] -> Anagrex
Anagrex ([AnaPat] -> Anagrex) -> [AnaPat] -> Anagrex
forall a b. (a -> b) -> a -> b
$ [PatChars] -> [AnaPat]
compileAlts [PatChars]
l

-- |Build a regular expression for matching anagrams from a string, returning 'Left' error for invalid or unsupported regular expressions.
-- (Uses 'Text.Regex.TDFA.ReadRegex.parseRegex'.)
-- This works by first expanding out a list of alternative patterns (like @"a|(b(c|d))"@ into @["a","bc","bd"]@) and then creating optimized pattern represenations for each.
makeAnagrex :: String -> Either String Anagrex
makeAnagrex :: String -> Either String Anagrex
makeAnagrex = (AnaPattern -> Anagrex)
-> Either String AnaPattern -> Either String Anagrex
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap AnaPattern -> Anagrex
compileAnagrex (Either String AnaPattern -> Either String Anagrex)
-> (String -> Either String AnaPattern)
-> String
-> Either String Anagrex
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Either String AnaPattern
parseAnaPattern

instance FoldCase AnaPat where
  foldCase :: AnaPat -> AnaPat
foldCase AnaPat{ patUncompiled :: AnaPat -> PatChars
patUncompiled = PatChars
p } = PatChars -> AnaPat
compilePat (PatChars -> AnaPat) -> PatChars -> AnaPat
forall a b. (a -> b) -> a -> b
$ PatChars -> PatChars
forall s. FoldCase s => s -> s
foldCase PatChars
p

-- |Used to create a case-insensitive version of a pattern.
-- Note that this involves a re-compilation of the parsed 'AnaPattern'.  You can avoid this by using 'Text.Regex.Anagram.makeAnagrexCI'.
instance FoldCase Anagrex where
  foldCase :: Anagrex -> Anagrex
foldCase (Anagrex [AnaPat]
l) = [AnaPat] -> Anagrex
Anagrex ([AnaPat] -> Anagrex) -> [AnaPat] -> Anagrex
forall a b. (a -> b) -> a -> b
$ (AnaPat -> AnaPat) -> [AnaPat] -> [AnaPat]
forall a b. (a -> b) -> [a] -> [b]
map AnaPat -> AnaPat
forall s. FoldCase s => s -> s
foldCase [AnaPat]
l

instance IsString Anagrex where
  fromString :: String -> Anagrex
fromString = (String -> Anagrex)
-> (Anagrex -> Anagrex) -> Either String Anagrex -> Anagrex
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Anagrex
forall a. HasCallStack => String -> a
error Anagrex -> Anagrex
forall a. a -> a
id (Either String Anagrex -> Anagrex)
-> (String -> Either String Anagrex) -> String -> Anagrex
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Either String Anagrex
makeAnagrex

instance NFData AnaPat where
  rnf :: AnaPat -> ()
rnf (AnaPat PatChars
_ PatCharsOf RLE
c PatCharsOf Identity
s Int
i Inf Int
j) = PatCharsOf RLE -> ()
forall a. NFData a => a -> ()
rnf PatCharsOf RLE
c () -> () -> ()
`seq` PatCharsOf Identity -> ()
forall a. NFData a => a -> ()
rnf PatCharsOf Identity
s () -> () -> ()
`seq` Int -> ()
forall a. NFData a => a -> ()
rnf Int
i () -> () -> ()
`seq` Inf Int -> ()
forall a. NFData a => a -> ()
rnf Inf Int
j
instance NFData Anagrex where
  rnf :: Anagrex -> ()
rnf (Anagrex [AnaPat]
l) = [AnaPat] -> ()
forall a. NFData a => a -> ()
rnf [AnaPat]
l