{-# LANGUAGE CPP   #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE Safe  #-}
module Kleene.Internal.Functor (
    K (..),
    Greediness (..),
    -- * Constructors
    few,
    anyChar,
    oneof,
    char,
    charRange,
    dot,
    everything,
    everything1,
    -- * Queries
    isEmpty,
    isEverything,
    -- * Matching
    match,
    -- * Conversions
    toRE,
    toKleene,
    fromRE,
    toRA,
    ) where

import Prelude ()
import Prelude.Compat

import Control.Applicative (Alternative (..), liftA2)
import Data.Foldable       (toList)
import Data.Functor.Apply  (Apply (..))
import Data.RangeSet.Map   (RSet)
import Data.String         (IsString (..))

import qualified Data.RangeSet.Map      as RSet
import qualified Text.Regex.Applicative as R
import qualified Data.Functor.Alt       as Alt

import qualified Kleene.Classes         as C
import           Kleene.Internal.Pretty
import           Kleene.Internal.Sets
import qualified Kleene.RE              as RE

-- | Star behaviour
data Greediness
    = Greedy    -- ^ 'many'
    | NonGreedy -- ^ 'few'
  deriving (Eq, Ord, Show, Enum, Bounded)

-- | 'Applicative' 'Functor' regular expression.
data K c a where
    KEmpty  :: K c a
    KPure   :: a -> K c a
    KChar   :: (Ord c, Enum c) => RSet c -> K c c
    KAppend :: (a -> b -> r) -> K c a -> K c b -> K c r
    KUnion  :: K c a -> K c a -> K c a
    KStar   :: Greediness -> K c a -> K c [a]

    -- optimisations
    KMap    :: (a -> b) -> K c a -> K c b -- could use Pure and Append
    KString :: Eq c => [c] -> K c [c]     -- could use Char and Append

instance (c ~ Char, IsString a) => IsString (K c a) where
    fromString s = KMap fromString (KString s)

instance Functor (K c) where
    fmap _ KEmpty          = KEmpty
    fmap f (KPure x)       = KPure (f x)
    fmap f (KMap g k)      = KMap (f . g) k
    fmap f (KAppend g a b) = KAppend (\x y -> f (g x y)) a b
    fmap f k                    = KMap f k

instance Apply (K c) where
    KEmpty <.> _ = KEmpty
    _ <.> KEmpty = KEmpty

    KPure f <.> k = fmap f k
    k <.> KPure x = fmap ($ x) k

    f <.> x = KAppend ($) f x

    liftF2 = KAppend

instance Applicative (K c) where
    pure  = KPure
    (<*>) = (<.>)

#if MIN_VERSION_base(4,10,0)
    liftA2 = liftF2
#endif

instance Alt.Alt (K c) where
    KEmpty <!> k = k
    k <!> KEmpty = k
    KChar a <!> KChar b = KChar (RSet.union a b)

    a <!> b = KUnion a b

    many KEmpty      = KPure []
    many (KStar _ k) = KMap pure (KStar Greedy k)
    many k           = KStar Greedy k

    some KEmpty      = KEmpty
    some (KStar _ k) = KMap pure (KStar Greedy k)
    some k           = liftA2 (:) k (KStar Greedy k)

instance Alternative (K c) where
    empty = KEmpty
    (<|>) = (Alt.<!>)
    some  = Alt.some
    many  = Alt.many

-- | 'few', not 'many'.
--
-- Let's define two similar regexps
--
-- >>> let re1 = liftA2 (,) (few  $ char 'a') (many $ char 'a')
-- >>> let re2 = liftA2 (,) (many $ char 'a') (few  $ char 'a')
--
-- Their 'RE' behaviour is the same:
--
-- >>> C.equivalent (toRE re1) (toRE re2)
-- True
--
-- >>> map (C.match $ toRE re1) ["aaa","bbb"]
-- [True,False]
--
-- However, the 'RA' behaviour is different!
--
-- >>> R.match (toRA re1) "aaaa"
-- Just ("","aaaa")
--
-- >>> R.match (toRA re2) "aaaa"
-- Just ("aaaa","")
--
few :: K c a -> K c [a]
few KEmpty      = KPure []
few (KStar _ k) = KMap pure (KStar NonGreedy k)
few k           = KStar NonGreedy k

-------------------------------------------------------------------------------
--
-------------------------------------------------------------------------------

-- | >>> putPretty anyChar
-- ^[^]$
anyChar :: (Ord c, Enum c, Bounded c) => K c c
anyChar = KChar RSet.full

-- | >>> putPretty $ oneof ("foobar" :: [Char])
-- ^[a-bfor]$
oneof :: (Ord c, Enum c, Foldable f) => f c -> K c c
oneof = KChar . RSet.fromList . toList

-- | >>> putPretty $ char 'x'
-- ^x$
char :: (Ord c, Enum c) => c -> K c c
char = KChar . RSet.singleton

-- | >>> putPretty $ charRange 'a' 'z'
-- ^[a-z]$
charRange :: (Enum c, Ord c) => c -> c -> K c c
charRange a b = KChar (RSet.singletonRange (a, b))

-- | >>> putPretty dot
-- ^.$
dot :: K Char Char
dot = KChar dotRSet

-- | >>> putPretty everything
-- ^[^]*$
everything :: (Ord c, Enum c, Bounded c) => K c [c]
everything = many anyChar

-- | >>> putPretty everything1
-- ^[^][^]*$
everything1 :: (Ord c, Enum c, Bounded c) => K c [c]
everything1 = some anyChar

-- | Matches nothing?
isEmpty :: (Ord c, Enum c, Bounded c) => K c a -> Bool
isEmpty k = C.equivalent (toRE k) C.empty

-- | Matches whole input?
isEverything :: (Ord c, Enum c, Bounded c) => K c a -> Bool
isEverything k = C.equivalent (toRE k) C.everything

-------------------------------------------------------------------------------
-- Matching
-------------------------------------------------------------------------------

-- | Match using @regex-applicative@
match :: K c a -> [c] -> Maybe a
match = R.match . toRA

-------------------------------------------------------------------------------
-- RE
-------------------------------------------------------------------------------

-- | Convert to 'RE'.
--
-- >>> putPretty (toRE $ many "foo" :: RE.RE Char)
-- ^(foo)*$
--
toRE :: (Ord c, Enum c, Bounded c) => K c a -> RE.RE c
toRE = toKleene

-- | Convert to any 'Kleene'
toKleene :: C.FiniteKleene c k => K c a -> k
toKleene (KMap _ a)      = toKleene a
toKleene (KUnion a b)    = C.unions [toKleene a, toKleene b]
toKleene (KAppend _ a b) = C.appends [toKleene a, toKleene b]
toKleene (KStar _ a)     = C.star (toKleene a)
toKleene (KString s)     = C.appends (map C.char s)
toKleene KEmpty          = C.empty
toKleene (KPure _)       = C.eps
toKleene (KChar cs)      = C.fromRSet cs

-- | Convert from 'RE'.
--
-- /Note:/ all 'RE.REStar's are converted to 'Greedy' ones,
-- it doesn't matter, as we don't capture anything.
--
-- >>> match (fromRE "foobar") "foobar"
-- Just "foobar"
--
-- >>> match (fromRE $ C.star "a" <> C.star "a") "aaaa"
-- Just "aaaa"
--
fromRE :: (Ord c, Enum c) => RE.RE c -> K c [c]
fromRE (RE.REChars cs)    = pure <$> KChar cs
fromRE (RE.REAppend rs)   = concat <$> traverse fromRE rs
fromRE (RE.REUnion cs rs) = foldr (KUnion . fromRE) (pure <$> KChar cs) (toList rs)
fromRE (RE.REStar r)      = concat <$> KStar Greedy (fromRE r)

-------------------------------------------------------------------------------
-- regex-applicative
-------------------------------------------------------------------------------

-- | Convert 'K' to 'R.RE' from @regex-applicative@.
--
-- >>> R.match (toRA ("xx" *> everything <* "zz" :: K Char String)) "xxyyyzz"
-- Just "yyy"
--
-- See also 'match'.
--
toRA :: K c a -> R.RE c a
toRA KEmpty              = empty
toRA (KPure x)           = pure x
toRA (KChar cs)          = R.psym (\c -> RSet.member c cs)
toRA (KAppend f a b)     = liftA2 f (toRA a) (toRA b)
toRA (KUnion a b)        = toRA a <|> toRA b
toRA (KStar Greedy a)    = many (toRA a)
toRA (KStar NonGreedy a) = R.few (toRA a)
toRA (KMap f a)          = fmap f (toRA a)
toRA (KString s)         = R.string s

-------------------------------------------------------------------------------
-- JavaScript
-------------------------------------------------------------------------------

-- | Convert to non-matching JavaScript string which can be used
-- as an argument to @new RegExp@
--
-- >>> putPretty ("foobar" :: K Char String)
-- ^foobar$
--
-- >>> putPretty $ many ("foobar" :: K Char String)
-- ^(foobar)*$
--
instance c ~ Char => Pretty (K c a) where
    pretty = pretty . toRE

-------------------------------------------------------------------------------
-- Doctest
-------------------------------------------------------------------------------

-- $setup
--
-- >>> :set -XOverloadedStrings