{-# LANGUAGE PatternSynonyms, ViewPatterns #-}
{-|
Module      : Parsley.Internal.Core.CharPred
Description : Packaging of offsets and positions.
License     : BSD-3-Clause
Maintainer  : Jamie Willis
Stability   : experimental

This module contains `CharPred`, a specialised defunctionalising for @Char -> Bool@ functions.
This can be used to efficiently query for character class membership.

@since 2.1.0.0
-}
module Parsley.Internal.Core.CharPred (
    CharPred(..), pattern Item, pattern Specific,
    apply, andPred, orPred, diffPred, optimisePredGiven, mergePreds,
    members, nonMembers,
    lamTerm
  ) where

import Prelude hiding (null)

import Data.RangeSet             (RangeSet, elems, unelems, fromRanges, full, member, fold, null, union, extractSingle, singleton, intersection, difference, isSubsetOf, sizeRanges, complement)
import Parsley.Internal.Core.Lam (Lam(Abs, App, Var, T, F, If), andLam, notLam, orLam)

{-|
Represents @Char -> Bool@ functions, potentially in a more inspectable way.

@since 2.1.0.0
-}
data CharPred where
  -- | This is a raw user-defined predicate, with little inspectability other than membership.
  UserPred :: (Char -> Bool) -> Lam (Char -> Bool) -> CharPred
  -- | This accounts for a character-class, implemented using a `RangeSet` for efficient querying and space.
  Ranges :: RangeSet Char -> CharPred

{-|
Represents @const True@.

@since 2.1.0.0
-}
pattern Item :: CharPred
pattern $bItem :: CharPred
$mItem :: forall {r}. CharPred -> ((# #) -> r) -> ((# #) -> r) -> r
Item <- Ranges (full -> True)
  where Item = RangeSet Char -> CharPred
Ranges (forall a. Enum a => [(a, a)] -> RangeSet a
fromRanges [(forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound)])

{-|
Represents @(== c)@ for some specific @c@.

@since 2.1.0.0
-}
pattern Specific :: Char -> CharPred
pattern $bSpecific :: Char -> CharPred
$mSpecific :: forall {r}. CharPred -> (Char -> r) -> ((# #) -> r) -> r
Specific c <- Ranges (extractSingle -> Just c)
  where Specific Char
c = RangeSet Char -> CharPred
Ranges (forall a. Enum a => a -> RangeSet a
singleton Char
c)

{-|
Tests whether a given character falls within the predicate.

@since 2.1.0.0
-}
apply :: CharPred -> Char -> Bool
apply :: CharPred -> Char -> Bool
apply (UserPred Char -> Bool
f Lam (Char -> Bool)
_) Char
c = Char -> Bool
f Char
c
apply (Ranges RangeSet Char
rngs) Char
c = forall a. Enum a => a -> RangeSet a -> Bool
member Char
c RangeSet Char
rngs

{-|
Merges two predicates by creating one which only returns true when a character
is in both original predicates.

@since 2.1.0.0
-}
andPred :: CharPred -> CharPred -> CharPred
andPred :: CharPred -> CharPred -> CharPred
andPred (UserPred Char -> Bool
f Lam (Char -> Bool)
lf) CharPred
p = (Char -> Bool) -> Lam (Char -> Bool) -> CharPred
UserPred (\Char
c -> Char -> Bool
f Char
c Bool -> Bool -> Bool
&& CharPred -> Char -> Bool
apply CharPred
p Char
c) (forall a1 b. (Lam a1 -> Lam b) -> Lam (a1 -> b)
Abs forall a b. (a -> b) -> a -> b
$ \Lam Char
c -> Lam Bool -> Lam Bool -> Lam Bool
andLam (forall a1 a. Lam (a1 -> a) -> Lam a1 -> Lam a
App Lam (Char -> Bool)
lf Lam Char
c) (forall a1 a. Lam (a1 -> a) -> Lam a1 -> Lam a
App (CharPred -> Lam (Char -> Bool)
lamTerm CharPred
p) Lam Char
c))
andPred CharPred
p (UserPred Char -> Bool
f Lam (Char -> Bool)
lf) = (Char -> Bool) -> Lam (Char -> Bool) -> CharPred
UserPred (\Char
c -> CharPred -> Char -> Bool
apply CharPred
p Char
c Bool -> Bool -> Bool
&& Char -> Bool
f Char
c) (forall a1 b. (Lam a1 -> Lam b) -> Lam (a1 -> b)
Abs forall a b. (a -> b) -> a -> b
$ \Lam Char
c -> Lam Bool -> Lam Bool -> Lam Bool
andLam (forall a1 a. Lam (a1 -> a) -> Lam a1 -> Lam a
App (CharPred -> Lam (Char -> Bool)
lamTerm CharPred
p) Lam Char
c) (forall a1 a. Lam (a1 -> a) -> Lam a1 -> Lam a
App Lam (Char -> Bool)
lf Lam Char
c))
andPred (Ranges RangeSet Char
rngs1) (Ranges RangeSet Char
rngs2) = RangeSet Char -> CharPred
Ranges (RangeSet Char
rngs1 forall a. RangeSet a -> RangeSet a -> RangeSet a
`intersection` RangeSet Char
rngs2)

{-|
Occasionally, characters can pass through a predicate only to pass through another at a later point.
This given information can be used to optimise the new predicate the character is fed through.

This works as follows:

  * If the given knowledge is a subset of the new predicate, then we /know/ that any character check
    will have passed, because it already passed a stricter check. The predicate can, therefore, be
    optimised to `Item`.
  * Otherwise, the character can only pass through both predicates if it can pass through their
    intersection. If the intersection is smaller (in terms of the number of checks required to
    establish membership), then it should be used as it generates smaller code.
  * If neither of the above conditions are true, then the original predicate remains the most
    efficient for future tests.

@since 2.1.0.0
-}
optimisePredGiven :: CharPred -- ^ A predicate to be optimised with previous given knowledge.
                  -> CharPred -- ^ A predicate that is known to already be true.
                  -> CharPred
optimisePredGiven :: CharPred -> CharPred -> CharPred
optimisePredGiven (Ranges RangeSet Char
pred) (Ranges RangeSet Char
given)
  | forall a. RangeSet a -> RangeSet a -> Bool
isSubsetOf RangeSet Char
given RangeSet Char
pred = CharPred
Item
  | forall a. RangeSet a -> Int
sizeRanges RangeSet Char
inter forall a. Ord a => a -> a -> Bool
<= forall a. RangeSet a -> Int
sizeRanges RangeSet Char
pred = RangeSet Char -> CharPred
Ranges RangeSet Char
inter
  | Bool
otherwise = RangeSet Char -> CharPred
Ranges RangeSet Char
pred
  where
    inter :: RangeSet Char
inter = forall a. RangeSet a -> RangeSet a -> RangeSet a
intersection RangeSet Char
given RangeSet Char
pred
optimisePredGiven CharPred
p CharPred
_ = CharPred
p

mergePreds :: CharPred -> CharPred -> CharPred
mergePreds :: CharPred -> CharPred -> CharPred
mergePreds (Ranges RangeSet Char
p1) (Ranges RangeSet Char
p2)
  | forall a. RangeSet a -> RangeSet a -> Bool
isSubsetOf RangeSet Char
p1 RangeSet Char
p2 = RangeSet Char -> CharPred
Ranges RangeSet Char
p2
  | forall a. RangeSet a -> RangeSet a -> Bool
isSubsetOf RangeSet Char
p2 RangeSet Char
p1 = RangeSet Char -> CharPred
Ranges RangeSet Char
p1
mergePreds CharPred
_ CharPred
_ = CharPred
Item

{-|
Merges two predicates by creating one which only returns true when a character
is in either of the original predicates.

@since 2.1.0.0
-}
orPred :: CharPred -> CharPred -> CharPred
orPred :: CharPred -> CharPred -> CharPred
orPred (UserPred Char -> Bool
f Lam (Char -> Bool)
lf) CharPred
p = (Char -> Bool) -> Lam (Char -> Bool) -> CharPred
UserPred (\Char
c -> Char -> Bool
f Char
c Bool -> Bool -> Bool
|| CharPred -> Char -> Bool
apply CharPred
p Char
c) (forall a1 b. (Lam a1 -> Lam b) -> Lam (a1 -> b)
Abs forall a b. (a -> b) -> a -> b
$ \Lam Char
c -> Lam Bool -> Lam Bool -> Lam Bool
orLam (forall a1 a. Lam (a1 -> a) -> Lam a1 -> Lam a
App Lam (Char -> Bool)
lf Lam Char
c) (forall a1 a. Lam (a1 -> a) -> Lam a1 -> Lam a
App (CharPred -> Lam (Char -> Bool)
lamTerm CharPred
p) Lam Char
c))
orPred CharPred
p (UserPred Char -> Bool
f Lam (Char -> Bool)
lf) = (Char -> Bool) -> Lam (Char -> Bool) -> CharPred
UserPred (\Char
c -> CharPred -> Char -> Bool
apply CharPred
p Char
c Bool -> Bool -> Bool
|| Char -> Bool
f Char
c) (forall a1 b. (Lam a1 -> Lam b) -> Lam (a1 -> b)
Abs forall a b. (a -> b) -> a -> b
$ \Lam Char
c -> Lam Bool -> Lam Bool -> Lam Bool
orLam (forall a1 a. Lam (a1 -> a) -> Lam a1 -> Lam a
App (CharPred -> Lam (Char -> Bool)
lamTerm CharPred
p) Lam Char
c) (forall a1 a. Lam (a1 -> a) -> Lam a1 -> Lam a
App Lam (Char -> Bool)
lf Lam Char
c))
orPred (Ranges RangeSet Char
rngs1) (Ranges RangeSet Char
rngs2) = RangeSet Char -> CharPred
Ranges (RangeSet Char
rngs1 forall a. RangeSet a -> RangeSet a -> RangeSet a
`union` RangeSet Char
rngs2)

{-|
Merges two predicates by creating one which only returns true when a character
is in the first but not the second predicate.

@since 2.1.0.0
-}
diffPred :: CharPred -> CharPred -> CharPred
diffPred :: CharPred -> CharPred -> CharPred
diffPred (UserPred Char -> Bool
f Lam (Char -> Bool)
lf) CharPred
p = (Char -> Bool) -> Lam (Char -> Bool) -> CharPred
UserPred (\Char
c -> Char -> Bool
f Char
c Bool -> Bool -> Bool
&& Bool -> Bool
not (CharPred -> Char -> Bool
apply CharPred
p Char
c)) (forall a1 b. (Lam a1 -> Lam b) -> Lam (a1 -> b)
Abs forall a b. (a -> b) -> a -> b
$ \Lam Char
c -> Lam Bool -> Lam Bool -> Lam Bool
andLam (forall a1 a. Lam (a1 -> a) -> Lam a1 -> Lam a
App Lam (Char -> Bool)
lf Lam Char
c) (Lam Bool -> Lam Bool
notLam (forall a1 a. Lam (a1 -> a) -> Lam a1 -> Lam a
App (CharPred -> Lam (Char -> Bool)
lamTerm CharPred
p) Lam Char
c)))
diffPred CharPred
p (UserPred Char -> Bool
f Lam (Char -> Bool)
lf) = (Char -> Bool) -> Lam (Char -> Bool) -> CharPred
UserPred (\Char
c -> CharPred -> Char -> Bool
apply CharPred
p Char
c Bool -> Bool -> Bool
&& Bool -> Bool
not (Char -> Bool
f Char
c)) (forall a1 b. (Lam a1 -> Lam b) -> Lam (a1 -> b)
Abs forall a b. (a -> b) -> a -> b
$ \Lam Char
c -> Lam Bool -> Lam Bool -> Lam Bool
andLam (forall a1 a. Lam (a1 -> a) -> Lam a1 -> Lam a
App (CharPred -> Lam (Char -> Bool)
lamTerm CharPred
p) Lam Char
c) (Lam Bool -> Lam Bool
notLam (forall a1 a. Lam (a1 -> a) -> Lam a1 -> Lam a
App Lam (Char -> Bool)
lf Lam Char
c)))
diffPred (Ranges RangeSet Char
rngs1) (Ranges RangeSet Char
rngs2) = RangeSet Char -> CharPred
Ranges (RangeSet Char
rngs1 forall a. RangeSet a -> RangeSet a -> RangeSet a
`difference` RangeSet Char
rngs2)

{-|
Given a predicate, returns the full range of characters it returns @True@ for.

@since 2.1.0.0
-}
members :: CharPred -> [Char]
members :: CharPred -> [Char]
members (UserPred Char -> Bool
f Lam (Char -> Bool)
_) = forall a. (a -> Bool) -> [a] -> [a]
filter Char -> Bool
f [forall a. Bounded a => a
minBound..forall a. Bounded a => a
maxBound]
members (Ranges RangeSet Char
rngs)  = forall a. Enum a => RangeSet a -> [a]
elems RangeSet Char
rngs

{-|
Given a predicate, returns the full range of characters it returns @False@ for.

@since 2.1.0.0
-}
nonMembers :: CharPred -> [Char]
nonMembers :: CharPred -> [Char]
nonMembers (UserPred Char -> Bool
f Lam (Char -> Bool)
_)      = forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
f) [forall a. Bounded a => a
minBound..forall a. Bounded a => a
maxBound]
nonMembers (Ranges RangeSet Char
rngs)       = forall a. (Bounded a, Enum a) => RangeSet a -> [a]
unelems RangeSet Char
rngs

{-|
Converts this predicate into a `Lam` term represention. This representation can
be optimised.

@since 2.1.0.0
-}
lamTerm :: CharPred -> Lam (Char -> Bool)
lamTerm :: CharPred -> Lam (Char -> Bool)
lamTerm (UserPred Char -> Bool
_ Lam (Char -> Bool)
t) = Lam (Char -> Bool)
t
lamTerm CharPred
Item = forall a1 b. (Lam a1 -> Lam b) -> Lam (a1 -> b)
Abs (forall a b. a -> b -> a
const Lam Bool
T)
lamTerm (Ranges (forall a. RangeSet a -> Bool
null -> Bool
True)) = forall a1 b. (Lam a1 -> Lam b) -> Lam (a1 -> b)
Abs (forall a b. a -> b -> a
const Lam Bool
F)
lamTerm (Ranges (forall a. Enum a => RangeSet a -> Maybe a
extractSingle forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (Bounded a, Enum a) => RangeSet a -> RangeSet a
complement -> Just Char
c)) = forall a1 a. Lam (a1 -> a) -> Lam a1 -> Lam a
App (forall a. Bool -> Code a -> Lam a
Var Bool
True [||(/=)||]) (forall a. Bool -> Code a -> Lam a
Var Bool
True [||c||])
lamTerm (Ranges RangeSet Char
rngs) =
  forall a1 b. (Lam a1 -> Lam b) -> Lam (a1 -> b)
Abs forall a b. (a -> b) -> a -> b
$ \Lam Char
c ->
    forall a b.
Enum a =>
(a -> a -> b -> b -> b) -> b -> RangeSet a -> b
fold (Lam Char -> Char -> Char -> Lam Bool -> Lam Bool -> Lam Bool
conv Lam Char
c) Lam Bool
F RangeSet Char
rngs
  where
    conv :: Lam Char -> Char -> Char -> Lam Bool -> Lam Bool -> Lam Bool
conv Lam Char
c Char
l Char
u Lam Bool
lb Lam Bool
rb
    --  | l == u = eq c (Var True [||l||]) `or` (lb `or` rb)
    --  | otherwise = (lte (Var True [||l||]) c `and` lte c (Var True [||u||])) `or` (lb `or` rb)
      | Char
l forall a. Eq a => a -> a -> Bool
== Char
u        = Lam Char -> Lam Char -> Lam Bool
eq Lam Char
c (forall a. Bool -> Code a -> Lam a
Var Bool
True [||l||]) Lam Bool -> Lam Bool -> Lam Bool
`or` forall a. Lam Bool -> Lam a -> Lam a -> Lam a
If (Lam Char -> Lam Char -> Lam Bool
lt Lam Char
c (forall a. Bool -> Code a -> Lam a
Var Bool
True [||l||])) Lam Bool
lb Lam Bool
rb
      -- the left can be omitted here
      | Char
l forall a. Eq a => a -> a -> Bool
== forall a. Bounded a => a
minBound = Lam Char -> Lam Char -> Lam Bool
lte Lam Char
c (forall a. Bool -> Code a -> Lam a
Var Bool
True [||u||]) Lam Bool -> Lam Bool -> Lam Bool
`or` Lam Bool
rb
      -- the right can be omitted here
      | Char
u forall a. Eq a => a -> a -> Bool
== forall a. Bounded a => a
maxBound = Lam Char -> Lam Char -> Lam Bool
lte (forall a. Bool -> Code a -> Lam a
Var Bool
True [||l||]) Lam Char
c Lam Bool -> Lam Bool -> Lam Bool
`or` Lam Bool
lb
      | Bool
otherwise     = forall a. Lam Bool -> Lam a -> Lam a -> Lam a
If (Lam Char -> Lam Char -> Lam Bool
lte (forall a. Bool -> Code a -> Lam a
Var Bool
True [||l||]) Lam Char
c) (Lam Char -> Lam Char -> Lam Bool
lte Lam Char
c (forall a. Bool -> Code a -> Lam a
Var Bool
True [||u||]) Lam Bool -> Lam Bool -> Lam Bool
`or` Lam Bool
rb) Lam Bool
lb

    or :: Lam Bool -> Lam Bool -> Lam Bool
or = Lam Bool -> Lam Bool -> Lam Bool
orLam
    lte :: Lam Char -> Lam Char -> Lam Bool
    lte :: Lam Char -> Lam Char -> Lam Bool
lte = forall a1 a. Lam (a1 -> a) -> Lam a1 -> Lam a
App forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a1 a. Lam (a1 -> a) -> Lam a1 -> Lam a
App (forall a. Bool -> Code a -> Lam a
Var Bool
True [||(<=)||])
    lt :: Lam Char -> Lam Char -> Lam Bool
    lt :: Lam Char -> Lam Char -> Lam Bool
lt = forall a1 a. Lam (a1 -> a) -> Lam a1 -> Lam a
App forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a1 a. Lam (a1 -> a) -> Lam a1 -> Lam a
App (forall a. Bool -> Code a -> Lam a
Var Bool
True [||(<)||])
    eq :: Lam Char -> Lam Char -> Lam Bool
    eq :: Lam Char -> Lam Char -> Lam Bool
eq = forall a1 a. Lam (a1 -> a) -> Lam a1 -> Lam a
App forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a1 a. Lam (a1 -> a) -> Lam a1 -> Lam a
App (forall a. Bool -> Code a -> Lam a
Var Bool
True [||(==)||])

instance Show CharPred where
  show :: CharPred -> [Char]
show (UserPred Char -> Bool
_ Lam (Char -> Bool)
f) = forall a. Show a => a -> [Char]
show Lam (Char -> Bool)
f
  show CharPred
Item = [Char]
"const True"
  show (Specific Char
c) = forall (t :: Type -> Type) a. Foldable t => t [a] -> [a]
concat [[Char]
"(== ", forall a. Show a => a -> [Char]
show Char
c, [Char]
")"]
  show (Ranges RangeSet Char
rngs) = [Char]
"elem " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show RangeSet Char
rngs