{-|
Module      : Text.StringRandom.Parser
Description : Simple regular expression parser
Copyright   : Copyright (C) 2016- hiratara
License     : GPL-3
Maintainer  : hiratara@cpan.org
Stability   : experimental

Parse the regular expression so that it can be used with the
"Text.StringRandom" module.

See <https://github.com/cho45/String_random.js/blob/master/lib/String_random.js String_random.js>
-}

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Strict #-}
module Text.StringRandom.Parser
  ( Parsed(..)
  , processParse
  ) where

import qualified Data.Attoparsec.Text as Attoparsec
import Data.Attoparsec.Text
  ( char
  , anyChar
  , satisfy
  , string
  , digit
  , many1
  , endOfInput
  )
import Data.List ((\\))
import qualified Data.Text as Text
import Control.Applicative ((<|>), optional, many)
import Control.Monad.Trans.Class (lift)
import Control.Monad.Trans.State.Strict (evalStateT, StateT, gets, put)

-- Int :: A sequence number of groups (X)
type RegParser a = StateT Int Attoparsec.Parser a

-- | Abstract syntax tree of parsed regular expression
data Parsed  = PClass   [Char]               -- ^ [abc], \d, [^abc]
             | PRange Int (Maybe Int) Parsed -- ^ X*, X{1,2}, X+, X?
             | PConcat [Parsed]              -- ^ XYZ
             | PSelect [Parsed]              -- ^ X|Y|Z
             | PGrouped Int Parsed           -- ^ (X)
             | PBackward Int                 -- ^ \1, \2, ..., \9
             | PIgnored                      -- ^ ^, $, \b
             deriving (Int -> Parsed -> ShowS
[Parsed] -> ShowS
Parsed -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [Parsed] -> ShowS
$cshowList :: [Parsed] -> ShowS
show :: Parsed -> [Char]
$cshow :: Parsed -> [Char]
showsPrec :: Int -> Parsed -> ShowS
$cshowsPrec :: Int -> Parsed -> ShowS
Show, Parsed -> Parsed -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Parsed -> Parsed -> Bool
$c/= :: Parsed -> Parsed -> Bool
== :: Parsed -> Parsed -> Bool
$c== :: Parsed -> Parsed -> Bool
Eq)

pConcat :: [Parsed] -> Parsed
pConcat :: [Parsed] -> Parsed
pConcat [Parsed
x] = Parsed
x
pConcat [Parsed]
xs  = [Parsed] -> Parsed
PConcat [Parsed]
xs

pSelect :: [Parsed] -> Parsed
pSelect :: [Parsed] -> Parsed
pSelect [Parsed
x] = Parsed
x
pSelect [Parsed]
xs  = [Parsed] -> Parsed
PSelect [Parsed]
xs

{-|
'processParse' parses the regular expression string and returns an abstract
syntax tree. If there is an error in the regular expression, it returns the
'Left' value.
-}
processParse :: Text.Text -> Either String Parsed
processParse :: Text -> Either [Char] Parsed
processParse = let p :: Parser Parsed
p = forall (m :: * -> *) s a. Monad m => StateT s m a -> s -> m a
evalStateT RegParser Parsed
selectParser Int
0
               in forall a. Parser a -> Text -> Either [Char] a
Attoparsec.parseOnly (Parser Parsed
p forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall t. Chunk t => Parser t ()
endOfInput)

selectParser :: RegParser Parsed
selectParser :: RegParser Parsed
selectParser = do
  Parsed
p0 <- RegParser Parsed
concats
  [Parsed]
ps <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Char -> Parser Char
char Char
'|') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> RegParser Parsed
concats)
  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Parsed] -> Parsed
pSelect (Parsed
p0forall a. a -> [a] -> [a]
:[Parsed]
ps)
  where
    concats :: RegParser Parsed
concats = [Parsed] -> Parsed
pConcat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => f a -> f [a]
many RegParser Parsed
rangedParser

rangedParser :: RegParser Parsed
rangedParser :: RegParser Parsed
rangedParser = do
  Parsed
p <- RegParser Parsed
groupingParser
  let opt :: Parser Parsed
opt  = Char -> Parser Char
char Char
'?' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> Maybe Int -> Parsed -> Parsed
PRange Int
0 (forall a. a -> Maybe a
Just Int
1) Parsed
p)
      star :: Parser Parsed
star = Char -> Parser Char
char Char
'*' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> Maybe Int -> Parsed -> Parsed
PRange Int
0 forall a. Maybe a
Nothing Parsed
p)
      plus :: Parser Parsed
plus = Char -> Parser Char
char Char
'+' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> Maybe Int -> Parsed -> Parsed
PRange Int
1 forall a. Maybe a
Nothing Parsed
p)
      rep :: Parser Parsed
rep = do
        Char -> Parser Char
char Char
'{'
        Int
min  <- forall a. Read a => [Char] -> a
read forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 Parser Char
digit
        Maybe [Char]
max' <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional forall a b. (a -> b) -> a -> b
$ Char -> Parser Char
char Char
',' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser Char
digit
        let max :: Maybe Int
max = case Maybe [Char]
max' of
                    Maybe [Char]
Nothing -> forall a. a -> Maybe a
Just Int
min
                    Just [] -> forall a. Maybe a
Nothing
                    Just [Char]
ds -> forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a. Read a => [Char] -> a
read [Char]
ds
        Char -> Parser Char
char Char
'}'
        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Int -> Maybe Int -> Parsed -> Parsed
PRange Int
min Maybe Int
max Parsed
p
  forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$ Parser Parsed
opt forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Parsed
star forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Parsed
plus forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Parsed
rep forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *) a. Monad m => a -> m a
return Parsed
p

groupingParser :: RegParser Parsed
groupingParser :: RegParser Parsed
groupingParser = RegParser Parsed
ngroup forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> RegParser Parsed
group forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> RegParser Parsed
classParser forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> RegParser Parsed
escaped forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> RegParser Parsed
dot forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> RegParser Parsed
ignored forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> RegParser Parsed
others
  where
    ngroup :: RegParser Parsed
ngroup  = forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Text -> Parser Text
string Text
"(?:") forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> RegParser Parsed
selectParser forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Char -> Parser Char
char Char
')')
    group :: RegParser Parsed
group   = do
      Int
n <- forall (m :: * -> *) s a. Monad m => (s -> a) -> StateT s m a
gets (forall a. Num a => a -> a -> a
+ Int
1)
      forall (m :: * -> *) s. Monad m => s -> StateT s m ()
put Int
n
      Parsed
p <- forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Char -> Parser Char
char Char
'(') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> RegParser Parsed
selectParser forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Char -> Parser Char
char Char
')')
      forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Int -> Parsed -> Parsed
PGrouped Int
n Parsed
p
    escaped :: RegParser Parsed
escaped = forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$ do
      Char
ch <- Char -> Parser Char
char Char
'\\' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Char
anyChar
      forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ case Char
ch of
        Char
_ | Char
ch forall a. Eq a => a -> a -> Bool
== Char
'b'              -> Parsed
PIgnored -- Don't support \b
          | Char
ch forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
'1' .. Char
'9'] -> Int -> Parsed
PBackward (forall a. Read a => [Char] -> a
read [Char
ch])
          | Bool
otherwise              -> [Char] -> Parsed
PClass (Char -> [Char]
classes Char
ch)
    dot :: RegParser Parsed
dot     = forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$ Char -> Parser Char
char Char
'.' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. Monad m => a -> m a
return ([Char] -> Parsed
PClass [Char]
allC)
    ignored :: RegParser Parsed
ignored = forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Parser Char
satisfy (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
'^', Char
'$']) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. Monad m => a -> m a
return Parsed
PIgnored
    others :: RegParser Parsed
others  = forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$ [Char] -> Parsed
PClass forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. a -> [a] -> [a]
: [])  forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Char
satisfy (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [Char]
reservedChars)

classParser :: RegParser Parsed
classParser :: RegParser Parsed
classParser = forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$
      [Char] -> Parsed
PClass forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Char]
allC forall a. Eq a => [a] -> [a] -> [a]
\\) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Text -> Parser Text
string Text
"[^" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text [Char]
p forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char Char
']')
  forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> [Char] -> Parsed
PClass  forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Parser Char
char Char
'[' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text [Char]
p forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char Char
']')
  where
    p :: Attoparsec.Parser [Char]
    p :: Parser Text [Char]
p = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser Text [Char]
p1
    p1 :: Parser Text [Char]
p1 = do
      [Char]
ch <- Parser Text [Char]
onechar
      Maybe [Char]
r  <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (Char -> Parser Char
char Char
'-' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text [Char]
onechar)
      forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ case Maybe [Char]
r of
        Just [Char]
rch
          | forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
ch forall a. Eq a => a -> a -> Bool
== Int
1 Bool -> Bool -> Bool
&& forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
rch forall a. Eq a => a -> a -> Bool
== Int
1
            -> forall a. Enum a => a -> a -> [a]
enumFromTo (forall a. [a] -> a
head [Char]
ch) (forall a. [a] -> a
head [Char]
rch)
          -- Handle the case of [^\w-\d]
          | Bool
otherwise
            -> [Char]
ch forall a. [a] -> [a] -> [a]
++ Char
'-' forall a. a -> [a] -> [a]
: [Char]
rch
        Maybe [Char]
Nothing -> [Char]
ch
    onechar :: Parser Text [Char]
onechar =  Char -> [Char]
classes forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Parser Char
char Char
'\\' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Char
anyChar)
           forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall a. a -> [a] -> [a]
: [])  forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Char
satisfy (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [Char]
classReservedChars)

uppersC, lowersC, digitsC, spacesC, othersC, allC :: [Char]
uppersC :: [Char]
uppersC = [Char
'A'..Char
'Z']
lowersC :: [Char]
lowersC = [Char
'a'..Char
'z']
digitsC :: [Char]
digitsC = [Char
'0'..Char
'9']
spacesC :: [Char]
spacesC = [Char]
" \n\t"
othersC :: [Char]
othersC = [Char]
"!\"#$%&'()*+,-./:;<=>?@[\\]^`{|}~"
allC :: [Char]
allC    = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Char]
uppersC, [Char]
lowersC, [Char]
digitsC, [Char]
" ", [Char]
othersC, [Char]
"_"]

classes :: Char -> [Char]
classes :: Char -> [Char]
classes Char
'd' = [Char]
digitsC
classes Char
'D' = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Char]
uppersC, [Char]
lowersC, [Char]
spacesC, [Char]
othersC, [Char]
"_"]
classes Char
'w' = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Char]
uppersC, [Char]
lowersC, [Char]
digitsC, [Char]
"_"]
classes Char
'W' = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Char]
spacesC, [Char]
othersC]
classes Char
't' = [Char]
"\t"
classes Char
'n' = [Char]
"\n"
classes Char
'v' = [Char]
"\x000b"
classes Char
'f' = [Char]
"\x000c"
classes Char
'r' = [Char]
"\r"
classes Char
's' = [Char]
spacesC
classes Char
'S' = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Char]
uppersC, [Char]
lowersC, [Char]
digitsC, [Char]
othersC, [Char]
"_"]
classes Char
'0' = [Char]
"\0"
classes Char
c   = [Char
c]

reservedChars :: [Char]
reservedChars :: [Char]
reservedChars = [Char]
"\\()|^$*+{?[." -- ]

classReservedChars :: [Char]
classReservedChars :: [Char]
classReservedChars = [Char]
"\\]" -- -^