-- | More user friendly regex api for PCRE regexes.
module EL.Private.Regex (
    Regex
    -- * compile
    , Option(..)
    , compile, compileOptions, compileUnsafe, compileOptionsUnsafe

    -- * matching
    , matches, groups, groupRanges
    -- * substitute
    , substitute, substituteGroups

    -- * misc
    , escape
) where
import qualified Data.ByteString as ByteString
import qualified Data.Text as Text
import Data.Text (Text)
import qualified Data.Text.Encoding as Encoding

import GHC.Stack (HasCallStack)
import qualified Text.Regex.PCRE.Heavy as PCRE
import Text.Regex.PCRE.Heavy (Regex)
import qualified Text.Regex.PCRE.Light as PCRE


-- * compile

fromText :: Text -> ByteString.ByteString
fromText = Encoding.encodeUtf8

data Option = CaseInsensitive | DotAll | Multiline
    deriving (Ord, Eq, Show)

compile :: String -> Either String Regex
compile = compileOptions []

compileOptions :: [Option] -> String -> Either String Regex
compileOptions options text =
    case PCRE.compileM (fromText (Text.pack text)) (convertOptions options) of
        Left msg -> Left $ "compiling regex " ++ show text ++ ": " ++ msg
        Right regex -> Right regex

convertOptions :: [Option] -> [PCRE.PCREOption]
convertOptions = (options++) . map convert
    where
    convert opt = case opt of
        CaseInsensitive -> PCRE.caseless
        DotAll -> PCRE.dotall
        Multiline -> PCRE.multiline
    options = [PCRE.utf8, PCRE.no_utf8_check]

-- | Will throw a runtime error if the regex has an error!
compileUnsafe :: HasCallStack => String -> Regex
compileUnsafe = compileOptionsUnsafe []

-- | Will throw a runtime error if the regex has an error!
compileOptionsUnsafe :: HasCallStack => [Option] -> String -> Regex
compileOptionsUnsafe options = either error id . compileOptions options

-- * match

matches :: Regex -> Text -> Bool
matches = flip (PCRE.=~)

-- | Return (complete_match, [group_match]).
groups :: Regex -> Text -> [(Text, [Text])]
groups = PCRE.scan

-- | Half-open ranges of where the regex matches.
groupRanges :: Regex -> Text -> [((Int, Int), [(Int, Int)])]
    -- ^ (entire, [group])
groupRanges = PCRE.scanRanges

-- * substitute

-- | TODO this is not the usual thing where it replaces \1 \2 etc., but
-- it replaces the entire match.
substitute :: Regex -> Text -> Text -> Text
substitute regex sub = PCRE.gsub regex sub

substituteGroups :: Regex -> (Text -> [Text] -> Text)
    -- ^ (complete_match -> groups -> replacement)
    -> Text -> Text
substituteGroups = PCRE.gsub

-- * misc

-- | Escape a string so the regex matches it literally.
escape :: String -> String
escape "" = ""
escape (c : cs)
    | c `elem` ("\\^$.[|()?*+{" :: [Char]) = '\\' : c : escape cs
    | otherwise = c : escape cs