{-# OPTIONS_GHC -fglasgow-exts #-}

{-|
    Regular expressions, based on PCRE.

>   A king he was on carven throne
>   In many-pillared halls of stone
>   With golden roof and silver floor,
>   And runes of power upon the door...
-}

module RRegex (
    -- * Regular expressions
    Regex,
    mkRegex,
    mkRegexWithOpts,
    mkRegexWithPCRE,
    matchRegex,
    matchRegexAll,
    matchRegexWithPCRE,

    numSubs,

    -- Re-exported from PCRE: option flags, usable in the flag list that
    -- mkRegexWithPCRE takes
    pcreCaseless,   --  case insensitive matching
    pcreMultiline,  --  ^ and $ match newline as well as beginning and end of string
    pcreDotall,     --  dot matches everything, including newline
    pcreExtended,  
    pcreAnchored, 
    pcreDollarEndonly, 
    pcreExtra, 
    pcreNotbol, 
    pcreNoteol, 
    pcreUngreedy,   --  matches are not greedy by default
    pcreNotempty,   --  refuse to match empty string
    pcreUtf8,       --  UTF-8 semantics
  ) where

import Prelude

import Data.Array
import Data.Bits ((.|.))
import System.IO.Unsafe

import RRegex.PCRE
import RRegex.Syntax

-- | Low-level matching entry point: the three arguments are handed straight
-- to 'execute' and its result is returned unchanged.
matchRegexWithPCRE
    :: Regex                        -- ^ Compiled regular expression
    -> String                       -- ^ String to match against
    -> Int                          -- ^ Options
    -> IO (Maybe (Array Int (Int,Int)))
                                    -- ^ Whatever 'execute' yields.
                                    -- NOTE(review): presumably 'Nothing'
                                    -- means no match and each pair locates
                                    -- one group in the input - confirm
                                    -- against 'execute'.
matchRegexWithPCRE regex subject options = execute regex subject options

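-- | Makes a regular expression with the default options (single-line,
-- case-sensitive), i.e. 'mkRegexWithOpts' with the multi-line flag 'False'
-- and the case-sensitive flag 'True'.  Patterns are compiled by PCRE, so the
-- syntax is PCRE's rather than strictly that of @egrep@ (i.e. POSIX
-- \"extended\" regular expressions).  Note: multi-line is arguably the
-- incorrect default; single line is the default everywhere else, and is
-- what is used here.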

mkRegex :: String -> Regex

-- | Makes a regular expression with PCRE flags
mkRegexWithPCRE
   :: String  -- ^ The regular expression to compile
   -> [Int]   -- ^ Flags
   -> Regex   -- ^ Returns: the compiled regular expression

-- | Makes a regular expression, where the multi-line and
-- case-sensitive options can be changed from the default settings.
mkRegexWithOpts
   :: String  -- ^ The regular expression to compile
   -> Bool    -- ^ 'True' @\<=>@ @\'^\'@ and @\'$\'@ match the beginning and 
              -- end of individual lines respectively, and @\'.\'@ does /not/
              -- match the newline character.
   -> Bool    -- ^ 'True' @\<=>@ matching is case-sensitive
   -> Regex   -- ^ Returns: the compiled regular expression

-- | Match a regular expression against a string
matchRegex
   :: Regex     -- ^ The regular expression
   -> String    -- ^ The string to match against
   -> Maybe [String]    -- ^ Returns: @'Just' strs@ if the match succeeded
                        -- (and @strs@ is the list of subexpression matches),
                        -- or 'Nothing' otherwise.

-- | Match a regular expression against a string, returning more information
-- about the match.
matchRegexAll
   :: Regex     -- ^ The regular expression
   -> String    -- ^ The string to match against
   -> Maybe ( String, String, String, [String] )
                -- ^ Returns: 'Nothing' if the match failed, or:
                -- 
                -- >  Just ( everything before match,
                -- >         portion matched,
                -- >         everything after the match,
                -- >         subexpression matches )

-- Compile the pattern @s@ with the given list of PCRE option flags.
--
-- The flags are bit masks, so they are merged with bitwise OR instead of
-- being added up: with addition, a flag that appears twice in the list
-- would turn into twice its value, i.e. a different option bit.  For lists
-- without repeated flags the result is the same as before.
--
-- Compilation runs under 'unsafePerformIO' so the compiled regex can be
-- used as a pure value.  Because of that, a compilation failure is raised
-- (through 'fail') only when the result is first forced.  The message
-- repeats the pattern and places a caret under column @i@, the position
-- that 'compile' returns next to the error text.
mkRegexWithPCRE s flags = unsafePerformIO $ do
    result <- compile s options
    case result of
        Left (i, err) -> fail $
            "PCRE Regular Expression Error:\n" ++ s ++ "\n"
            ++ replicate i ' ' ++ "^ " ++ err
        Right p -> return p
  where
    -- All requested flags folded into one option word.
    options = foldr (.|.) 0 flags

-- Default compilation: delegate to 'mkRegexWithOpts' with the multi-line
-- switch off and case-sensitive matching on.
mkRegex pat = mkRegexWithOpts pat multiLine caseSensitive
  where
    multiLine     = False
    caseSensitive = True
-- Translate the two boolean switches into PCRE flags and compile the
-- pattern.  UTF-8 mode is always requested; a switch that asks for nothing
-- contributes 0, i.e. no flag.
mkRegexWithOpts s multiLine caseSensitive =
    mkRegexWithPCRE s [pcreUtf8, lineFlag, caseFlag]
  where
    -- 'True' turns on PCRE's multi-line mode.
    lineFlag = if multiLine then pcreMultiline else 0

    -- The PCRE flag means "caseless", so it is set when the switch is 'False'.
    caseFlag = if caseSensitive then 0 else pcreCaseless

-- Run the match and return the matched groups without the first array
-- element (NOTE(review): presumably element 0 is the text of the whole
-- match - confirm against the '=~~' instance in RRegex.Syntax).
--
-- The local signature on 'subgroups' is what selects the
-- @Array Int String@ result from '=~~'.  'drop' is used instead of 'tail'
-- so that an empty array yields @[]@ rather than a runtime error.
matchRegex p str = fmap subgroups (str =~~ p)
  where
    subgroups :: Array Int String -> [String]
    subgroups = drop 1 . elems


-- Run the match and repackage the resulting record as the tuple
-- (text before, matched text, text after, subexpression matches).
matchRegexAll p str = fmap unpack (str =~~ p)
  where
    unpack result = case result of
        MR { mrBefore = pre, mrMatch = hit, mrAfter = post, mrSubList = subs } ->
            (pre, hit, post, subs)