{-# LANGUAGE TemplateHaskell, QuasiQuotes #-}

{-|
  Module      :  Text.Regex.PCRE.QQ
  Copyright   :  (c) Matt Morrow 2008
  License     :  BSD3

  Maintainer  :  mjm2002@gmail.com
  Stability   :  unstable
  Portability :  non-portable (GHC QuasiQuotes)

  A quasiquoter for Text.Regex.PCRE regexes.
  This makes use of a new GHC extension known as QuasiQuotes.
  See the README for the temporary location of the docs for
  Language.Haskell.TH.Quote. See the EXAMPLES file for examples.

  > ghci> [$rx|([aeiou]).*(er|ing|tion)([\.,\?]*)$|] "helloing.!?!?!"
  > Just ["elloing.!?!?!","e","ing",".!?!?!"]
-}

module Text.Regex.PCRE.QQ (
    rx            -- the regex QuasiQuoter
  , regexToExpQ   -- String -> ExpQ
  , regexToPatQ   -- String -> PatQ

  -- should these be being exported here??
  , module Language.Haskell.TH.Quote
  , module Language.Haskell.TH.Syntax
  , module Language.Haskell.TH.Lib
  , module Language.Haskell.TH.Ppr
) where

import Language.Haskell.TH.Quote
import Language.Haskell.TH.Syntax
import Language.Haskell.TH.Lib
  hiding (match)
import Language.Haskell.TH.Ppr
import Text.Regex.PCRE.Light
  (Regex,PCREOption,PCREExecOption)
import qualified
  Text.Regex.PCRE.Light as PCRE
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import Data.ByteString.Internal (c2w,w2c)

-----------------------------------------------------------------------------

{- data QuasiQuoter
    = QuasiQuoter { quoteExp :: String -> Q Exp,
                    quotePat :: String -> Q Pat}
              -- Defined in Language.Haskell.TH.Quote -}

{- |
> ghci> maybe [] tail $ [$rx|^([+-])?([0-9]+)\.([0-9]+)|] (show $ negate pi)
> ["-","3","141592653589793"]
-}
rx :: QuasiQuoter
rx = QuasiQuoter
        regexToExpQ
        regexToPatQ

-- | Transform a string rep
-- of a regex to an ExpQ. The
-- resulting ExpQ, when spliced,
-- results in a function of type
-- @String -> Maybe [String]@,
-- where the input is the String
-- to match on. The result is
-- Nothing on error, and Just
-- a list of results on success.
-- Note: I'm packing\/unpacking\/...
-- the ByteString unnecessarily
-- for convenience in testing
-- out the first go at this.
-- This will be dealt with in
-- the future.
regexToExpQ :: String -> ExpQ
regexToExpQ s = case PCRE.compileM (pack s) pcreOpts of
                  Left err -> error err
		             -- Give an error at compile time
			     -- if the regex string is invalid
		  Right _  -> [|
                               (return . fmap unpack =<<)
                                     . match (regex s) . pack
				     |]

-- | Transform a string (presumably)
--  containing a regex to a PatQ.
-- NOTE: Given a regex, a pattern
--    is constructed which matches
--    a literal string containing
--    the verbatim regex. It does
--    this because I couldn't think
--    of anything better for it to
--    do off the cuff. This needs
--    thought.
regexToPatQ :: String -> PatQ
regexToPatQ =
  litP . stringL
    . dropWhile (/='"')
      . show . regex

-- | Regex compilation. Temporarily uses
--  hardcoded options @extended@ and
--  @multiline@.
regex :: String -> Regex
regex = flip PCRE.compile pcreOpts . pack

-- | Regex matching.
match :: Regex -> B.ByteString -> Maybe [B.ByteString]
match rx = flip (PCRE.match rx) pcreExecOpts

-- | Adjust these to your tastes. An
--  interface to options seems to be
--  the next logical (and practical)
--  step.
pcreOpts :: [PCREOption]
pcreOpts =
  [ PCRE.extended
  , PCRE.multiline ]
  -- , dotall, caseless, utf8
  -- , newline_any, PCRE.newline_crlf ]

-- | Adjust these to your tastes. An
--  interface to options seems to be
--  the next logical (and practical)
--  step.
pcreExecOpts :: [PCREExecOption]
pcreExecOpts = []
  -- [ PCRE.exec_newline_crlf
  -- , exec_newline_any, PCRE.exec_notempty
  -- , PCRE.exec_notbol, PCRE.exec_noteol ]

pack :: String -> ByteString
pack = B.pack . fmap c2w

unpack :: ByteString -> String
unpack = fmap w2c . B.unpack

-----------------------------------------------------------------------------