{-# LANGUAGE FlexibleContexts                         #-}
{-# LANGUAGE GADTs                                    #-}
{-# LANGUAGE MultiParamTypeClasses                    #-}
{-# LANGUAGE NoImplicitPrelude                        #-}
{-# LANGUAGE OverloadedStrings                        #-}
{-# LANGUAGE ScopedTypeVariables                      #-}

{-# OPTIONS -fwarn-unused-imports #-}

-- | This module provides perl-style pattern matching.  It is intended
-- for use with minimal Haskell knowledge, so it moves away from the
-- complex regex-* type signatures for the sake of clarity, and always
-- uses the same string types for source text and patterns.  See
-- 'tests' in source code for a few examples.
module Text.Regex.Easy
  ( module Text.Regex.PCRE
  , Match, Source
  , (=~+)
  , (=~-)
  , (=~#)
  , (=~++)
  , replaceRegex
  , replaceRegexAll
  )
where

import Data.Array as AR
import Data.Function
import Data.List as List
import Data.Monoid
import Data.String.Conversions
import Prelude hiding ((++))
import Text.Regex.PCRE

import qualified Data.ByteString.Lazy as LBS


-- | Rudimentary tests.  Read the source as a form of documentation.
tests :: Bool
tests = and $
    (("file_1.txt" =~+ "^(.*)_(\\d).txt$") ==
     [ ( "file_1.txt" , ( 0 , 10 ) )
     , ( "file" , ( 0 , 4 ) )
     , ( "1" , ( 5 , 1 ) )
     ]) :

    (("file_1.txt" =~- "^(.*)_(\\d).txt$") ==
     ["file_1.txt", "file", "1"]) :

    ("file_1.txt" =~# "^(.*)_(\\d).txt$") :

    (let q :: LBS = "wif kwof ..wif,, wif,  8fwif"
         p :: SBS = "\\Sw.f"
     in ((q =~+ p) ==
         [ ( "kwof" , (  4 , 4 ) ) ]) &&
        ((q =~++ p) ==
         [ [ ( "kwof" , (  4 , 4 ) ) ]
         , [ ( ".wif" , ( 10 , 4 ) ) ]
         , [ ( "fwif" , ( 24 , 4 ) ) ]
         ])) :

    (let q :: LBS = "wif kwof ..wif,, wif,  8fwif"
         p :: SBS = "\\Sw.f"
         f ([(a,_)] :: [(LBS, (MatchOffset, MatchLength))]) = Just $ "@" <> a <> "@"
     in (replaceRegex q p f == "wif @kwof@ ..wif,, wif,  8fwif") &&
        (replaceRegexAll q p f == "wif @kwof@ .@.wif@,, wif,  8@fwif@")) :

    []


type Match = SBS
type Source = LBS


-- | Convenience wrapper around '(=~)', that trades flexibility off
-- for compactness.
(=~+) :: Source -> Match -> [(Source, (MatchOffset, MatchLength))]
(=~+) source match = elems (getAllTextSubmatches (source =~ match) :: MatchText Source)


-- | Convenience wrapper for '(=~+)' that chops rarely needed offsets
-- and lengths off the result.
(=~-) :: Source -> Match -> [Source]
(=~-) source match = map fst $ source =~+ match


-- | Convenience function for '(=~+)' with match result 'Bool'.
(=~#) :: Source -> Match -> Bool
(=~#) source match = not . null $ source =~+ match


-- | Like '(=~+)', but find all matches, not just the first one.
(=~++) :: Source -> Match -> [[(Source, (MatchOffset, MatchLength))]]
(=~++) source match = case source =~+ match of
                        [] -> []
                        x@((_, (holeStart, holeEnd)):_) -> x : map (shift (holeStart + holeEnd))
                                                                   (LBS.drop (fromIntegral $ holeStart + holeEnd) source =~++ match)
  where
    shift :: Int -> [(Source, (MatchOffset, MatchLength))] -> [(Source, (MatchOffset, MatchLength))]
    shift o' = map (\ (s, (o, l)) -> (s, (o + o', l)))


-- | Replace first match with result of a function of the match.
replaceRegex :: Source -> Match -> ([(Source, (MatchOffset, MatchLength))] -> Maybe Source) -> Source
replaceRegex source match trans = case source =~+ match of
      m@((_, (offset, length)):_) -> let before = LBS.take (fromIntegral offset) source
                                         after = LBS.drop (fromIntegral $ offset + length) source
                                     in case trans m of
                                          Just m' -> before <> m' <> after
                                          Nothing -> source


-- | Replace all matches with result of a function of the match.
replaceRegexAll :: Source -> Match -> ([(Source, (MatchOffset, MatchLength))] -> Maybe Source) -> Source
replaceRegexAll source match trans = case source =~+ match of
      [] -> source
      m@((_, (offset, length)):_) -> case trans m of
                                        Just m' -> let before = LBS.take (fromIntegral offset) source
                                                       after  = LBS.drop (fromIntegral $ offset + length) source
                                                   in before <> m' <> replaceRegexAll after match trans
                                        Nothing -> let before = LBS.take (fromIntegral $ offset + length) source
                                                       after  = LBS.drop (fromIntegral $ offset + length) source
                                                   in before <> replaceRegexAll after match trans