{-|
Module      : Control.Lens.Regex
Description : PCRE regex combinators for interop with lens
Copyright   : (c) Chris Penner, 2019
License     : BSD3

Note that all traversals in this library are not techically lawful; the break the 'multi-set'
idempotence law; in reality this isn't usually a problem; but consider yourself warned. Test your code.
-}

{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PartialTypeSignatures #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}

module Control.Lens.Regex
    ( regex
    , iregex
    , match
    , groups
    , igroups

    -- * QuasiQuoter
    , rx
    , Match
    ) where

import Data.Text as T hiding (index)
import Text.Regex.PCRE.Heavy
import Control.Lens hiding (re, matching)
import Language.Haskell.TH.Quote

-- | Match represents a whole regex match; you can drill into it using 'match' or 'groups'
type Match = [Either Text Text]
type MatchRange = (Int, Int)
type GroupRanges = [(Int, Int)]

-- | 'QuasiQuoter' for compiling regexes.
-- This is just 're' re-exported under a different name so as not to conflict with @re@ from
-- 'Control.Lens'
rx :: QuasiQuoter
rx = re

-- | 'groups' but indexed by the group number. If you traverse over many matches you will
-- encounter duplicate indices.
-- E.g.
--
-- > > "a 1 b 2" ^.. regex [rx|(\w) (\d)|] . igroups . withIndex
-- > [(0,"a"),(1,"1"),(0,"b"),(1,"2")]
--
-- If you want only a specific group; combine this with `index`
-- E.g.
--
-- > > "a 1 b 2" ^.. regex [rx|(\w) (\d)|] . igroups . index 0
-- > ["a","b"]
igroups :: IndexedTraversal' Int Match T.Text
igroups = indexing groups

-- | traverse each group within a match. See 'igroups' for selecting specific groups.
groups :: Traversal' Match T.Text
groups = traversed . _Right

-- | Traverse each match as a whole
--
-- Use with 'regex' or 'iregex'
--
-- > > "one _two_ three _four_" ^.. regex [rx|_\w+_|] . match
-- > ["_two_","_four_"]
--
-- You can edit the traversal to perform a regex replace/substitution
-- > > "one _two_ three _four_" & regex [rx|_\w+_|] . match %~ T.toUpper
-- > "one _TWO_ three _FOUR_"
match :: Traversal' Match T.Text
match f grps = (:[]) . Right <$> f (grps ^. traversed . chosen)

-- | Indexed version of 'regex'.
iregex :: Regex -> IndexedTraversal' Int T.Text Match
iregex pattern = indexing (regex pattern)

-- | The base combinator for doing regex searches.
-- It's a traversal which selects 'Match'es; you can compose it with 'match' or 'groups'
-- to get the relevant parts of your match.
--
-- Getting all matches:
-- > > "one _two_ three _four_" ^.. regex [rx|_\w+_|] . match
-- > ["_two_","_four_"]
--
-- Regex replace/mutation
-- > > "one _two_ three _four_" & regex [rx|_\w+_|] . match %~ T.toUpper
-- > "one _TWO_ three _FOUR_"
--
-- Getting groups with their group index.
-- > > "1/2 and 3/4" ^.. regex [rx|(\d+)/(\d+)|] . igroups . withIndex
-- > [(0,"1"),(1,"2"),(0,"3"),(1,"4")]
--
-- Check for any matches:
-- > > has (regex [rx|ne+dle|]) "a needle in a haystack"
-- > True
--
-- Check for matches which also match a predicate:
-- > > has (regex [rx|\w+|] . match . filtered ((> 7) . T.length)) "one word here is loooooooong"
-- > True
--
-- Get the third match
-- > >  "alpha beta charlie delta" ^? (iregex [rx|\w+|] . index 2 . match)
-- > Just "charlie"
--
-- Replace the third match
-- > > "alpha beta charlie delta" & (iregex [rx|\w+|] . index 2 . match) .~ "GAMMA"
-- > "alpha beta GAMMA delta"
--
-- Match integers, 'Read' them into ints, then sort each match in-place
-- > > "Monday: 29, Tuesday: 99, Wednesday: 3" & partsOf' (iregex [rx|\d+|] . match . unpacked . _Show @Int) %~ sort
-- > "Monday: 3, Tuesday: 29, Wednesday: 99"
regex :: Regex -> Traversal' T.Text Match
regex pattern f txt =  collapse <$> apply (fmap splitAgain <$> splitter txt matches)
  where
    matches :: [(MatchRange, GroupRanges)]
    matches = scanRanges pattern txt
    collapse :: [Either Text [Either Text Text]] -> Text
    collapse xs = xs ^. folded . beside id (traversed . chosen)
    -- apply :: [Either Text [Either Text Text]] -> _ [Either Text [Either Text Text]]
    apply xs = xs & traversed . _Right %%~ f

splitter :: Text -> [(MatchRange, GroupRanges)] -> [Either T.Text (T.Text, GroupRanges)]
splitter t [] | T.null t = []
              | otherwise = [Left t]
splitter t (((start, end), grps) : rest) = do
    splitOnce t ((start, end), grps)
        <> splitter (T.drop end t) (rest & traversed . beside both (traversed . both) -~ end)

splitAgain :: (T.Text, GroupRanges) -> Match
splitAgain (t, []) | T.null t = []
                   | otherwise = [Left t]
splitAgain (t, (start, end) : rest) = do
    let (before, mid) = T.splitAt start t
    let focused = T.take (end - start) mid
    wrapIfNotEmpty before
        <> [Right focused]
        <> splitAgain ((T.drop end t), (rest & traversed . both -~ end))

splitOnce :: Text -> (MatchRange, GroupRanges) -> [Either T.Text (T.Text, GroupRanges)]
splitOnce t ((start, end), grps) = do
    let (before, mid) = T.splitAt start t
    let focused = T.take (end - start) mid
    wrapIfNotEmpty before
      <> [Right (focused, grps & traversed . both -~ start)]

wrapIfNotEmpty :: Text -> [Either Text a]
wrapIfNotEmpty txt
    | T.null txt = []
    | otherwise = [Left txt]