{-# LANGUAGE TemplateHaskell #-} module Optics.Regex ( Match , Regex , match , groups , group , matchAndGroups , regexing , regex , mkRegex ) where import Control.Arrow ((&&&)) import Data.Bifunctor (second) import Data.Coerce (coerce) import Language.Haskell.TH (Q, Exp) import qualified Language.Haskell.TH as TH import Language.Haskell.TH.Quote (QuasiQuoter) import qualified Language.Haskell.TH.Quote as TH.Q import Optics.Core ( Getter, Iso', IxTraversal', Lens', Traversal', (%), _Right, chosen , conjoined, elementsOf, folded, foldOf, iso, itraversalVL, ix, lensVL , partsOf, re, to, traversalVL, traversed, traverseOf, view ) import StrBldr (Str, Bldr) import qualified StrBldr as Str import Text.Regex.PCRE.Heavy (PCREOption, Regex) import qualified Text.Regex.PCRE.Heavy as Pcre.H newtype Match = Match [Either Bldr Bldr] newtype MatchRange = MR (Int, Int) newtype GroupRanges = GR [MatchRange] -- Some internal definitions chunks :: Iso' Match [Either Bldr Bldr] chunks = iso unMatch Match where unMatch :: Match -> [Either Bldr Bldr] unMatch (Match m) = m building :: Iso' Bldr Str building = iso Str.fromBuilder Str.toBuilder -- Main interface groups :: IxTraversal' Str Match [Str] groups = itraversalVL $ \fn mat -> traverseOf groupsL (fn $ foldOf match mat) mat where groupsL :: Lens' Match [Str] groupsL = chunks % partsOf (traversed % _Right % building) group :: Int -> IxTraversal' Str Match Str group n = groups % ix n match :: IxTraversal' [Str] Match Str match = itraversalVL $ \fn mat -> traverseOf matchL (fn $ foldOf groups mat) mat where matchL :: Lens' Match Str matchL = chunks % matchBldr % building matchBldr :: Lens' [Either Bldr Bldr] Bldr matchBldr = lensVL $ \fn grps -> (:[]) . Right <$> fn (foldOf (folded % chosen) grps) regexing :: Regex -> IxTraversal' Int Str Match regexing pat = conjoined (regexingT pat) indexed where indexed :: IxTraversal' Int Str Match indexed = elementsOf (regexingT pat) (const True) regexingT :: Regex -> Traversal' Str Match regexingT pat = traversalVL $ \fn txt -> let matches :: [(MatchRange, GroupRanges)] matches = coerce (Str.allMatches pat txt) -- apply -- :: [Either a [Either Bldr Bldr]] -- -> f [Either a [Either Bldr Bldr]] apply = traverseOf (traversed % _Right % re chunks) fn in view building . collapse <$> apply (splitAll txt matches) regex :: QuasiQuoter regex = Pcre.H.re{TH.Q.quoteExp = quoter} where quoter :: String -> Q Exp quoter str = do reg <- TH.Q.quoteExp Pcre.H.re str regExpr <- TH.varE 'regexing pure (TH.AppE regExpr reg) mkRegex :: [PCREOption] -> QuasiQuoter mkRegex opts = (Pcre.H.mkRegexQQ opts){TH.Q.quoteExp = quoter} where quoter :: String -> Q Exp quoter str = do reg <- TH.Q.quoteExp (Pcre.H.mkRegexQQ opts) str regExpr <- TH.varE 'regexing pure (TH.AppE regExpr reg) matchAndGroups :: Getter Match (Str, [Str]) matchAndGroups = to (foldOf match &&& foldOf groups) -- Other bits and pieces collapse :: Monoid a => [Either a [Either a a]] -> a collapse = foldMap (either id inner) where -- inner :: [Either a a] -> a inner = foldOf (folded % chosen) splitAll :: Str -> [(MatchRange, GroupRanges)] -> [Either Bldr [Either Bldr Bldr]] splitAll txt matches = second split <$> splits txt 0 matches where split :: (Str, MatchRange, GroupRanges) -> [Either Bldr Bldr] split (txt', MR (start, _), grps) = groupSplit txt' start grps groupSplit :: Str -> Int -> GroupRanges -> [Either Bldr Bldr] groupSplit txt _ (GR []) | Str.null txt = [] | otherwise = [Left (Str.toBuilder txt)] groupSplit txt offset (GR (MR (grpStart, grpEnd) : rest)) | offset == grpStart = let (prefix, suffix) = Str.splitAt (grpEnd - offset) txt in Right (Str.toBuilder prefix) : groupSplit suffix grpEnd (GR rest) groupSplit txt offset matches@(GR (MR (grpStart, _) : _)) = let (prefix, suffix) = Str.splitAt (grpStart - offset) txt in Left (Str.toBuilder prefix) : groupSplit suffix grpStart matches splits :: Str -> Int -> [(MatchRange, GroupRanges)] -> [Either Bldr (Str, MatchRange, GroupRanges)] splits txt _ [] | Str.null txt = [] | otherwise = [Left (Str.toBuilder txt)] splits txt offset ((MR (start, end), grps) : rest) | offset == start = let (prefix, suffix) = Str.splitAt (end - offset) txt in Right (prefix, MR (start, end), grps) : splits suffix end rest splits txt offset matches@((MR (start, _), _) : _) = let (prefix, suffix) = Str.splitAt (start - offset) txt in Left (Str.toBuilder prefix) : splits suffix start matches