module Skylighting.Regex (
Regex
, RegexException
, RE(..)
, compileRegex
, matchRegex
, convertOctalEscapes
) where
import qualified Control.Exception as E
import qualified Data.ByteString.Char8 as BS
import Data.ByteString.UTF8 (toString)
import GHC.Generics (Generic)
import System.IO.Unsafe (unsafePerformIO)
import Text.Printf
import Text.Regex.PCRE.ByteString
import Data.Data
import Data.Binary (Binary)
newtype RegexException = RegexException String
deriving (Show, Typeable, Generic)
instance E.Exception RegexException
data RE = RE{
reString :: BS.ByteString
, reCaseSensitive :: Bool
} deriving (Show, Read, Ord, Eq, Data, Typeable, Generic)
instance Binary RE
compileRegex :: Bool -> BS.ByteString -> Regex
compileRegex caseSensitive regexpStr =
let opts = compAnchored + compUTF8 +
if caseSensitive then 0 else compCaseless
in case unsafePerformIO $ compile opts (execNotEmpty) regexpStr of
Left (off,msg) -> E.throw $ RegexException $
"Error compiling regex /" ++ toString regexpStr ++
"/ at offset " ++ show off ++ "\n" ++ msg
Right r -> r
convertOctalEscapes :: String -> String
convertOctalEscapes [] = ""
convertOctalEscapes ('\\':'0':x:y:z:rest)
| all isOctalDigit [x,y,z] = '\\':x:y:z: convertOctalEscapes rest
convertOctalEscapes ('\\':x:y:z:rest)
| all isOctalDigit [x,y,z] ='\\':x:y:z: convertOctalEscapes rest
convertOctalEscapes ('\\':'o':'{':zs) =
case break (=='}') zs of
(ds, '}':rest) | all isOctalDigit ds && not (null ds) ->
case reads ('0':'o':ds) of
((n :: Int,[]):_) ->
printf "\\x{%x}" n ++ convertOctalEscapes rest
_ -> E.throw $ RegexException $
"Unable to read octal number: " ++ ds
_ -> '\\':'o':'{': convertOctalEscapes zs
convertOctalEscapes (x:xs) = x : convertOctalEscapes xs
isOctalDigit :: Char -> Bool
isOctalDigit c = c >= '0' && c <= '7'
matchRegex :: Regex -> BS.ByteString -> Maybe [BS.ByteString]
matchRegex r s = case unsafePerformIO (regexec r s) of
Right (Just (_, mat, _ , capts)) ->
Just (mat : capts)
Right Nothing -> Nothing
Left (_rc, msg) -> E.throw $ RegexException msg