module Bio.GFF3.Escape ( unEscapeByteString
, escapeByteString, escapeAllBut, escapeAllOf
)
where
import Control.Monad.Error
import Data.Array.ST
import Data.Array.Unboxed
import qualified Data.ByteString.Lazy as LBSW
import qualified Data.ByteString.Lazy.Char8 as LBS
import Data.Char
import Data.Word
unEscapeByteString :: (Error e, MonadError e m) => LBS.ByteString -> m LBS.ByteString
unEscapeByteString bstr = case LBS.split '%' bstr of
[] -> return LBS.empty
[_] -> return bstr
(bstr0:brest) -> foldM unEscapeOneSplit bstr0 brest
where unEscapeOneSplit leftBStr nextBStr
= do (x1, next1) <- maybe shortEscape return $ LBS.uncons nextBStr
(x2, rest) <- maybe shortEscape return $ LBS.uncons next1
xch <- unescapeChar x1 x2
return $ leftBStr `LBS.append` LBS.singleton xch `LBS.append` rest
unescapeChar x1 x2
| isHexDigit x1 && isHexDigit x2 = return $ chr (digitToInt x1 * 16 + digitToInt x2)
| otherwise = throwError $ strMsg $ "Bad %-escape, " ++ show ['%',x1,x2]
shortEscape = throwError $ strMsg $ "Bad %-escape, too short"
escapeByteString :: (Char -> Bool) -> LBS.ByteString -> LBS.ByteString
escapeByteString isNotEscaped bstr
= case LBS.span isNotEscaped bstr of
(leftStr, rightStr) -> case LBSW.uncons rightStr of
Nothing -> leftStr
Just (escch, rest) -> leftStr `LBS.append` escapeWord8 escch `LBS.append` escapeByteString isNotEscaped rest
escapeWord8 :: Word8 -> LBS.ByteString
escapeWord8 wch = case (fromIntegral wch) `quotRem` 16 of
(lch, rch) -> LBS.pack ['%',intToDigit lch, intToDigit rch]
escapeTable :: Bool -> [Char] -> UArray Char Bool
escapeTable def exc = runSTUArray $ do starr <- newArray (minBound, maxBound) def
mapM_ (\ch -> writeArray starr ch $ not def) exc
return starr
escapeAllBut :: String -> LBS.ByteString -> LBS.ByteString
escapeAllBut notEscaped = escapeByteString isNotEscaped
where isNotEscaped ch = table ! ch
table = escapeTable False notEscaped
escapeAllOf :: String -> LBS.ByteString -> LBS.ByteString
escapeAllOf allEscaped = escapeByteString isNotEscaped
where isNotEscaped ch = table ! ch
table = escapeTable True allEscaped