module Foundation.String.ModifiedUTF8
( fromModified
) where
import GHC.ST (runST, ST)
import GHC.Prim (Addr#)
import GHC.Ptr (Ptr(..))
import qualified Control.Monad (mapM)
import Foundation.Internal.Base
import Foundation.Internal.Types
import qualified Foundation.Array.Unboxed as Vec
import Foundation.Array.Unboxed (UArray)
import Foundation.Number
import Foundation.Array.Unboxed.Builder
import Foundation.Primitive.FinalPtr
import Foundation.String.UTF8Table
aone :: Offset Word8
aone = Offset 1
accessBytes :: Offset Word8 -> (Offset Word8 -> Word8) -> ([Word8], Offset Word8)
accessBytes offset getAtIdx = (loop offset, pastEnd)
where
nbytes :: Size Word8
nbytes = Size $ getNbBytes $ getAtIdx offset
pastEnd :: Offset Word8
pastEnd = aone + (offset `offsetPlusE` nbytes)
loop :: Offset Word8 -> [Word8]
loop off
| off == pastEnd = []
| otherwise = getAtIdx off : loop (off + aone)
buildByteArray :: Addr# -> ST st (UArray Word8)
buildByteArray addr = Vec.UVecAddr (Offset 0) (Size 100000) `fmap`
toFinalPtr (Ptr addr) (\_ -> return ())
fromModified :: Addr# -> UArray Word8
fromModified addr = runST $ do
ba <- buildByteArray addr
Vec.unsafeIndexer ba buildWithBytes
where
buildWithBytes getAt = build 64 $ loopBuilder getAt (Offset 0)
loopBuilder getAt offset =
let (bs, noffset) = accessBytes offset getAt
in case bs of
[] -> internalError "ModifiedUTF8.fromModified"
[0x00] -> return ()
[b1,b2] | b1 == 0xC0 && b2 == 0x80 -> appendTy 0x00 >> loopBuilder getAt noffset
_ -> Control.Monad.mapM appendTy bs >> loopBuilder getAt noffset