{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedLabels #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
module Octane.Type.Text
( Text(..)
, encodeLatin1
) where
import Data.Function ((&))
import qualified Control.DeepSeq as DeepSeq
import qualified Data.Aeson as Aeson
import qualified Data.Binary as Binary
import qualified Data.Binary.Bits as BinaryBit
import qualified Data.Binary.Bits.Get as BinaryBit
import qualified Data.Binary.Bits.Put as BinaryBit
import qualified Data.Binary.Get as Binary
import qualified Data.Binary.Put as Binary
import qualified Data.ByteString.Char8 as StrictBytes
import qualified Data.Char as Char
import qualified Data.Default.Class as Default
import qualified Data.OverloadedRecords.TH as OverloadedRecords
import qualified Data.String as String
import qualified Data.Text as StrictText
import qualified Data.Text.Encoding as Encoding
import qualified GHC.Generics as Generics
import qualified Octane.Type.Int32 as Int32
import qualified Octane.Utility.Endian as Endian
newtype Text = Text
{ textUnpack :: StrictText.Text
} deriving (Eq, Generics.Generic, Ord)
$(OverloadedRecords.overloadedRecord Default.def ''Text)
-- | Text is both length-prefixed and null-terminated.
instance Binary.Binary Text where
get = getText Binary.get Binary.getByteString id
put text = putText Binary.put Binary.putByteString id text
-- | Both length-prefixed and null-terminated. The bits in each byte are
-- reversed.
instance BinaryBit.BinaryBit Text where
getBits _ =
getText
(BinaryBit.getBits 32)
BinaryBit.getByteString
Endian.reverseBitsInStrictBytes
putBits _ text =
putText
(BinaryBit.putBits 32)
BinaryBit.putByteString
Endian.reverseBitsInStrictBytes
text
-- | Allows you to write 'Text' as string literals with @OverloadedStrings@.
-- Also allows using the 'String.fromString' helper function.
instance String.IsString Text where
fromString string = Text (StrictText.pack string)
instance DeepSeq.NFData Text
-- | Shown as a string literal, like @"this"@.
instance Show Text where
show text = show (#unpack text)
-- | Encoded directly as a JSON string.
instance Aeson.ToJSON Text where
toJSON text = text & #unpack & Aeson.toJSON
getText
:: (Monad m)
=> (m Int32.Int32)
-> (Int -> m StrictBytes.ByteString)
-> (StrictBytes.ByteString -> StrictBytes.ByteString)
-> m Text
getText getInt getBytes convertBytes = do
(Int32.Int32 rawSize) <- getInt
(size, decode)
-- In some tiny percentage of replays, this nonsensical string size
-- shows up. As far as I can tell the next 3 bytes are always null. And
-- the actual string is "None", which is 5 bytes including the null
-- terminator.
--
-- These annoying replays come from around 2015-10-25 to 2015-11-01.
<-
if rawSize == 0x05000000
then do
bytes <- getBytes 3
if StrictBytes.all (== '\0') bytes
then pure (5, Encoding.decodeLatin1)
else fail
("Unexpected Text bytes " ++
show bytes ++ " after size " ++ show rawSize)
else if rawSize < 0
then pure (-2 * fromIntegral rawSize, Encoding.decodeUtf16LE)
else pure (fromIntegral rawSize, Encoding.decodeLatin1)
bytes <- getBytes size
let rawText = bytes & convertBytes & decode
case StrictText.splitAt (StrictText.length rawText - 1) rawText of
(text, "") -> text & Text & pure
(text, "\0") -> text & Text & pure
_ -> fail ("Unexpected Text value " ++ show rawText)
putText
:: (Monad m)
=> (Int32.Int32 -> m ())
-> (StrictBytes.ByteString -> m ())
-> (StrictBytes.ByteString -> StrictBytes.ByteString)
-> Text
-> m ()
putText putInt putBytes convertBytes text = do
let fullText = text & #unpack & flip StrictText.snoc '\NUL'
let size = fullText & StrictText.length & fromIntegral
if StrictText.all Char.isLatin1 fullText
then do
size & Int32.Int32 & putInt
fullText & encodeLatin1 & convertBytes & putBytes
else do
size & negate & Int32.Int32 & putInt
fullText & Encoding.encodeUtf16LE & convertBytes & putBytes
-- | Encodes text as Latin-1. Note that this isn't really safe if the text has
-- characters that can't be encoded in Latin-1.
encodeLatin1 :: StrictText.Text -> StrictBytes.ByteString
encodeLatin1 text = text & StrictText.unpack & StrictBytes.pack