module OpenTheory.Data.Unicode.UTF8
where
import qualified OpenTheory.Data.Unicode as Data.Unicode
import qualified OpenTheory.Parser as Parser
import qualified OpenTheory.Parser.Stream as Parser.Stream
import qualified OpenTheory.Primitive.Byte as Primitive.Byte
import qualified OpenTheory.Primitive.Word16 as Primitive.Word16
isContinuationByte :: Primitive.Byte.Byte -> Bool
isContinuationByte b =
Primitive.Byte.bit b 7 && not (Primitive.Byte.bit b 6)
parseContinuationByte ::
Parser.Parser Primitive.Byte.Byte Primitive.Byte.Byte
parseContinuationByte = Parser.parseSome isContinuationByte
parseTwoContinuationBytes ::
Parser.Parser Primitive.Byte.Byte
(Primitive.Byte.Byte, Primitive.Byte.Byte)
parseTwoContinuationBytes =
Parser.parsePair parseContinuationByte parseContinuationByte
parseThreeContinuationBytes ::
Parser.Parser Primitive.Byte.Byte
(Primitive.Byte.Byte, (Primitive.Byte.Byte, Primitive.Byte.Byte))
parseThreeContinuationBytes =
Parser.parsePair parseContinuationByte parseTwoContinuationBytes
decoder :: Parser.Parser Primitive.Byte.Byte Data.Unicode.Unicode
decoder =
Parser.Parser parse
where
parse b0 s =
if Primitive.Byte.bit b0 7 then
if Primitive.Byte.bit b0 6 then
if Primitive.Byte.bit b0 5 then
if Primitive.Byte.bit b0 4 then
if Primitive.Byte.bit b0 3 then Nothing
else
Parser.parse
(Parser.partialMap (decode3 b0)
parseThreeContinuationBytes) s
else
Parser.parse
(Parser.partialMap (decode2 b0) parseTwoContinuationBytes)
s
else
Parser.parse
(Parser.partialMap (decode1 b0) parseContinuationByte) s
else Nothing
else
let pl = Data.Unicode.Plane 0 in
let pos =
Data.Unicode.Position (Primitive.Word16.fromBytes b0 0) in
let ch = Data.Unicode.Unicode (pl, pos) in
Just (ch, s)
decode1 b0 b1 =
let pl = Data.Unicode.Plane 0 in
let p1 = Primitive.Byte.shiftRight (Primitive.Byte.and b0 28) 2 in
let y0 = Primitive.Byte.shiftLeft (Primitive.Byte.and b0 3) 6 in
let x0 = Primitive.Byte.and b1 63 in
let p0 = Primitive.Byte.or y0 x0 in
if p1 == 0 && not (Primitive.Byte.bit p0 7) then Nothing
else
let pos =
Data.Unicode.Position (Primitive.Word16.fromBytes p0 p1) in
let ch = Data.Unicode.Unicode (pl, pos) in
Just ch
decode2 b0 (b1, b2) =
let z1 = Primitive.Byte.shiftLeft (Primitive.Byte.and b0 15) 4 in
let y1 = Primitive.Byte.shiftRight (Primitive.Byte.and b1 60) 2 in
let p1 = Primitive.Byte.or z1 y1 in
if p1 < 8 || 216 <= p1 && p1 <= 223 then Nothing
else
let y0 = Primitive.Byte.shiftLeft (Primitive.Byte.and b1 3) 6 in
let x0 = Primitive.Byte.and b2 63 in
let p0 = Primitive.Byte.or y0 x0 in
if p1 == 255 && 254 <= p0 then Nothing
else
let pl = Data.Unicode.Plane 0 in
let pos =
Data.Unicode.Position (Primitive.Word16.fromBytes p0 p1) in
let ch = Data.Unicode.Unicode (pl, pos) in
Just ch
decode3 b0 (b1, (b2, b3)) =
let w = Primitive.Byte.shiftLeft (Primitive.Byte.and b0 7) 2 in
let z = Primitive.Byte.shiftRight (Primitive.Byte.and b1 48) 4 in
let p = Primitive.Byte.or w z in
if p == 0 || 16 < p then Nothing
else
let pl = Data.Unicode.Plane p in
let z1 = Primitive.Byte.shiftLeft (Primitive.Byte.and b1 15) 4 in
let y1 = Primitive.Byte.shiftRight (Primitive.Byte.and b2 60) 2 in
let p1 = Primitive.Byte.or z1 y1 in
let y0 = Primitive.Byte.shiftLeft (Primitive.Byte.and b2 3) 6 in
let x0 = Primitive.Byte.and b3 63 in
let p0 = Primitive.Byte.or y0 x0 in
let pos =
Data.Unicode.Position (Primitive.Word16.fromBytes p0 p1) in
let ch = Data.Unicode.Unicode (pl, pos) in
Just ch
decodeStream ::
Parser.Stream.Stream Primitive.Byte.Byte ->
Parser.Stream.Stream Data.Unicode.Unicode
decodeStream = Parser.parseStream decoder
decode :: [Primitive.Byte.Byte] -> Maybe [Data.Unicode.Unicode]
decode bs = Parser.Stream.toList (decodeStream (Parser.Stream.fromList bs))
encoder :: Data.Unicode.Unicode -> [Primitive.Byte.Byte]
encoder =
\ch ->
let (pl, pos) = Data.Unicode.unUnicode ch in
let p = Data.Unicode.unPlane pl in
let (p0, p1) =
Primitive.Word16.toBytes (Data.Unicode.unPosition pos) in
if p == 0 then
if p1 == 0 && not (Primitive.Byte.bit p0 7) then p0 : []
else if Primitive.Byte.and 248 p1 == 0 then encode1 p1 p0
else encode2 p1 p0
else encode3 p p1 p0
where
encode1 p1 p0 =
let b00 = Primitive.Byte.shiftLeft p1 2 in
let b01 = Primitive.Byte.shiftRight (Primitive.Byte.and p0 192) 6 in
let b0 = Primitive.Byte.or 192 (Primitive.Byte.or b00 b01) in
let b10 = Primitive.Byte.and p0 63 in
let b1 = Primitive.Byte.or 128 b10 in
b0 : b1 : []
encode2 p1 p0 =
let b00 = Primitive.Byte.shiftRight (Primitive.Byte.and p1 240) 4 in
let b0 = Primitive.Byte.or 224 b00 in
let b10 = Primitive.Byte.shiftLeft (Primitive.Byte.and p1 15) 2 in
let b11 = Primitive.Byte.shiftRight (Primitive.Byte.and p0 192) 6 in
let b1 = Primitive.Byte.or 128 (Primitive.Byte.or b10 b11) in
let b20 = Primitive.Byte.and p0 63 in
let b2 = Primitive.Byte.or 128 b20 in
b0 : b1 : b2 : []
encode3 p p1 p0 =
let b00 = Primitive.Byte.shiftRight (Primitive.Byte.and p 28) 2 in
let b0 = Primitive.Byte.or 240 b00 in
let b10 = Primitive.Byte.shiftLeft (Primitive.Byte.and p 3) 4 in
let b11 = Primitive.Byte.shiftRight (Primitive.Byte.and p1 240) 4 in
let b1 = Primitive.Byte.or 128 (Primitive.Byte.or b10 b11) in
let b20 = Primitive.Byte.shiftLeft (Primitive.Byte.and p1 15) 2 in
let b21 = Primitive.Byte.shiftRight (Primitive.Byte.and p0 192) 6 in
let b2 = Primitive.Byte.or 128 (Primitive.Byte.or b20 b21) in
let b30 = Primitive.Byte.and p0 63 in
let b3 = Primitive.Byte.or 128 b30 in
b0 : b1 : b2 : b3 : []
encode :: [Data.Unicode.Unicode] -> [Primitive.Byte.Byte]
encode chs = concat (map encoder chs)