{- | module: $Header$ description: Unicode characters license: MIT maintainer: Joe Leslie-Hurd stability: provisional portability: portable -} module OpenTheory.Unicode.UTF8 where import qualified OpenTheory.Natural.Bits as Bits import qualified OpenTheory.Parser as Parser import qualified OpenTheory.Parser.Stream as Stream import qualified OpenTheory.Primitive.Byte as Byte import qualified OpenTheory.Primitive.Natural as Natural import qualified OpenTheory.Unicode as Unicode parseAscii :: Parser.Parser Byte.Byte Natural.Natural parseAscii = Parser.token (\b -> if Byte.bit b 7 then Nothing else Just (Byte.toNatural b)) isContinuationByte :: Byte.Byte -> Bool isContinuationByte b = Byte.bit b 7 && not (Byte.bit b 6) parseMultibyte :: Parser.Parser Byte.Byte Natural.Natural parseMultibyte = Parser.sequenceParser (Parser.token (\b -> if Byte.bit b 6 then if Byte.bit b 5 then if Byte.bit b 4 then if Byte.bit b 3 then Nothing else Just (parse4 b) else Just (parse3 b) else Just (parse2 b) else Nothing)) where {-parse2 :: Byte.Byte -> Parser.Parser Byte.Byte Natural.Natural-} parse2 b = Parser.filterParser (Parser.foldN addContinuationByte 0 (Byte.toNatural (Byte.and b 31))) (\n -> 128 <= n) {-parse3 :: Byte.Byte -> Parser.Parser Byte.Byte Natural.Natural-} parse3 b = Parser.filterParser (Parser.foldN addContinuationByte 1 (Byte.toNatural (Byte.and b 15))) (\n -> 2048 <= n) {-parse4 :: Byte.Byte -> Parser.Parser Byte.Byte Natural.Natural-} parse4 b = Parser.filterParser (Parser.foldN addContinuationByte 2 (Byte.toNatural (Byte.and b 7))) (\n -> 65536 <= n) {-addContinuationByte :: Byte.Byte -> Natural.Natural -> Maybe Natural.Natural-} addContinuationByte b n = if isContinuationByte b then Just (Byte.toNatural (Byte.and b 63) + Natural.shiftLeft n 6) else Nothing parseNatural :: Parser.Parser Byte.Byte Natural.Natural parseNatural = Parser.orelse parseAscii parseMultibyte parseUnicode :: Parser.Parser Byte.Byte Unicode.Unicode parseUnicode = Parser.mapPartial parseNatural (\n -> if Unicode.invariant n then Just (Unicode.Unicode n) else Nothing) parse :: Parser.Parser Byte.Byte (Either Byte.Byte Unicode.Unicode) parse = Parser.orelse (Parser.mapParser parseUnicode Right) (Parser.mapParser Parser.anyToken Left) decode :: [Byte.Byte] -> [Either Byte.Byte Unicode.Unicode] decode b = fst (Stream.toList (Parser.parse parse (Stream.fromList b))) encodeAscii :: Natural.Natural -> [Byte.Byte] encodeAscii n = Byte.fromNatural n : [] encodeUnicode :: Unicode.Unicode -> [Byte.Byte] encodeUnicode = \c -> let n = Unicode.unUnicode c in if n < 128 then encodeAscii n else if n < 2048 then encode2 n else if n < 65536 then encode3 n else encode4 n where {-encode2 :: Natural.Natural -> [Byte.Byte]-} encode2 n = let n1 = Natural.shiftRight n 6 in let b0 = Byte.or 192 (Byte.fromNatural n1) in let b1 = Byte.or 128 (Byte.fromNatural (Bits.bound n 6)) in b0 : b1 : [] {-encode3 :: Natural.Natural -> [Byte.Byte]-} encode3 n = let n1 = Natural.shiftRight n 6 in let n2 = Natural.shiftRight n1 6 in let b0 = Byte.or 224 (Byte.fromNatural n2) in let b1 = Byte.or 128 (Byte.fromNatural (Bits.bound n1 6)) in let b2 = Byte.or 128 (Byte.fromNatural (Bits.bound n 6)) in b0 : b1 : b2 : [] {-encode4 :: Natural.Natural -> [Byte.Byte]-} encode4 n = let n1 = Natural.shiftRight n 6 in let n2 = Natural.shiftRight n1 6 in let n3 = Natural.shiftRight n2 6 in let b0 = Byte.or 240 (Byte.fromNatural n3) in let b1 = Byte.or 128 (Byte.fromNatural (Bits.bound n2 6)) in let b2 = Byte.or 128 (Byte.fromNatural (Bits.bound n1 6)) in let b3 = Byte.or 128 (Byte.fromNatural (Bits.bound n 6)) in b0 : b1 : b2 : b3 : [] encode :: [Unicode.Unicode] -> [Byte.Byte] encode c = concat (map encodeUnicode c) reencodeUnicode :: Either Byte.Byte Unicode.Unicode -> [Byte.Byte] reencodeUnicode x = case x of Left b -> b : [] Right c -> encodeUnicode c reencode :: [Either Byte.Byte Unicode.Unicode] -> [Byte.Byte] reencode c = concat (map reencodeUnicode c)