/*
 * Copyright (c) 2020 Nikita Volkov.
 *
 * Portions copyright (c) 2011 Bryan O'Sullivan.
 *
 * Portions copyright (c) 2008-2010 Björn Höhrmann.
 *
 * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
 */

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Lowercase hexadecimal digits, indexed by nibble value. */
static const char digits[] = "0123456789abcdef";

/*
 * Two-character escape sequences packed into a 16-bit value:
 * the low byte is the first output character (the backslash),
 * the high byte is the second one.
 */
#define slash_slash_seq_def       ('\\' | '\\' << 8)
#define slash_doublequote_seq_def ('\\' | '"' << 8)
#define slash_n_seq_def           ('\\' | 'n' << 8)
#define slash_r_seq_def           ('\\' | 'r' << 8)
#define slash_t_seq_def           ('\\' | 't' << 8)
#define slash_u_seq_def           ('\\' | 'u' << 8)

static const uint16_t slash_u_seq = slash_u_seq_def;

/*
 * For each 7-bit character: true when it is copied to the output verbatim.
 * False for 0x00-0x1F, the double quote (0x22) and the backslash (0x5C).
 */
static const bool pass_through_by_septet[128] = {
  /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /* 0x10 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /* 0x20 */ 1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x30 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x40 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x50 */ 1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,
  /* 0x60 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x70 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
};

/*
 * For each 7-bit character: its packed two-character escape, or 0 when the
 * character has no short escape (all entries not listed are 0).
 */
static const uint16_t two_byte_seq_by_septet[128] = {
  [0x09] = slash_t_seq_def,
  [0x0A] = slash_n_seq_def,
  [0x0D] = slash_r_seq_def,
  [0x22] = slash_doublequote_seq_def,
  [0x5C] = slash_slash_seq_def
};

/*
 * Store a packed two-character sequence at dest, low byte first.
 * Writing byte by byte keeps the output independent of host endianness
 * and of the alignment of dest. Returns the advanced destination pointer.
 */
static inline uint8_t *write_packed_pair(uint8_t *dest, uint16_t seq)
{
  dest[0] = (uint8_t) (seq & 0xFF);
  dest[1] = (uint8_t) (seq >> 8);
  return dest + 2;
}

/*
 * Emit one 7-bit character (x must be <= 0x7F): verbatim, as a
 * two-character escape, or as a six-character \u00XX escape.
 * Returns the advanced destination pointer.
 */
static inline uint8_t *write_septet(uint8_t *dest, uint8_t x)
{
  if (pass_through_by_septet[x]) {
    *dest = x;
    return dest + 1;
  }

  uint16_t two_byte_seq = two_byte_seq_by_septet[x];
  if (two_byte_seq) {
    return write_packed_pair(dest, two_byte_seq);
  }

  /* \u followed by four hex nibbles; the upper two are always zero here. */
  dest = write_packed_pair(dest, slash_u_seq);
  dest[0] = '0';
  dest[1] = '0';
  dest[2] = (uint8_t) digits[x >> 4 & 0xF];
  dest[3] = (uint8_t) digits[x & 0xF];
  return dest + 4;
}

/*
 * Encode UTF-16 code units as a double-quoted, escaped, UTF-8 string literal.
 *
 * dest        output buffer; no size is passed, so the caller must provide
 *             enough room (at most 6 bytes are written per input code unit,
 *             plus the two quotes)
 * src         base of the UTF-16 code unit array
 * src_offset  index of the first code unit to encode
 * src_length  number of code units to encode
 *
 * Returns a pointer one past the last byte written.
 *
 * A high surrogate that is the very last code unit is emitted as U+FFFD
 * instead of reading past the end of the input. Other malformed input is
 * not validated: a lone low surrogate is written as a 3-byte sequence, and
 * a high surrogate followed by a non-low-surrogate unit consumes both units
 * and yields 4 unspecified bytes.
 */
uint8_t* encode_utf16_text
  (
    uint8_t *dest,
    const uint16_t *src,
    size_t src_offset,
    size_t src_length
  )
{
  src += src_offset;
  const uint16_t *src_end = src + src_length;

  /* Opening double quote */
  *dest++ = '"';

  while (src < src_end) {
    uint16_t x = *src++;

    if (x <= 0x7F) {
      dest = write_septet(dest, (uint8_t) x);
    }
    else if (x <= 0x7FF) {
      *dest++ = (uint8_t) ((x >> 6) | 0xC0);
      *dest++ = (uint8_t) ((x & 0x3F) | 0x80);
    }
    else if (x < 0xD800 || x > 0xDBFF) {
      *dest++ = (uint8_t) ((x >> 12) | 0xE0);
      *dest++ = (uint8_t) (((x >> 6) & 0x3F) | 0x80);
      *dest++ = (uint8_t) ((x & 0x3F) | 0x80);
    }
    else if (src < src_end) {
      /* Surrogate pair: combine with the following code unit. */
      uint32_t c =
        ((((uint32_t) x) - 0xD800) << 10) +
        (((uint32_t) *src++) - 0xDC00) +
        0x10000;
      *dest++ = (uint8_t) ((c >> 18) | 0xF0);
      *dest++ = (uint8_t) (((c >> 12) & 0x3F) | 0x80);
      *dest++ = (uint8_t) (((c >> 6) & 0x3F) | 0x80);
      *dest++ = (uint8_t) ((c & 0x3F) | 0x80);
    }
    else {
      /* High surrogate at the end of input: write U+FFFD (EF BF BD). */
      *dest++ = 0xEF;
      *dest++ = 0xBF;
      *dest++ = 0xBD;
    }
  }

  /* Closing double quote */
  *dest++ = '"';

  return dest;
}

/*
 * Encode UTF-8 bytes as a double-quoted, escaped, UTF-8 string literal.
 *
 * dest        output buffer; no size is passed, so the caller must provide
 *             enough room (at most 6 bytes are written per input byte,
 *             plus the two quotes)
 * src         base of the UTF-8 byte array
 * src_offset  index of the first byte to encode
 * src_length  number of bytes to encode
 *
 * Returns a pointer one past the last byte written.
 *
 * Bytes >= 0x80 are copied through unchanged; the sequence length is taken
 * from the lead byte only and the sequence itself is not validated. A
 * sequence cut short by the end of the input is copied only up to that end,
 * so no byte outside [src + src_offset, src + src_offset + src_length) is
 * read. dest and src must not overlap.
 */
uint8_t* encode_utf8_text
  (
    uint8_t *dest,
    const uint8_t *src,
    size_t src_offset,
    size_t src_length
  )
{
  src += src_offset;
  const uint8_t *src_end = src + src_length;

  /* Opening double quote */
  *dest++ = '"';

  while (src < src_end) {
    uint8_t x = *src;

    if (x < 0x80) {
      dest = write_septet(dest, x);
      src++;
      continue;
    }

    /* Multi-byte sequence: length is decided by the lead byte alone. */
    size_t seq_length = x < 0xE0 ? 2 : x < 0xF0 ? 3 : 4;
    size_t remaining = (size_t) (src_end - src);
    if (seq_length > remaining) {
      seq_length = remaining;
    }

    memcpy(dest, src, seq_length);
    dest += seq_length;
    src += seq_length;
  }

  /* Closing double quote */
  *dest++ = '"';

  return dest;
}