//// Text I/O

// Function-pointer shapes stored in the primitive type table further down.
// All of them return 0 on success, except the text writers, which return
// whatever fprintf() returns.
typedef int (*writer)(FILE*, void*);            // Write one value to a stream.
typedef int (*bin_reader)(void*);               // Read one binary value from stdin.
typedef int (*str_reader)(const char *, void*); // Parse one text token.

// Growable element buffer used while parsing a textual array literal.
struct array_reader {
  char* elems;             // Element storage, managed with realloc().
  int64_t n_elems_space;   // Capacity of 'elems', counted in elements.
  int64_t elem_size;       // Size of a single element in bytes.
  int64_t n_elems_used;    // Number of elements stored so far.
  int64_t *shape;          // Per-dimension sizes, filled in during parsing.
  str_reader elem_reader;  // Parser for a single element token.
};

// Consumes whitespace from stdin, leaving the first non-space character
// (if any) unread.
static void skipspaces(void) {
  int c;
  do {
    c = getchar();
  } while (isspace(c));

  if (c != EOF) {
    ungetc(c, stdin);
  }
}

// Returns nonzero if 'c' may appear inside a multi-character token.
static int constituent(char c) {
  // <ctype.h> functions require an unsigned char value (or EOF); a plain
  // char may be negative for bytes >= 0x80.
  return isalnum((unsigned char)c) || c == '.' || c == '-' || c == '+' || c == '_';
}

// Reads the next token from stdin into 'buf' (NUL-terminated, at most
// bufsize-1 characters; longer tokens are truncated).  Text from "--" to
// the end of the line is skipped as a comment.
// Produces an empty token only on EOF.
static void next_token(char *buf, int bufsize) {
  if (bufsize < 1) {
    return;
  }

 start:
  skipspaces();

  int i = 0;
  while (i < bufsize) {
    int c = getchar();
    buf[i] = (char)c;

    if (c == EOF) {
      buf[i] = 0;
      return;
    } else if (c == '-' && i == 1 && buf[0] == '-') {
      // Line comment, so skip to end of line and start over.
      for (; c != '\n' && c != EOF; c = getchar());
      goto start;
    } else if (!constituent((char)c)) {
      if (i == 0) {
        // We permit single-character tokens that are not
        // constituents; this lets things like ']' and ',' be
        // tokens.
        if (bufsize > 1) {
          buf[1] = 0;
        } else {
          // No room for both the character and the terminator.
          buf[0] = 0;
        }
        return;
      } else {
        ungetc(c, stdin);
        buf[i] = 0;
        return;
      }
    }

    i++;
  }

  buf[bufsize-1] = 0;
}

// Reads the next token into 'buf' and returns nonzero iff it equals
// 'expected'.
static int next_token_is(char *buf, int bufsize, const char* expected) {
  next_token(buf, bufsize);
  return strcmp(buf, expected) == 0;
}

// Deletes every '_' from 'buf' in place (digit separators in literals).
static void remove_underscores(char *buf) {
  char *w = buf;

  for (char *r = buf; *r; r++) {
    if (*r != '_') {
      *w++ = *r;
    }
  }

  *w++ = 0;
}

// Parses the element token in 'buf' and appends it to the reader's buffer,
// doubling the buffer when it is full.  Returns 0 on success; nonzero if the
// token does not parse or memory cannot be obtained (the existing buffer is
// kept intact in that case).
static int read_str_elem(char *buf, struct array_reader *reader) {
  int ret;
  if (reader->n_elems_used == reader->n_elems_space) {
    int64_t new_space = reader->n_elems_space * 2;
    char *grown = (char*) realloc(reader->elems,
                                  (size_t)(new_space * reader->elem_size));
    if (grown == NULL) {
      return 1;
    }
    reader->elems = grown;
    reader->n_elems_space = new_space;
  }

  ret = reader->elem_reader(buf, reader->elems + reader->n_elems_used * reader->elem_size);

  if (ret == 0) {
    reader->n_elems_used++;
  }

  return ret;
}

// Parses the remainder of an array literal after its 'dims' opening
// brackets have already been consumed.  Elements are appended to 'reader';
// reader->shape[d] receives the size of dimension d, and every row of a
// dimension must have the same length.  Returns 0 on success, nonzero on
// malformed input, EOF, or allocation failure.
static int read_str_array_elems(char *buf, int bufsize,
                                struct array_reader *reader, int dims) {
  if (dims < 1) {
    return 1;
  }

  int ret;
  int first = 1;
  char *knows_dimsize = (char*) calloc(dims,sizeof(char));
  int cur_dim = dims-1;
  int64_t *elems_read_in_dim = (int64_t*) calloc(dims,sizeof(int64_t));

  if (knows_dimsize == NULL || elems_read_in_dim == NULL) {
    free(knows_dimsize);
    free(elems_read_in_dim);
    return 1;
  }

  while (1) {
    next_token(buf, bufsize);

    if (strcmp(buf, "]") == 0) {
      // Closing a dimension: record its size the first time, and insist on
      // the same size every time after that.
      if (knows_dimsize[cur_dim]) {
        if (reader->shape[cur_dim] != elems_read_in_dim[cur_dim]) {
          ret = 1;
          break;
        }
      } else {
        knows_dimsize[cur_dim] = 1;
        reader->shape[cur_dim] = elems_read_in_dim[cur_dim];
      }
      if (cur_dim == 0) {
        ret = 0;
        break;
      } else {
        cur_dim--;
        elems_read_in_dim[cur_dim]++;
      }
    } else if (strcmp(buf, ",") == 0) {
      next_token(buf, bufsize);
      if (strcmp(buf, "[") == 0) {
        if (cur_dim == dims - 1) {
          ret = 1;
          break;
        }
        first = 1;
        cur_dim++;
        elems_read_in_dim[cur_dim] = 0;
      } else if (cur_dim == dims - 1) {
        ret = read_str_elem(buf, reader);
        if (ret != 0) {
          break;
        }
        elems_read_in_dim[cur_dim]++;
      } else {
        ret = 1;
        break;
      }
    } else if (strlen(buf) == 0) {
      // EOF
      ret = 1;
      break;
    } else if (first) {
      if (strcmp(buf, "[") == 0) {
        if (cur_dim == dims - 1) {
          ret = 1;
          break;
        }
        cur_dim++;
        elems_read_in_dim[cur_dim] = 0;
      } else {
        ret = read_str_elem(buf, reader);
        if (ret != 0) {
          break;
        }
        elems_read_in_dim[cur_dim]++;
        first = 0;
      }
    } else {
      ret = 1;
      break;
    }
  }

  free(knows_dimsize);
  free(elems_read_in_dim);
  return ret;
}

// Parses the rest of an "empty(...)" literal whose first token is already
// in 'buf': '(' followed by dims-1 "[]" pairs, the type name, and ')'.
// On success sets every entry of 'shape' to 0 and returns 0.
static int read_str_empty_array(char *buf, int bufsize,
                                const char *type_name, int64_t *shape, int64_t dims) {
  if (strlen(buf) == 0) {
    // EOF
    return 1;
  }

  if (strcmp(buf, "empty") != 0) {
    return 1;
  }

  if (!next_token_is(buf, bufsize, "(")) {
    return 1;
  }

  for (int i = 0; i < dims-1; i++) {
    if (!next_token_is(buf, bufsize, "[")) {
      return 1;
    }

    if (!next_token_is(buf, bufsize, "]")) {
      return 1;
    }
  }

  if (!next_token_is(buf, bufsize, type_name)) {
    return 1;
  }

  if (!next_token_is(buf, bufsize, ")")) {
    return 1;
  }

  for (int i = 0; i < dims; i++) {
    shape[i] = 0;
  }

  return 0;
}

// Reads a textual array of rank 'dims' from stdin.  '*data' is resized with
// realloc() to hold the elements and 'shape' receives the dimension sizes.
// Returns 0 on success and nonzero on a parse or allocation error; if the
// initial allocation fails '*data' is left untouched.
static int read_str_array(int64_t elem_size, str_reader elem_reader,
                          const char *type_name,
                          void **data, int64_t *shape, int64_t dims) {
  int ret;
  struct array_reader reader;
  char buf[100];

  int dims_seen;
  for (dims_seen = 0; dims_seen < dims; dims_seen++) {
    if (!next_token_is(buf, sizeof(buf), "[")) {
      break;
    }
  }

  if (dims_seen == 0) {
    return read_str_empty_array(buf, sizeof(buf), type_name, shape, dims);
  }

  if (dims_seen != dims) {
    return 1;
  }

  reader.shape = shape;
  reader.n_elems_used = 0;
  reader.elem_size = elem_size;
  reader.n_elems_space = 16;
  reader.elem_reader = elem_reader;

  char *initial = (char*) realloc(*data, (size_t)(elem_size*reader.n_elems_space));
  if (initial == NULL) {
    return 1;
  }
  reader.elems = initial;

  ret = read_str_array_elems(buf, sizeof(buf), &reader, dims);

  *data = reader.elems;

  return ret;
}

// Body of a scalar text reader: strips underscores from 'buf', scans one
// value with the given scanf conversion into *(PTR*)dest, and returns 0 iff
// the remaining text is empty or exactly SUFFIX.  Expands to statements that
// return from the enclosing function and relies on 'buf' and 'dest' being in
// scope there.
#define READ_STR(MACRO, PTR, SUFFIX)                                   \
  remove_underscores(buf);                                              \
  int j;                                                                \
  if (sscanf(buf, "%"MACRO"%n", (PTR*)dest, &j) == 1) {                 \
    return !(strcmp(buf+j, "") == 0 || strcmp(buf+j, SUFFIX) == 0);     \
  } else {                                                              \
    return 1;                                                           \
  }

// Scans an integer literal (sign and base prefix as accepted by scanf's %i)
// into a 64-bit value and checks that only 'suffix', or nothing, follows.
// Returns 0 and stores the value in '*out' on success.
static int read_str_wide_int(char *buf, const char *suffix, int64_t *out) {
  remove_underscores(buf);
  int j;
  int64_t v;
  if (sscanf(buf, "%"SCNi64"%n", &v, &j) != 1) {
    return 1;
  }
  if (!(strcmp(buf+j, "") == 0 || strcmp(buf+j, suffix) == 0)) {
    return 1;
  }
  *out = v;
  return 0;
}

static int read_str_i8(char *buf, void* dest) {
  /* Some platforms (WINDOWS) do not support scanf %hhd or its cousin,
     %SCNi8.  Read into int first to avoid corrupting memory.

     https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63417  */
  remove_underscores(buf);
  int j, x;
  if (sscanf(buf, "%i%n", &x, &j) == 1) {
    *(int8_t*)dest = x;
    return !(strcmp(buf+j, "") == 0 || strcmp(buf+j, "i8") == 0);
  } else {
    return 1;
  }
}

static int read_str_u8(char *buf, void* dest) {
  /* Some platforms (WINDOWS) do not support scanf %hhd or its cousin,
     %SCNu8.  Read into int first to avoid corrupting memory.

     https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63417  */
  remove_underscores(buf);
  int j, x;
  if (sscanf(buf, "%i%n", &x, &j) == 1) {
    *(uint8_t*)dest = x;
    return !(strcmp(buf+j, "") == 0 || strcmp(buf+j, "u8") == 0);
  } else {
    return 1;
  }
}

static int read_str_i16(char *buf, void* dest) {
  READ_STR(SCNi16, int16_t, "i16");
}

// Unsigned 16-bit reader.  The literal is scanned into a 64-bit value and
// then narrowed, so the whole 0..65535 range is representable; negative
// literals keep wrapping to their two's-complement bit pattern.
static int read_str_u16(char *buf, void* dest) {
  int64_t v;
  if (read_str_wide_int(buf, "u16", &v) != 0) {
    return 1;
  }
  *(uint16_t*)dest = (uint16_t)v;
  return 0;
}

static int read_str_i32(char *buf, void* dest) {
  READ_STR(SCNi32, int32_t, "i32");
}

// Unsigned 32-bit reader; see read_str_u16 for the narrowing rationale.
static int read_str_u32(char *buf, void* dest) {
  int64_t v;
  if (read_str_wide_int(buf, "u32", &v) != 0) {
    return 1;
  }
  *(uint32_t*)dest = (uint32_t)v;
  return 0;
}

static int read_str_i64(char *buf, void* dest) {
  READ_STR(SCNi64, int64_t, "i64");
}

// Unsigned 64-bit reader.  SCNi64 cannot represent values above INT64_MAX
// and SCNu64 only accepts decimal, so literals with a 0x/0X prefix are
// scanned with the hexadecimal conversion instead.
static int read_str_u64(char *buf, void* dest) {
  remove_underscores(buf);
  if (buf[0] == '0' && (buf[1] == 'x' || buf[1] == 'X')) {
    READ_STR(SCNx64, uint64_t, "u64");
  } else {
    READ_STR(SCNu64, uint64_t, "u64");
  }
}

// Reads an f32 literal; "f32.nan", "f32.inf" and "-f32.inf" are accepted as
// special spellings.
static int read_str_f32(char *buf, void* dest) {
  remove_underscores(buf);
  if (strcmp(buf, "f32.nan") == 0) {
    *(float*)dest = NAN;
    return 0;
  } else if (strcmp(buf, "f32.inf") == 0) {
    *(float*)dest = INFINITY;
    return 0;
  } else if (strcmp(buf, "-f32.inf") == 0) {
    *(float*)dest = -INFINITY;
    return 0;
  } else {
    READ_STR("f", float, "f32");
  }
}

// Reads an f64 literal; "f64.nan", "f64.inf" and "-f64.inf" are accepted as
// special spellings.
static int read_str_f64(char *buf, void* dest) {
  remove_underscores(buf);
  if (strcmp(buf, "f64.nan") == 0) {
    *(double*)dest = NAN;
    return 0;
  } else if (strcmp(buf, "f64.inf") == 0) {
    *(double*)dest = INFINITY;
    return 0;
  } else if (strcmp(buf, "-f64.inf") == 0) {
    *(double*)dest = -INFINITY;
    return 0;
  } else {
    READ_STR("lf", double, "f64");
  }
}

// Reads "true" or "false" into a single byte (1 or 0).
static int read_str_bool(char *buf, void* dest) {
  if (strcmp(buf, "true") == 0) {
    *(char*)dest = 1;
    return 0;
  } else if (strcmp(buf, "false") == 0) {
    *(char*)dest = 0;
    return 0;
  } else {
    return 1;
  }
}

// Text writers: each prints the value followed by its type suffix and
// returns the result of fprintf().

static int write_str_i8(FILE *out, int8_t *src) {
  return fprintf(out, "%hhdi8", *src);
}

static int write_str_u8(FILE *out, uint8_t *src) {
  return fprintf(out, "%hhuu8", *src);
}

static int write_str_i16(FILE *out, int16_t *src) {
  return fprintf(out, "%hdi16", *src);
}

static int write_str_u16(FILE *out, uint16_t *src) {
  return fprintf(out, "%huu16", *src);
}

static int write_str_i32(FILE *out, int32_t *src) {
  return fprintf(out, "%di32", *src);
}

static int write_str_u32(FILE *out, uint32_t *src) {
  return fprintf(out, "%uu32", *src);
}

static int write_str_i64(FILE *out, int64_t *src) {
  return fprintf(out, "%"PRIi64"i64", *src);
}

static int write_str_u64(FILE *out, uint64_t *src) {
  return fprintf(out, "%"PRIu64"u64", *src);
}

static int write_str_f32(FILE *out, float *src) {
  float x = *src;
  if (isnan(x)) {
    return fprintf(out, "f32.nan");
  } else if (isinf(x) && x >= 0) {
    return fprintf(out, "f32.inf");
  } else if (isinf(x)) {
    return fprintf(out, "-f32.inf");
  } else {
    return fprintf(out, "%.6ff32", x);
  }
}

static int write_str_f64(FILE *out, double *src) {
  double x = *src;
  if (isnan(x)) {
    return fprintf(out, "f64.nan");
  } else if (isinf(x) && x >= 0) {
    return fprintf(out, "f64.inf");
  } else if (isinf(x)) {
    return fprintf(out, "-f64.inf");
  } else {
    return fprintf(out, "%.6ff64", *src);
  }
}

static int write_str_bool(FILE *out, void *src) {
  return fprintf(out, "%s", *(char*)src ? "true" : "false");
}

//// Binary I/O

#define BINARY_FORMAT_VERSION 2
#define IS_BIG_ENDIAN (!*(unsigned char *)&(uint16_t){1})

// Reading little-endian byte sequences.  On big-endian hosts, we flip
// the resulting bytes.  Each reader returns 0 on success; on a short read
// it returns 1 and leaves '*dest' untouched.

static int read_byte(void* dest) {
  return fread(dest, 1, 1, stdin) == 1 ? 0 : 1;
}

static int read_le_2byte(void* dest) {
  uint16_t x;
  if (fread(&x, 2, 1, stdin) != 1) {
    return 1;
  }
  if (IS_BIG_ENDIAN) {
    x = (uint16_t)((x>>8) | (x<<8));
  }
  // memcpy: 'dest' may point at a signed or otherwise differently typed
  // object of the same size.
  memcpy(dest, &x, sizeof(x));
  return 0;
}

static int read_le_4byte(void* dest) {
  uint32_t x;
  if (fread(&x, 4, 1, stdin) != 1) {
    return 1;
  }
  if (IS_BIG_ENDIAN) {
    x =
      ((x>>24)&0xFF) |
      ((x>>8) &0xFF00) |
      ((x<<8) &0xFF0000) |
      ((x<<24)&0xFF000000);
  }
  memcpy(dest, &x, sizeof(x));
  return 0;
}

static int read_le_8byte(void* dest) {
  uint64_t x;
  if (fread(&x, 8, 1, stdin) != 1) {
    return 1;
  }
  if (IS_BIG_ENDIAN) {
    x =
      ((x>>56)&0xFFull) |
      ((x>>40)&0xFF00ull) |
      ((x>>24)&0xFF0000ull) |
      ((x>>8) &0xFF000000ull) |
      ((x<<8) &0xFF00000000ull) |
      ((x<<24)&0xFF0000000000ull) |
      ((x<<40)&0xFF000000000000ull) |
      ((x<<56)&0xFF00000000000000ull);
  }
  memcpy(dest, &x, sizeof(x));
  return 0;
}

// Writing little-endian byte sequences.  These have the 'writer' shape
// (stream first, then a pointer to the value) because that is the type they
// are stored under in the primitive type table.  Each returns 0 on success
// and 1 if the value could not be written.

static int write_byte(FILE *out, void* src) {
  return fwrite(src, 1, 1, out) == 1 ? 0 : 1;
}

static int write_le_2byte(FILE *out, void* src) {
  uint16_t x;
  memcpy(&x, src, sizeof(x));
  if (IS_BIG_ENDIAN) {
    x = (uint16_t)((x>>8) | (x<<8));
  }
  return fwrite(&x, 2, 1, out) == 1 ? 0 : 1;
}

static int write_le_4byte(FILE *out, void* src) {
  uint32_t x;
  memcpy(&x, src, sizeof(x));
  if (IS_BIG_ENDIAN) {
    x =
      ((x>>24)&0xFF) |
      ((x>>8) &0xFF00) |
      ((x<<8) &0xFF0000) |
      ((x<<24)&0xFF000000);
  }
  return fwrite(&x, 4, 1, out) == 1 ? 0 : 1;
}

static int write_le_8byte(FILE *out, void* src) {
  uint64_t x;
  memcpy(&x, src, sizeof(x));
  if (IS_BIG_ENDIAN) {
    x =
      ((x>>56)&0xFFull) |
      ((x>>40)&0xFF00ull) |
      ((x>>24)&0xFF0000ull) |
      ((x>>8) &0xFF000000ull) |
      ((x<<8) &0xFF00000000ull) |
      ((x<<24)&0xFF0000000000ull) |
      ((x<<40)&0xFF000000000000ull) |
      ((x<<56)&0xFF00000000000000ull);
  }
  return fwrite(&x, 8, 1, out) == 1 ? 0 : 1;
}

//// Types

struct primtype_info_t {
  const char binname[4]; // Used for parsing binary data.  Exactly four
                         // bytes, space-padded on the left, no terminator.
  const char* type_name; // Same name as in Futhark.
  const int size; // in bytes
  const writer write_str; // Write in text format.
  const str_reader read_str; // Read in text format.
  const writer write_bin; // Write in binary format.
  const bin_reader read_bin; // Read in binary format.
};

static const struct primtype_info_t i8_info =
  {.binname = "  i8", .type_name = "i8",   .size = 1,
   .write_str = (writer)write_str_i8, .read_str = (str_reader)read_str_i8,
   .write_bin = write_byte, .read_bin = read_byte};
static const struct primtype_info_t i16_info =
  {.binname = " i16", .type_name = "i16",  .size = 2,
   .write_str = (writer)write_str_i16, .read_str = (str_reader)read_str_i16,
   .write_bin = write_le_2byte, .read_bin = read_le_2byte};
static const struct primtype_info_t i32_info =
  {.binname = " i32", .type_name = "i32",  .size = 4,
   .write_str = (writer)write_str_i32, .read_str = (str_reader)read_str_i32,
   .write_bin = write_le_4byte, .read_bin = read_le_4byte};
static const struct primtype_info_t i64_info =
  {.binname = " i64", .type_name = "i64",  .size = 8,
   .write_str = (writer)write_str_i64, .read_str = (str_reader)read_str_i64,
   .write_bin = write_le_8byte, .read_bin = read_le_8byte};
static const struct primtype_info_t u8_info =
  {.binname = "  u8", .type_name = "u8",   .size = 1,
   .write_str = (writer)write_str_u8, .read_str = (str_reader)read_str_u8,
   .write_bin = write_byte, .read_bin = read_byte};
static const struct primtype_info_t u16_info =
  {.binname = " u16", .type_name = "u16",  .size = 2,
   .write_str = (writer)write_str_u16, .read_str = (str_reader)read_str_u16,
   .write_bin = write_le_2byte, .read_bin = read_le_2byte};
static const struct primtype_info_t u32_info =
  {.binname = " u32", .type_name = "u32",  .size = 4,
   .write_str = (writer)write_str_u32, .read_str = (str_reader)read_str_u32,
   .write_bin = write_le_4byte, .read_bin = read_le_4byte};
static const struct primtype_info_t u64_info =
  {.binname = " u64", .type_name = "u64",  .size = 8,
   .write_str = (writer)write_str_u64, .read_str = (str_reader)read_str_u64,
   .write_bin = write_le_8byte, .read_bin = read_le_8byte};
static const struct primtype_info_t f32_info =
  {.binname = " f32", .type_name = "f32",  .size = 4,
   .write_str = (writer)write_str_f32, .read_str = (str_reader)read_str_f32,
   .write_bin = write_le_4byte, .read_bin = read_le_4byte};
static const struct primtype_info_t f64_info =
  {.binname = " f64", .type_name = "f64",  .size = 8,
   .write_str = (writer)write_str_f64, .read_str = (str_reader)read_str_f64,
   .write_bin = write_le_8byte, .read_bin = read_le_8byte};
static const struct primtype_info_t bool_info =
  {.binname = "bool", .type_name = "bool", .size = 1,
   .write_str = (writer)write_str_bool, .read_str = (str_reader)read_str_bool,
   .write_bin = write_byte, .read_bin = read_byte};

static const struct primtype_info_t* primtypes[] = {
  &i8_info, &i16_info, &i32_info, &i64_info,
  &u8_info, &u16_info, &u32_info, &u64_info,
  &f32_info, &f64_info,
  &bool_info,
  NULL // NULL-terminated
};

// General value interface.  All endian business taken care of at
// lower layers.

static int read_is_binary() { skipspaces(); int c = getchar(); if (c == 'b') { int8_t bin_version; int ret = read_byte(&bin_version); if (ret != 0) { panic(1, "binary-input: could not read version.\n"); } if (bin_version != BINARY_FORMAT_VERSION) { panic(1, "binary-input: File uses version %i, but I only understand version %i.\n", bin_version, BINARY_FORMAT_VERSION); } return 1; } ungetc(c, stdin); return 0; } static const struct primtype_info_t* read_bin_read_type_enum() { char read_binname[4]; int num_matched = scanf("%4c", read_binname); if (num_matched != 1) { panic(1, "binary-input: Couldn't read element type.\n"); } const struct primtype_info_t **type = primtypes; for (; *type != NULL; type++) { // I compare the 4 characters manually instead of using strncmp because // this allows any value to be used, also NULL bytes if (memcmp(read_binname, (*type)->binname, 4) == 0) { return *type; } } panic(1, "binary-input: Did not recognize the type '%s'.\n", read_binname); return NULL; } static void read_bin_ensure_scalar(const struct primtype_info_t *expected_type) { int8_t bin_dims; int ret = read_byte(&bin_dims); if (ret != 0) { panic(1, "binary-input: Couldn't get dims.\n"); } if (bin_dims != 0) { panic(1, "binary-input: Expected scalar (0 dimensions), but got array with %i dimensions.\n", bin_dims); } const struct primtype_info_t *bin_type = read_bin_read_type_enum(); if (bin_type != expected_type) { panic(1, "binary-input: Expected scalar of type %s but got scalar of type %s.\n", expected_type->type_name, bin_type->type_name); } } //// High-level interface static int read_bin_array(const struct primtype_info_t *expected_type, void **data, int64_t *shape, int64_t dims) { int ret; int8_t bin_dims; ret = read_byte(&bin_dims); if (ret != 0) { panic(1, "binary-input: Couldn't get dims.\n"); } if (bin_dims != dims) { panic(1, "binary-input: Expected %i dimensions, but got array with %i dimensions.\n", dims, bin_dims); } const struct primtype_info_t *bin_primtype = 
read_bin_read_type_enum(); if (expected_type != bin_primtype) { panic(1, "binary-input: Expected %iD-array with element type '%s' but got %iD-array with element type '%s'.\n", dims, expected_type->type_name, dims, bin_primtype->type_name); } uint64_t elem_count = 1; for (int i=0; isize; void* tmp = realloc(*data, elem_count * elem_size); if (tmp == NULL) { panic(1, "binary-input: Failed to allocate array of size %i.\n", elem_count * elem_size); } *data = tmp; size_t num_elems_read = fread(*data, elem_size, elem_count, stdin); if (num_elems_read != elem_count) { panic(1, "binary-input: tried to read %i elements of an array, but only got %i elements.\n", elem_count, num_elems_read); } // If we're on big endian platform we must change all multibyte elements // from using little endian to big endian if (IS_BIG_ENDIAN && elem_size != 1) { char* elems = (char*) *data; for (uint64_t i=0; isize, (str_reader)expected_type->read_str, expected_type->type_name, data, shape, dims); } else { return read_bin_array(expected_type, data, shape, dims); } } static int write_str_array(FILE *out, const struct primtype_info_t *elem_type, unsigned char *data, int64_t *shape, int8_t rank) { if (rank==0) { elem_type->write_str(out, (void*)data); } else { int64_t len = shape[0]; int64_t slice_size = 1; int64_t elem_size = elem_type->size; for (int64_t i = 1; i < rank; i++) { slice_size *= shape[i]; } if (len*slice_size == 0) { printf("empty("); for (int64_t i = 1; i < rank; i++) { printf("[]"); } printf("%s", elem_type->type_name); printf(")"); } else if (rank==1) { putchar('['); for (int64_t i = 0; i < len; i++) { elem_type->write_str(out, (void*) (data + i * elem_size)); if (i != len-1) { printf(", "); } } putchar(']'); } else { putchar('['); for (int64_t i = 0; i < len; i++) { write_str_array(out, elem_type, data + i * slice_size * elem_size, shape+1, rank-1); if (i != len-1) { printf(", "); } } putchar(']'); } } return 0; } static int write_bin_array(FILE *out, const struct 
primtype_info_t *elem_type, unsigned char *data, int64_t *shape, int8_t rank) { int64_t num_elems = 1; for (int64_t i = 0; i < rank; i++) { num_elems *= shape[i]; } fputc('b', out); fputc((char)BINARY_FORMAT_VERSION, out); fwrite(&rank, sizeof(int8_t), 1, out); fputs(elem_type->binname, out); fwrite(shape, sizeof(int64_t), rank, out); if (IS_BIG_ENDIAN) { for (int64_t i = 0; i < num_elems; i++) { unsigned char *elem = data+i*elem_type->size; for (int64_t j = 0; j < elem_type->size; j++) { fwrite(&elem[elem_type->size-j], 1, 1, out); } } } else { fwrite(data, elem_type->size, num_elems, out); } return 0; } static int write_array(FILE *out, int write_binary, const struct primtype_info_t *elem_type, void *data, int64_t *shape, int8_t rank) { if (write_binary) { return write_bin_array(out, elem_type, data, shape, rank); } else { return write_str_array(out, elem_type, data, shape, rank); } } static int read_scalar(const struct primtype_info_t *expected_type, void *dest) { if (!read_is_binary()) { char buf[100]; next_token(buf, sizeof(buf)); return expected_type->read_str(buf, dest); } else { read_bin_ensure_scalar(expected_type); return expected_type->read_bin(dest); } } static int write_scalar(FILE *out, int write_binary, const struct primtype_info_t *type, void *src) { if (write_binary) { return write_bin_array(out, type, src, NULL, 0); } else { return type->write_str(out, src); } }