// Start of scalar.h.

// Implementation of the primitive scalar operations.  Very
// repetitive.  This code is inserted directly into both CUDA and
// OpenCL programs, as well as the CPU code, so it has some #ifdefs to
// work everywhere.  Some operations are defined as macros because
// this allows us to use them as constant expressions in things like
// array sizes and static initialisers.

// Some of the #ifdefs are because OpenCL uses type-generic functions
// for some operations (e.g. sqrt), while C and CUDA sensibly use
// distinct functions for different precisions (e.g. sqrtf() and
// sqrt()).  This is quite annoying.  Due to C's unfortunate casting
// rules, it is also really easy to accidentally implement
// floating-point functions in the wrong precision, so be careful.

// Double-precision definitions are only included if the preprocessor
// macro FUTHARK_F64_ENABLED is set.

// Integer addition, subtraction and multiplication on unsigned
// operands; the result is truncated to the operand width.

static inline uint8_t add8(uint8_t x, uint8_t y) { return x + y; }
static inline uint16_t add16(uint16_t x, uint16_t y) { return x + y; }
static inline uint32_t add32(uint32_t x, uint32_t y) { return x + y; }
static inline uint64_t add64(uint64_t x, uint64_t y) { return x + y; }

static inline uint8_t sub8(uint8_t x, uint8_t y) { return x - y; }
static inline uint16_t sub16(uint16_t x, uint16_t y) { return x - y; }
static inline uint32_t sub32(uint32_t x, uint32_t y) { return x - y; }
static inline uint64_t sub64(uint64_t x, uint64_t y) { return x - y; }

static inline uint8_t mul8(uint8_t x, uint8_t y) { return x * y; }

// The operands are widened to uint32_t before multiplying: uint16_t
// would otherwise be promoted to (signed) int, and 65535 * 65535
// overflows a 32-bit int, which is undefined behaviour.
static inline uint16_t mul16(uint16_t x, uint16_t y) {
  return (uint16_t)((uint32_t)x * (uint32_t)y);
}

static inline uint32_t mul32(uint32_t x, uint32_t y) { return x * y; }
static inline uint64_t mul64(uint64_t x, uint64_t y) { return x * y; }

// Unsigned division and modulo.  The plain variants do not check for
// a zero divisor.  udiv_up computes (x + y - 1) / y.

static inline uint8_t udiv8(uint8_t x, uint8_t y) { return x / y; }
static inline uint16_t udiv16(uint16_t x, uint16_t y) { return x / y; }
static inline uint32_t udiv32(uint32_t x, uint32_t y) { return x / y; }
static inline uint64_t udiv64(uint64_t x, uint64_t y) { return x / y; }

static inline uint8_t udiv_up8(uint8_t x, uint8_t y) { return (x + y - 1) / y; }
static inline uint16_t udiv_up16(uint16_t x, uint16_t y) { return (x + y - 1) / y; }
static inline uint32_t udiv_up32(uint32_t x, uint32_t y) { return (x + y - 1) / y; }
static inline uint64_t udiv_up64(uint64_t x, uint64_t y) { return (x + y - 1) / y; }

static inline uint8_t umod8(uint8_t x, uint8_t y) { return x % y; }
static inline uint16_t umod16(uint16_t x, uint16_t y) { return x % y; }
static inline uint32_t umod32(uint32_t x, uint32_t y) { return x % y; }
static inline uint64_t umod64(uint64_t x, uint64_t y) { return x % y; }

// The *_safe variants return 0 when the divisor is zero instead of
// performing the division.

static inline uint8_t udiv_safe8(uint8_t x, uint8_t y) { return y == 0 ? 0 : x / y; }
static inline uint16_t udiv_safe16(uint16_t x, uint16_t y) { return y == 0 ? 0 : x / y; }
static inline uint32_t udiv_safe32(uint32_t x, uint32_t y) { return y == 0 ? 0 : x / y; }
static inline uint64_t udiv_safe64(uint64_t x, uint64_t y) { return y == 0 ? 0 : x / y; }

static inline uint8_t udiv_up_safe8(uint8_t x, uint8_t y) { return y == 0 ? 0 : (x + y - 1) / y; }
static inline uint16_t udiv_up_safe16(uint16_t x, uint16_t y) { return y == 0 ? 0 : (x + y - 1) / y; }
static inline uint32_t udiv_up_safe32(uint32_t x, uint32_t y) { return y == 0 ? 0 : (x + y - 1) / y; }
static inline uint64_t udiv_up_safe64(uint64_t x, uint64_t y) { return y == 0 ? 0 : (x + y - 1) / y; }

static inline uint8_t umod_safe8(uint8_t x, uint8_t y) { return y == 0 ? 0 : x % y; }
static inline uint16_t umod_safe16(uint16_t x, uint16_t y) { return y == 0 ? 0 : x % y; }
static inline uint32_t umod_safe32(uint32_t x, uint32_t y) { return y == 0 ? 0 : x % y; }
static inline uint64_t umod_safe64(uint64_t x, uint64_t y) { return y == 0 ? 0 : x % y; }

// Signed division rounding towards negative infinity: the truncated
// quotient is decremented when the remainder is non-zero and its sign
// differs from the sign of the divisor.

static inline int8_t sdiv8(int8_t x, int8_t y) {
  int8_t q = x / y;
  int8_t r = x % y;
  return q - ((r != 0 && (r < 0) != (y < 0)) ? 1 : 0);
}

static inline int16_t sdiv16(int16_t x, int16_t y) {
  int16_t q = x / y;
  int16_t r = x % y;
  return q - ((r != 0 && (r < 0) != (y < 0)) ? 1 : 0);
}

static inline int32_t sdiv32(int32_t x, int32_t y) {
  int32_t q = x / y;
  int32_t r = x % y;
  return q - ((r != 0 && (r < 0) != (y < 0)) ? 1 : 0);
}

static inline int64_t sdiv64(int64_t x, int64_t y) {
  int64_t q = x / y;
  int64_t r = x % y;
  return q - ((r != 0 && (r < 0) != (y < 0)) ? 1 : 0);
}

// sdiv_up(x, y) is sdiv(x + y - 1, y).

static inline int8_t sdiv_up8(int8_t x, int8_t y) { return sdiv8(x + y - 1, y); }
static inline int16_t sdiv_up16(int16_t x, int16_t y) { return sdiv16(x + y - 1, y); }
static inline int32_t sdiv_up32(int32_t x, int32_t y) { return sdiv32(x + y - 1, y); }
static inline int64_t sdiv_up64(int64_t x, int64_t y) { return sdiv64(x + y - 1, y); }

// Signed modulo matching sdiv: the C remainder is adjusted by y when
// it is non-zero and x and y do not have the same sign.

static inline int8_t smod8(int8_t x, int8_t y) {
  int8_t r = x % y;
  return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y);
}

static inline int16_t smod16(int16_t x, int16_t y) {
  int16_t r = x % y;
  return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y);
}

static inline int32_t smod32(int32_t x, int32_t y) {
  int32_t r = x % y;
  return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y);
}

static inline int64_t smod64(int64_t x, int64_t y) {
  int64_t r = x % y;
  return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y);
}

// As for the unsigned operations, the *_safe variants yield 0 for a
// zero divisor.

static inline int8_t sdiv_safe8(int8_t x, int8_t y) { return y == 0 ? 0 : sdiv8(x, y); }
static inline int16_t sdiv_safe16(int16_t x, int16_t y) { return y == 0 ? 0 : sdiv16(x, y); }
static inline int32_t sdiv_safe32(int32_t x, int32_t y) { return y == 0 ? 0 : sdiv32(x, y); }
static inline int64_t sdiv_safe64(int64_t x, int64_t y) { return y == 0 ? 0 : sdiv64(x, y); }

static inline int8_t sdiv_up_safe8(int8_t x, int8_t y) { return sdiv_safe8(x + y - 1, y); }
static inline int16_t sdiv_up_safe16(int16_t x, int16_t y) { return sdiv_safe16(x + y - 1, y); }
static inline int32_t sdiv_up_safe32(int32_t x, int32_t y) { return sdiv_safe32(x + y - 1, y); }
static inline int64_t sdiv_up_safe64(int64_t x, int64_t y) { return sdiv_safe64(x + y - 1, y); }

static inline int8_t smod_safe8(int8_t x, int8_t y) { return y == 0 ? 0 : smod8(x, y); }
static inline int16_t smod_safe16(int16_t x, int16_t y) { return y == 0 ? 0 : smod16(x, y); }
static inline int32_t smod_safe32(int32_t x, int32_t y) { return y == 0 ? 0 : smod32(x, y); }
static inline int64_t smod_safe64(int64_t x, int64_t y) { return y == 0 ? 0 : smod64(x, y); }

// Signed quotient and remainder using the native C '/' and '%'
// operators directly.

static inline int8_t squot8(int8_t x, int8_t y) { return x / y; }
static inline int16_t squot16(int16_t x, int16_t y) { return x / y; }
static inline int32_t squot32(int32_t x, int32_t y) { return x / y; }
static inline int64_t squot64(int64_t x, int64_t y) { return x / y; }

static inline int8_t srem8(int8_t x, int8_t y) { return x % y; }
static inline int16_t srem16(int16_t x, int16_t y) { return x % y; }
static inline int32_t srem32(int32_t x, int32_t y) { return x % y; }
static inline int64_t srem64(int64_t x, int64_t y) { return x % y; }

static inline int8_t squot_safe8(int8_t x, int8_t y) { return y == 0 ? 0 : x / y; }
static inline int16_t squot_safe16(int16_t x, int16_t y) { return y == 0 ? 0 : x / y; }
static inline int32_t squot_safe32(int32_t x, int32_t y) { return y == 0 ? 0 : x / y; }
static inline int64_t squot_safe64(int64_t x, int64_t y) { return y == 0 ? 0 : x / y; }

static inline int8_t srem_safe8(int8_t x, int8_t y) { return y == 0 ? 0 : x % y; }
static inline int16_t srem_safe16(int16_t x, int16_t y) { return y == 0 ? 0 : x % y; }
static inline int32_t srem_safe32(int32_t x, int32_t y) { return y == 0 ? 0 : x % y; }
static inline int64_t srem_safe64(int64_t x, int64_t y) { return y == 0 ? 0 : x % y; }

// Minimum and maximum, signed (s*) and unsigned (u*).

static inline int8_t smin8(int8_t x, int8_t y) { return x < y ? x : y; }
static inline int16_t smin16(int16_t x, int16_t y) { return x < y ? x : y; }
static inline int32_t smin32(int32_t x, int32_t y) { return x < y ? x : y; }
static inline int64_t smin64(int64_t x, int64_t y) { return x < y ? x : y; }

static inline uint8_t umin8(uint8_t x, uint8_t y) { return x < y ? x : y; }
static inline uint16_t umin16(uint16_t x, uint16_t y) { return x < y ? x : y; }
static inline uint32_t umin32(uint32_t x, uint32_t y) { return x < y ? x : y; }
static inline uint64_t umin64(uint64_t x, uint64_t y) { return x < y ? x : y; }

static inline int8_t smax8(int8_t x, int8_t y) { return x < y ? y : x; }
static inline int16_t smax16(int16_t x, int16_t y) { return x < y ? y : x; }
static inline int32_t smax32(int32_t x, int32_t y) { return x < y ? y : x; }
static inline int64_t smax64(int64_t x, int64_t y) { return x < y ? y : x; }

static inline uint8_t umax8(uint8_t x, uint8_t y) { return x < y ? y : x; }
static inline uint16_t umax16(uint16_t x, uint16_t y) { return x < y ? y : x; }
static inline uint32_t umax32(uint32_t x, uint32_t y) { return x < y ? y : x; }
static inline uint64_t umax64(uint64_t x, uint64_t y) { return x < y ? y : x; }

// Shifts.  The narrow left shifts are performed on uint32_t rather
// than on the (signed) int that uint8_t/uint16_t promote to, so that
// bits shifted past bit 31 are discarded instead of overflowing an
// int.  The shift amount is not range-checked by any of these.

static inline uint8_t shl8(uint8_t x, uint8_t y) { return (uint8_t)((uint32_t)x << y); }
static inline uint16_t shl16(uint16_t x, uint16_t y) { return (uint16_t)((uint32_t)x << y); }
static inline uint32_t shl32(uint32_t x, uint32_t y) { return x << y; }
static inline uint64_t shl64(uint64_t x, uint64_t y) { return x << y; }

static inline uint8_t lshr8(uint8_t x, uint8_t y) { return x >> y; }
static inline uint16_t lshr16(uint16_t x, uint16_t y) { return x >> y; }
static inline uint32_t lshr32(uint32_t x, uint32_t y) { return x >> y; }
static inline uint64_t lshr64(uint64_t x, uint64_t y) { return x >> y; }

static inline int8_t ashr8(int8_t x, int8_t y) { return x >> y; }
static inline int16_t ashr16(int16_t x, int16_t y) { return x >> y; }
static inline int32_t ashr32(int32_t x, int32_t y) { return x >> y; }
static inline int64_t ashr64(int64_t x, int64_t y) { return x >> y; }

// Bitwise operations.

static inline uint8_t and8(uint8_t x, uint8_t y) { return x & y; }
static inline uint16_t and16(uint16_t x, uint16_t y) { return x & y; }
static inline uint32_t and32(uint32_t x, uint32_t y) { return x & y; }
static inline uint64_t and64(uint64_t x, uint64_t y) { return x & y; }

static inline uint8_t or8(uint8_t x, uint8_t y) { return x | y; }
static inline uint16_t or16(uint16_t x, uint16_t y) { return x | y; }
static inline uint32_t or32(uint32_t x, uint32_t y) { return x | y; }
static inline uint64_t or64(uint64_t x, uint64_t y) { return x | y; }

static inline uint8_t xor8(uint8_t x, uint8_t y) { return x ^ y; }
static inline uint16_t xor16(uint16_t x, uint16_t y) { return x ^ y; }
static inline uint32_t xor32(uint32_t x, uint32_t y) { return x ^ y; }
static inline uint64_t xor64(uint64_t x, uint64_t y) { return x ^ y; }

// Integer comparisons, unsigned (u*) and signed (s*).

static inline bool ult8(uint8_t x, uint8_t y) { return x < y; }
static inline bool ult16(uint16_t x, uint16_t y) { return x < y; }
static inline bool ult32(uint32_t x, uint32_t y) { return x < y; }
static inline bool ult64(uint64_t x, uint64_t y) { return x < y; }

static inline bool ule8(uint8_t x, uint8_t y) { return x <= y; }
static inline bool ule16(uint16_t x, uint16_t y) { return x <= y; }
static inline bool ule32(uint32_t x, uint32_t y) { return x <= y; }
static inline bool ule64(uint64_t x, uint64_t y) { return x <= y; }

static inline bool slt8(int8_t x, int8_t y) { return x < y; }
static inline bool slt16(int16_t x, int16_t y) { return x < y; }
static inline bool slt32(int32_t x, int32_t y) { return x < y; }
static inline bool slt64(int64_t x, int64_t y) { return x < y; }

static inline bool sle8(int8_t x, int8_t y) { return x <= y; }
static inline bool sle16(int16_t x, int16_t y) { return x <= y; }
static inline bool sle32(int32_t x, int32_t y) { return x <= y; }
static inline bool sle64(int64_t x, int64_t y) { return x <= y; }

// Integer exponentiation by repeated squaring; the result is
// truncated to the operand width.

static inline uint8_t pow8(uint8_t x, uint8_t y) {
  uint8_t res = 1, rem = y;

  while (rem != 0) {
    if (rem & 1) {
      res *= x;
    }
    rem >>= 1;
    x *= x;
  }
  return res;
}

// The products are computed in uint32_t for the same reason as in
// mul16: multiplying two promoted uint16_t values can overflow int.
static inline uint16_t pow16(uint16_t x, uint16_t y) {
  uint16_t res = 1, rem = y;

  while (rem != 0) {
    if (rem & 1) {
      res = (uint16_t)((uint32_t)res * (uint32_t)x);
    }
    rem >>= 1;
    x = (uint16_t)((uint32_t)x * (uint32_t)x);
  }
  return res;
}

static inline uint32_t pow32(uint32_t x, uint32_t y) {
  uint32_t res = 1, rem = y;

  while (rem != 0) {
    if (rem & 1) {
      res *= x;
    }
    rem >>= 1;
    x *= x;
  }
  return res;
}

static inline uint64_t pow64(uint64_t x, uint64_t y) {
  uint64_t res = 1, rem = y;

  while (rem != 0) {
    if (rem & 1) {
      res *= x;
    }
    rem >>= 1;
    x *= x;
  }
  return res;
}

// Conversions between integers and booleans.

static inline bool itob_i8_bool(int8_t x) { return x; }
static inline bool itob_i16_bool(int16_t x) { return x; }
static inline bool itob_i32_bool(int32_t x) { return x; }
static inline bool itob_i64_bool(int64_t x) { return x; }

static inline int8_t btoi_bool_i8(bool x) { return x; }
static inline int16_t btoi_bool_i16(bool x) { return x; }
static inline int32_t btoi_bool_i32(bool x) { return x; }
static inline int64_t btoi_bool_i64(bool x) { return x; }

// Sign extension (sext) and zero extension (zext) between integer
// widths.  These are macros so they can be used in constant
// expressions.

#define sext_i8_i8(x) ((int8_t) (int8_t) (x))
#define sext_i8_i16(x) ((int16_t) (int8_t) (x))
#define sext_i8_i32(x) ((int32_t) (int8_t) (x))
#define sext_i8_i64(x) ((int64_t) (int8_t) (x))
#define sext_i16_i8(x) ((int8_t) (int16_t) (x))
#define sext_i16_i16(x) ((int16_t) (int16_t) (x))
#define sext_i16_i32(x) ((int32_t) (int16_t) (x))
#define sext_i16_i64(x) ((int64_t) (int16_t) (x))
#define sext_i32_i8(x) ((int8_t) (int32_t) (x))
#define sext_i32_i16(x) ((int16_t) (int32_t) (x))
#define sext_i32_i32(x) ((int32_t) (int32_t) (x))
#define sext_i32_i64(x) ((int64_t) (int32_t) (x))
#define sext_i64_i8(x) ((int8_t) (int64_t) (x))
#define sext_i64_i16(x) ((int16_t) (int64_t) (x))
#define sext_i64_i32(x) ((int32_t) (int64_t) (x))
#define sext_i64_i64(x) ((int64_t) (int64_t) (x))

#define zext_i8_i8(x) ((int8_t) (uint8_t) (x))
#define zext_i8_i16(x) ((int16_t) (uint8_t) (x))
#define zext_i8_i32(x) ((int32_t) (uint8_t) (x))
#define zext_i8_i64(x) ((int64_t) (uint8_t) (x))
#define zext_i16_i8(x) ((int8_t) (uint16_t) (x))
#define zext_i16_i16(x) ((int16_t) (uint16_t) (x))
#define zext_i16_i32(x) ((int32_t) (uint16_t) (x))
#define zext_i16_i64(x) ((int64_t) (uint16_t) (x))
#define zext_i32_i8(x) ((int8_t) (uint32_t) (x))
#define zext_i32_i16(x) ((int16_t) (uint32_t) (x))
#define zext_i32_i32(x) ((int32_t) (uint32_t) (x))
#define zext_i32_i64(x) ((int64_t) (uint32_t) (x))
#define zext_i64_i8(x) ((int8_t) (uint64_t) (x))
#define zext_i64_i16(x) ((int16_t) (uint64_t) (x))
#define zext_i64_i32(x) ((int32_t) (uint64_t) (x))
#define zext_i64_i64(x) ((int64_t) (uint64_t) (x))

// Absolute value.

static int8_t abs8(int8_t x) { return (int8_t)abs(x); }
static int16_t abs16(int16_t x) { return (int16_t)abs(x); }
static int32_t abs32(int32_t x) { return abs(x); }

static int64_t abs64(int64_t x) {
#if defined(__OPENCL_VERSION__)
  return abs(x);
#else
  return llabs(x);
#endif
}

// Population count (number of set bits).

#if defined(__OPENCL_VERSION__)

static int32_t futrts_popc8(int8_t x) { return popcount(x); }
static int32_t futrts_popc16(int16_t x) { return popcount(x); }
static int32_t futrts_popc32(int32_t x) { return popcount(x); }
static int32_t futrts_popc64(int64_t x) { return popcount(x); }

#elif defined(__CUDA_ARCH__)

static int32_t futrts_popc8(int8_t x) { return __popc(zext_i8_i32(x)); }
static int32_t futrts_popc16(int16_t x) { return __popc(zext_i16_i32(x)); }
static int32_t futrts_popc32(int32_t x) { return __popc(x); }
static int32_t futrts_popc64(int64_t x) { return __popcll(x); }

#else // Not OpenCL or CUDA, but plain C.

// Each iteration of 'x &= x - 1' clears the lowest set bit, so the
// loop runs once per set bit.

static int32_t futrts_popc8(uint8_t x) {
  int c = 0;
  for (; x; ++c) {
    x &= x - 1;
  }
  return c;
}

static int32_t futrts_popc16(uint16_t x) {
  int c = 0;
  for (; x; ++c) {
    x &= x - 1;
  }
  return c;
}

static int32_t futrts_popc32(uint32_t x) {
  int c = 0;
  for (; x; ++c) {
    x &= x - 1;
  }
  return c;
}

static int32_t futrts_popc64(uint64_t x) {
  int c = 0;
  for (; x; ++c) {
    x &= x - 1;
  }
  return c;
}

#endif

// Upper half of the double-width product of two unsigned integers.

#if defined(__OPENCL_VERSION__)

static uint8_t futrts_mul_hi8(uint8_t a, uint8_t b) { return mul_hi(a, b); }
static uint16_t futrts_mul_hi16(uint16_t a, uint16_t b) { return mul_hi(a, b); }
static uint32_t futrts_mul_hi32(uint32_t a, uint32_t b) { return mul_hi(a, b); }
static uint64_t futrts_mul_hi64(uint64_t a, uint64_t b) { return mul_hi(a, b); }

#elif defined(__CUDA_ARCH__)

static uint8_t futrts_mul_hi8(uint8_t a, uint8_t b) {
  uint16_t aa = a;
  uint16_t bb = b;
  return aa * bb >> 8;
}

static uint16_t futrts_mul_hi16(uint16_t a, uint16_t b) {
  uint32_t aa = a;
  uint32_t bb = b;
  return aa * bb >> 16;
}

static uint32_t futrts_mul_hi32(uint32_t a, uint32_t b) { return mulhi(a, b); }
static uint64_t futrts_mul_hi64(uint64_t a, uint64_t b) { return mul64hi(a, b); }

#else // Not OpenCL or CUDA, but plain C.

static uint8_t futrts_mul_hi8(uint8_t a, uint8_t b) {
  uint16_t aa = a;
  uint16_t bb = b;
  return aa * bb >> 8;
}

static uint16_t futrts_mul_hi16(uint16_t a, uint16_t b) {
  uint32_t aa = a;
  uint32_t bb = b;
  return aa * bb >> 16;
}

static uint32_t futrts_mul_hi32(uint32_t a, uint32_t b) {
  uint64_t aa = a;
  uint64_t bb = b;
  return aa * bb >> 32;
}

static uint64_t futrts_mul_hi64(uint64_t a, uint64_t b) {
  __uint128_t aa = a;
  __uint128_t bb = b;
  return aa * bb >> 64;
}

#endif

// mad_hi(a, b, c): the upper half of a * b, plus c.

#if defined(__OPENCL_VERSION__)

static uint8_t futrts_mad_hi8(uint8_t a, uint8_t b, uint8_t c) { return mad_hi(a, b, c); }
static uint16_t futrts_mad_hi16(uint16_t a, uint16_t b, uint16_t c) { return mad_hi(a, b, c); }
static uint32_t futrts_mad_hi32(uint32_t a, uint32_t b, uint32_t c) { return mad_hi(a, b, c); }
static uint64_t futrts_mad_hi64(uint64_t a, uint64_t b, uint64_t c) { return mad_hi(a, b, c); }

#else // Not OpenCL

static uint8_t futrts_mad_hi8(uint8_t a, uint8_t b, uint8_t c) { return futrts_mul_hi8(a, b) + c; }
static uint16_t futrts_mad_hi16(uint16_t a, uint16_t b, uint16_t c) { return futrts_mul_hi16(a, b) + c; }
static uint32_t futrts_mad_hi32(uint32_t a, uint32_t b, uint32_t c) { return futrts_mul_hi32(a, b) + c; }
static uint64_t futrts_mad_hi64(uint64_t a, uint64_t b, uint64_t c) { return futrts_mul_hi64(a, b) + c; }

#endif

// Count leading zero bits.

#if defined(__OPENCL_VERSION__)

static int32_t futrts_clzz8(int8_t x) { return clz(x); }
static int32_t futrts_clzz16(int16_t x) { return clz(x); }
static int32_t futrts_clzz32(int32_t x) { return clz(x); }
static int32_t futrts_clzz64(int64_t x) { return clz(x); }

#elif defined(__CUDA_ARCH__)

// The narrow variants count on a zero-extended 32-bit value and
// subtract the extra leading bits that the widening introduced.

static int32_t futrts_clzz8(int8_t x) { return __clz(zext_i8_i32(x)) - 24; }
static int32_t futrts_clzz16(int16_t x) { return __clz(zext_i16_i32(x)) - 16; }
static int32_t futrts_clzz32(int32_t x) { return __clz(x); }
static int32_t futrts_clzz64(int64_t x) { return __clzll(x); }

#else // Not OpenCL or CUDA, but plain C.

// Zero is handled explicitly and never passed to the builtins.

static int32_t futrts_clzz8(int8_t x) {
  return x == 0 ? 8 : __builtin_clz((uint32_t)zext_i8_i32(x)) - 24;
}

static int32_t futrts_clzz16(int16_t x) {
  return x == 0 ? 16 : __builtin_clz((uint32_t)zext_i16_i32(x)) - 16;
}

static int32_t futrts_clzz32(int32_t x) {
  return x == 0 ? 32 : __builtin_clz((uint32_t)x);
}

static int32_t futrts_clzz64(int64_t x) {
  return x == 0 ? 64 : __builtin_clzll((uint64_t)x);
}

#endif

// Count trailing zero bits.  A zero argument yields the bit width of
// the type.

#if defined(__OPENCL_VERSION__)

static int32_t futrts_ctzz8(int8_t x) {
  int i = 0;
  for (; i < 8 && (x & 1) == 0; i++, x >>= 1)
    ;
  return i;
}

static int32_t futrts_ctzz16(int16_t x) {
  int i = 0;
  for (; i < 16 && (x & 1) == 0; i++, x >>= 1)
    ;
  return i;
}

static int32_t futrts_ctzz32(int32_t x) {
  int i = 0;
  for (; i < 32 && (x & 1) == 0; i++, x >>= 1)
    ;
  return i;
}

static int32_t futrts_ctzz64(int64_t x) {
  int i = 0;
  for (; i < 64 && (x & 1) == 0; i++, x >>= 1)
    ;
  return i;
}

#elif defined(__CUDA_ARCH__)

static int32_t futrts_ctzz8(int8_t x) {
  int y = __ffs(x);
  return y == 0 ? 8 : y - 1;
}

static int32_t futrts_ctzz16(int16_t x) {
  int y = __ffs(x);
  return y == 0 ? 16 : y - 1;
}

static int32_t futrts_ctzz32(int32_t x) {
  int y = __ffs(x);
  return y == 0 ? 32 : y - 1;
}

static int32_t futrts_ctzz64(int64_t x) {
  int y = __ffsll(x);
  return y == 0 ? 64 : y - 1;
}

#else // Not OpenCL or CUDA, but plain C.

static int32_t futrts_ctzz8(int8_t x) {
  return x == 0 ? 8 : __builtin_ctz((uint32_t)x);
}

static int32_t futrts_ctzz16(int16_t x) {
  return x == 0 ? 16 : __builtin_ctz((uint32_t)x);
}

static int32_t futrts_ctzz32(int32_t x) {
  return x == 0 ? 32 : __builtin_ctz((uint32_t)x);
}

static int32_t futrts_ctzz64(int64_t x) {
  return x == 0 ? 64 : __builtin_ctzll((uint64_t)x);
}

#endif

// Single-precision floating-point arithmetic and comparisons.

static inline float fdiv32(float x, float y) { return x / y; }
static inline float fadd32(float x, float y) { return x + y; }
static inline float fsub32(float x, float y) { return x - y; }
static inline float fmul32(float x, float y) { return x * y; }

static inline bool cmplt32(float x, float y) { return x < y; }
static inline bool cmple32(float x, float y) { return x <= y; }

// Conversions between integers and single-precision floats, done
// with plain C casts.

static inline float sitofp_i8_f32(int8_t x) { return (float) x; }
static inline float sitofp_i16_f32(int16_t x) { return (float) x; }
static inline float sitofp_i32_f32(int32_t x) { return (float) x; }
static inline float sitofp_i64_f32(int64_t x) { return (float) x; }

static inline float uitofp_i8_f32(uint8_t x) { return (float) x; }
static inline float uitofp_i16_f32(uint16_t x) { return (float) x; }
static inline float uitofp_i32_f32(uint32_t x) { return (float) x; }
static inline float uitofp_i64_f32(uint64_t x) { return (float) x; }

static inline int8_t fptosi_f32_i8(float x) { return (int8_t) x; }
static inline int16_t fptosi_f32_i16(float x) { return (int16_t) x; }
static inline int32_t fptosi_f32_i32(float x) { return (int32_t) x; }
static inline int64_t fptosi_f32_i64(float x) { return (int64_t) x; }

static inline uint8_t fptoui_f32_i8(float x) { return (uint8_t) x; }
static inline uint16_t fptoui_f32_i16(float x) { return (uint16_t) x; }
static inline uint32_t fptoui_f32_i32(float x) { return (uint32_t) x; }
static inline uint64_t fptoui_f32_i64(float x) { return (uint64_t) x; }

#ifdef __OPENCL_VERSION__

static inline float fabs32(float x) { return fabs(x); }
static inline float fmax32(float x, float y) { return fmax(x, y); }
static inline float fmin32(float x, float y) { return fmin(x, y); }
static inline float fpow32(float x, float y) { return pow(x, y); }

#else // Not OpenCL, but CUDA or plain C.

static inline float fabs32(float x) { return fabsf(x); }
static inline float fmax32(float x, float y) { return fmaxf(x, y); }
static inline float fmin32(float x, float y) { return fminf(x, y); }
static inline float fpow32(float x, float y) { return powf(x, y); }

#endif

static inline bool futrts_isnan32(float x) { return isnan(x); }
static inline bool futrts_isinf32(float x) { return isinf(x); }

// Single-precision mathematical functions.  OpenCL uses the
// type-generic names; CUDA and plain C use the 'f'-suffixed ones.

#ifdef __OPENCL_VERSION__

static inline float futrts_log32(float x) { return log(x); }
static inline float futrts_log2_32(float x) { return log2(x); }
static inline float futrts_log10_32(float x) { return log10(x); }
static inline float futrts_sqrt32(float x) { return sqrt(x); }
static inline float futrts_exp32(float x) { return exp(x); }
static inline float futrts_cos32(float x) { return cos(x); }
static inline float futrts_sin32(float x) { return sin(x); }
static inline float futrts_tan32(float x) { return tan(x); }
static inline float futrts_acos32(float x) { return acos(x); }
static inline float futrts_asin32(float x) { return asin(x); }
static inline float futrts_atan32(float x) { return atan(x); }
static inline float futrts_cosh32(float x) { return cosh(x); }
static inline float futrts_sinh32(float x) { return sinh(x); }
static inline float futrts_tanh32(float x) { return tanh(x); }
static inline float futrts_acosh32(float x) { return acosh(x); }
static inline float futrts_asinh32(float x) { return asinh(x); }
static inline float futrts_atanh32(float x) { return atanh(x); }
static inline float futrts_atan2_32(float x, float y) { return atan2(x, y); }
static inline float futrts_hypot32(float x, float y) { return hypot(x, y); }
static inline float futrts_gamma32(float x) { return tgamma(x); }
static inline float futrts_lgamma32(float x) { return lgamma(x); }
static inline float fmod32(float x, float y) { return fmod(x, y); }
static inline float futrts_round32(float x) { return rint(x); }
static inline float futrts_floor32(float x) { return floor(x); }
static inline float futrts_ceil32(float x) { return ceil(x); }
static inline float futrts_lerp32(float v0, float v1, float t) { return mix(v0, v1, t); }
static inline float futrts_mad32(float a, float b, float c) { return mad(a, b, c); }
static inline float futrts_fma32(float a, float b, float c) { return fma(a, b, c); }

#else // Not OpenCL, but CUDA or plain C.

static inline float futrts_log32(float x) { return logf(x); }
static inline float futrts_log2_32(float x) { return log2f(x); }
static inline float futrts_log10_32(float x) { return log10f(x); }
static inline float futrts_sqrt32(float x) { return sqrtf(x); }
static inline float futrts_exp32(float x) { return expf(x); }
static inline float futrts_cos32(float x) { return cosf(x); }
static inline float futrts_sin32(float x) { return sinf(x); }
static inline float futrts_tan32(float x) { return tanf(x); }
static inline float futrts_acos32(float x) { return acosf(x); }
static inline float futrts_asin32(float x) { return asinf(x); }
static inline float futrts_atan32(float x) { return atanf(x); }
static inline float futrts_cosh32(float x) { return coshf(x); }
static inline float futrts_sinh32(float x) { return sinhf(x); }
static inline float futrts_tanh32(float x) { return tanhf(x); }
static inline float futrts_acosh32(float x) { return acoshf(x); }
static inline float futrts_asinh32(float x) { return asinhf(x); }
static inline float futrts_atanh32(float x) { return atanhf(x); }
static inline float futrts_atan2_32(float x, float y) { return atan2f(x, y); }
static inline float futrts_hypot32(float x, float y) { return hypotf(x, y); }
static inline float futrts_gamma32(float x) { return tgammaf(x); }
static inline float futrts_lgamma32(float x) { return lgammaf(x); }
static inline float fmod32(float x, float y) { return fmodf(x, y); }
static inline float futrts_round32(float x) { return rintf(x); }
static inline float futrts_floor32(float x) { return floorf(x); }
static inline float futrts_ceil32(float x) { return ceilf(x); }
static inline float futrts_lerp32(float v0, float v1, float t) { return v0 + (v1 - v0) * t; }
static inline float futrts_mad32(float a, float b, float c) { return a * b + c; }
static inline float futrts_fma32(float a, float b, float c) { return fmaf(a, b, c); }

#endif

// Reinterpret the bits of a float as an int32_t (and back) by going
// through a union.

static inline int32_t futrts_to_bits32(float x) {
  union {
    float f;
    int32_t t;
  } p;

  p.f = x;
  return p.t;
}

static inline float futrts_from_bits32(int32_t x) {
  union {
    int32_t f;
    float t;
  } p;

  p.f = x;
  return p.t;
}

// Sign of x as -1, 0 or 1; a NaN argument is returned unchanged.
static inline float fsignum32(float x) {
  return futrts_isnan32(x) ? x : (x > 0) - (x < 0);
}

#ifdef FUTHARK_F64_ENABLED

// Double-precision floating-point arithmetic and comparisons.

static inline double fdiv64(double x, double y) { return x / y; }
static inline double fadd64(double x, double y) { return x + y; }
static inline double fsub64(double x, double y) { return x - y; }
static inline double fmul64(double x, double y) { return x * y; }

static inline bool cmplt64(double x, double y) { return x < y; }
static inline bool cmple64(double x, double y) { return x <= y; }

// Conversions between integers and double-precision floats, done
// with plain C casts.

static inline double sitofp_i8_f64(int8_t x) { return (double) x; }
static inline double sitofp_i16_f64(int16_t x) { return (double) x; }
static inline double sitofp_i32_f64(int32_t x) { return (double) x; }
static inline double sitofp_i64_f64(int64_t x) { return (double) x; }

static inline double uitofp_i8_f64(uint8_t x) { return (double) x; }
static inline double uitofp_i16_f64(uint16_t x) { return (double) x; }
static inline double uitofp_i32_f64(uint32_t x) { return (double) x; }
static inline double uitofp_i64_f64(uint64_t x) { return (double) x; }

static inline int8_t fptosi_f64_i8(double x) { return (int8_t) x; }
static inline int16_t fptosi_f64_i16(double x) { return (int16_t) x; }
static inline int32_t fptosi_f64_i32(double x) { return (int32_t) x; }
static inline int64_t fptosi_f64_i64(double x) { return (int64_t) x; }

static inline uint8_t fptoui_f64_i8(double x) { return (uint8_t) x; }
static inline uint16_t fptoui_f64_i16(double x) { return (uint16_t) x; }
static inline uint32_t fptoui_f64_i32(double x) { return (uint32_t) x; }
static inline uint64_t fptoui_f64_i64(double x) { return (uint64_t) x; }

// Double-precision mathematical functions.

static inline double fabs64(double x) { return fabs(x); }
static inline double fmax64(double x, double y) { return fmax(x, y); }
static inline double fmin64(double x, double y) { return fmin(x, y); }
static inline double fpow64(double x, double y) { return pow(x, y); }
static inline double futrts_log64(double x) { return log(x); }
static inline double futrts_log2_64(double x) { return log2(x); }
static inline double futrts_log10_64(double x) { return log10(x); }
static inline double futrts_sqrt64(double x) { return sqrt(x); }
static inline double futrts_exp64(double x) { return exp(x); }
static inline double futrts_cos64(double x) { return cos(x); }
static inline double futrts_sin64(double x) { return sin(x); }
static inline double futrts_tan64(double x) { return tan(x); }
static inline double futrts_acos64(double x) { return acos(x); }
static inline double futrts_asin64(double x) { return asin(x); }
static inline double futrts_atan64(double x) { return atan(x); }
static inline double futrts_cosh64(double x) { return cosh(x); }
static inline double futrts_sinh64(double x) { return sinh(x); }
static inline double futrts_tanh64(double x) { return tanh(x); }
static inline double futrts_acosh64(double x) { return acosh(x); }
static inline double futrts_asinh64(double x) { return asinh(x); }
static inline double futrts_atanh64(double x) { return atanh(x); }
static inline double futrts_atan2_64(double x, double y) { return atan2(x, y); }
static inline double futrts_hypot64(double x, double y) { return hypot(x, y); }
static inline double futrts_gamma64(double x) { return tgamma(x); }
static inline double futrts_lgamma64(double x) { return lgamma(x); }
static inline double futrts_fma64(double a, double b, double c) { return fma(a, b, c); }
static inline double futrts_round64(double x) { return rint(x); }
static inline double futrts_ceil64(double x) { return ceil(x); }
static inline double futrts_floor64(double x) { return floor(x); }

static inline bool futrts_isnan64(double x) { return isnan(x); }
static inline bool futrts_isinf64(double x) { return isinf(x); }

// Reinterpret the bits of a double as an int64_t (and back) by going
// through a union.

static inline int64_t futrts_to_bits64(double x) {
  union {
    double f;
    int64_t t;
  } p;

  p.f = x;
  return p.t;
}

static inline double futrts_from_bits64(int64_t x) {
  union {
    int64_t f;
    double t;
  } p;

  p.f = x;
  return p.t;
}

static inline double fmod64(double x, double y) { return fmod(x, y); }

// Sign of x as -1, 0 or 1; a NaN argument is returned unchanged.
static inline double fsignum64(double x) {
  return futrts_isnan64(x) ? x : (x > 0) - (x < 0);
}

static inline double futrts_lerp64(double v0, double v1, double t) {
#ifdef __OPENCL_VERSION__
  return mix(v0, v1, t);
#else
  return v0 + (v1 - v0) * t;
#endif
}

static inline double futrts_mad64(double a, double b, double c) {
#ifdef __OPENCL_VERSION__
  return mad(a, b, c);
#else
  return a * b + c;
#endif
}

// Conversions between floating-point precisions.

static inline float fpconv_f32_f32(float x) { return (float) x; }
static inline double fpconv_f32_f64(float x) { return (double) x; }
static inline float fpconv_f64_f32(double x) { return (float) x; }
static inline double fpconv_f64_f64(double x) { return (double) x; }

#endif

// End of scalar.h.