/*
 * Copyright Supranational LLC
 * Licensed under the Apache License, Version 2.0, see LICENSE for details.
 * SPDX-License-Identifier: Apache-2.0
 */

#include "fields.h"

#ifdef __OPTIMIZE_SIZE__
/*
 * Raising to P-2 with generic square-and-multiply takes 608
 * multiplications for inversion modulo the BLS12-381 prime, 32% more
 * than the corresponding optimal addition chain, plus mispredicted-
 * branch penalties on top of that... The addition chain in the #else
 * branch below was measured to be >50% faster, but is considerably
 * larger, hence this generic fallback when optimizing for size.
 */
static void flt_reciprocal_fp(vec384 out, const vec384 inp)
{
    static const byte BLS12_381_P_minus_2[] = {
        TO_BYTES(0xb9feffffffffaaa9), TO_BYTES(0x1eabfffeb153ffff),
        TO_BYTES(0x6730d2a0f6b0f624), TO_BYTES(0x64774b84f38512bf),
        TO_BYTES(0x4b1ba7b6434bacd7), TO_BYTES(0x1a0111ea397fe69a)
    };

    exp_mont_384(out, inp, BLS12_381_P_minus_2, 381, BLS12_381_P, p0);
}
#else
# define sqr(ret,a)             sqr_fp(ret,a)
# define mul(ret,a,b)           mul_fp(ret,a,b)
# define sqr_n_mul(ret,a,n,b)   sqr_n_mul_fp(ret,a,n,b)

# include "recip-addchain.h"
static void flt_reciprocal_fp(vec384 out, const vec384 inp)
{
    RECIPROCAL_MOD_BLS12_381_P(out, inp, vec384);
}
# undef RECIPROCAL_MOD_BLS12_381_P
# undef sqr_n_mul
# undef mul
# undef sqr
#endif

static void flt_reciprocal_fp2(vec384x out, const vec384x inp)
{
    vec384 t0, t1;

    /*
     * |out| = 1/(a + b*i) = a/(a^2+b^2) - b/(a^2+b^2)*i
     */
    sqr_fp(t0, inp[0]);
    sqr_fp(t1, inp[1]);
    add_fp(t0, t0, t1);
    flt_reciprocal_fp(t1, t0);
    mul_fp(out[0], inp[0], t1);
    mul_fp(out[1], inp[1], t1);
    neg_fp(out[1], out[1]);
}

static void reciprocal_fp(vec384 out, const vec384 inp)
{
    static const vec384 Px8 = {     /* left-aligned value of the modulus */
        TO_LIMB_T(0xcff7fffffffd5558), TO_LIMB_T(0xf55ffff58a9ffffd),
        TO_LIMB_T(0x39869507b587b120), TO_LIMB_T(0x23ba5c279c2895fb),
        TO_LIMB_T(0x58dd3db21a5d66bb), TO_LIMB_T(0xd0088f51cbff34d2)
    };
#ifdef __BLST_NO_ASM__
# define RRx4 BLS12_381_RR
#else
    static const vec384 RRx4 = {    /* (4<<768)%P */
        TO_LIMB_T(0x5f7e7cd070d107c2), TO_LIMB_T(0xec839a9ac49c13c8),
        TO_LIMB_T(0x6933786f44f4ef0b), TO_LIMB_T(0xd6bf8b9c676be983),
        TO_LIMB_T(0xd3adaaaa4dcefb06), TO_LIMB_T(0x12601bc1d82bc175)
    };
#endif
    union { vec768 x; vec384 r[2]; } temp;

    ct_inverse_mod_383(temp.x, inp, BLS12_381_P, Px8);
    redc_mont_384(temp.r[0], temp.x, BLS12_381_P, p0);
    mul_mont_384(temp.r[0], temp.r[0], RRx4, BLS12_381_P, p0);

#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    /*
     * Self-check: |temp.r[0]| * |inp| should be one in Montgomery form,
     * or zero for zero |inp|; any mismatch goes straight to the
     * Fermat-based flt_reciprocal_fp fallback.
     */
    mul_mont_384(temp.r[1], temp.r[0], inp, BLS12_381_P, p0);
    if (vec_is_equal(temp.r[1], BLS12_381_Rx.p, sizeof(vec384)) |
        vec_is_zero(temp.r[1], sizeof(vec384)))
        vec_copy(out, temp.r[0], sizeof(vec384));
    else
        flt_reciprocal_fp(out, inp);
#else
    vec_copy(out, temp.r[0], sizeof(vec384));
#endif
#undef RRx4
}

void blst_fp_inverse(vec384 out, const vec384 inp)
{   reciprocal_fp(out, inp);   }

void blst_fp_eucl_inverse(vec384 ret, const vec384 a)
{   reciprocal_fp(ret, a);   }

static void reciprocal_fp2(vec384x out, const vec384x inp)
{
    vec384 t0, t1;

    /*
     * |out| = 1/(a + b*i) = a/(a^2+b^2) - b/(a^2+b^2)*i
     */
    sqr_fp(t0, inp[0]);
    sqr_fp(t1, inp[1]);
    add_fp(t0, t0, t1);
    reciprocal_fp(t1, t0);
    mul_fp(out[0], inp[0], t1);
    mul_fp(out[1], inp[1], t1);
    neg_fp(out[1], out[1]);
}

void blst_fp2_inverse(vec384x out, const vec384x inp)
{   reciprocal_fp2(out, inp);   }

void blst_fp2_eucl_inverse(vec384x out, const vec384x inp)
{   reciprocal_fp2(out, inp);   }
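/*
 * Inversion in the scalar field below follows the same pattern as
 * |reciprocal_fp| above: the constant-time Euclidean-style
 * ct_inverse_mod_256 returns the inverse scaled by a power of two,
 * and the subsequent Montgomery reduction plus multiplication by R^2
 * (BLS12_381_rRR) remove the excess factor, leaving the result in
 * Montgomery representation.
 */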
static void reciprocal_fr(vec256 out, const vec256 inp)
{
    static const vec256 rx2 = {     /* left-aligned value of the modulus */
        TO_LIMB_T(0xfffffffe00000002), TO_LIMB_T(0xa77b4805fffcb7fd),
        TO_LIMB_T(0x6673b0101343b00a), TO_LIMB_T(0xe7db4ea6533afa90),
    };
    vec512 temp;

    ct_inverse_mod_256(temp, inp, BLS12_381_r, rx2);
    redc_mont_256(out, temp, BLS12_381_r, r0);
    mul_mont_sparse_256(out, out, BLS12_381_rRR, BLS12_381_r, r0);
}

void blst_fr_inverse(vec256 out, const vec256 inp)
{   reciprocal_fr(out, inp);   }

void blst_fr_eucl_inverse(vec256 out, const vec256 inp)
{   reciprocal_fr(out, inp);   }
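/*
 * Minimal usage sketch (illustrative only, not part of the library
 * build); |a| is assumed to hold a reduced scalar in Montgomery form:
 *
 *     vec256 a_inv;
 *     blst_fr_inverse(a_inv, a);   // a_inv = 1/a; zero input gives zero
 */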