/* * Copyright Supranational LLC * Licensed under the Apache License, Version 2.0, see LICENSE for details. * SPDX-License-Identifier: Apache-2.0 */ #include "fields.h" #ifdef __OPTIMIZE_SIZE__ static void recip_sqrt_fp_3mod4(vec384 out, const vec384 inp) { static const byte BLS_12_381_P_minus_3_div_4[] = { TO_BYTES(0xee7fbfffffffeaaa), TO_BYTES(0x07aaffffac54ffff), TO_BYTES(0xd9cc34a83dac3d89), TO_BYTES(0xd91dd2e13ce144af), TO_BYTES(0x92c6e9ed90d2eb35), TO_BYTES(0x0680447a8e5ff9a6) }; exp_mont_384(out, inp, BLS_12_381_P_minus_3_div_4, 379, BLS12_381_P, p0); } #else # if 1 /* * "383"-bit variant omits full reductions at the ends of squarings, * which results in up to ~15% improvement. [One can improve further * by omitting full reductions even after multiplications and * performing final reduction at the very end of the chain.] */ static inline void sqr_n_mul_fp(vec384 out, const vec384 a, size_t count, const vec384 b) { sqr_n_mul_mont_383(out, a, count, BLS12_381_P, p0, b); } # else static void sqr_n_mul_fp(vec384 out, const vec384 a, size_t count, const vec384 b) { while(count--) { sqr_fp(out, a); a = out; } mul_fp(out, out, b); } # endif # define sqr(ret,a) sqr_fp(ret,a) # define mul(ret,a,b) mul_fp(ret,a,b) # define sqr_n_mul(ret,a,n,b) sqr_n_mul_fp(ret,a,n,b) # include "sqrt-addchain.h" static void recip_sqrt_fp_3mod4(vec384 out, const vec384 inp) { RECIP_SQRT_MOD_BLS12_381_P(out, inp, vec384); } # undef RECIP_SQRT_MOD_BLS12_381_P # undef sqr_n_mul # undef sqr # undef mul #endif static bool_t recip_sqrt_fp(vec384 out, const vec384 inp) { vec384 t0, t1; bool_t ret; recip_sqrt_fp_3mod4(t0, inp); mul_fp(t1, t0, inp); sqr_fp(t1, t1); ret = vec_is_equal(t1, inp, sizeof(t1)); vec_copy(out, t0, sizeof(t0)); return ret; } static bool_t sqrt_fp(vec384 out, const vec384 inp) { vec384 t0, t1; bool_t ret; recip_sqrt_fp_3mod4(t0, inp); mul_fp(t0, t0, inp); sqr_fp(t1, t0); ret = vec_is_equal(t1, inp, sizeof(t1)); vec_copy(out, t0, sizeof(t0)); return ret; } int blst_fp_sqrt(vec384 out, const vec384 inp) { return (int)sqrt_fp(out, inp); } int blst_fp_is_square(const vec384 inp) { return (int)ct_is_square_mod_384(inp, BLS12_381_P); } static bool_t sqrt_align_fp2(vec384x out, const vec384x ret, const vec384x sqrt, const vec384x inp) { static const vec384x sqrt_minus_1 = { { 0 }, { ONE_MONT_P } }; static const vec384x sqrt_sqrt_minus_1 = { /* * "magic" number is ±2^((p-3)/4)%p, which is "1/sqrt(2)", * in quotes because 2*"1/sqrt(2)"^2 == -1 mod p, not 1, * but it pivots into "complex" plane nevertheless... */ { TO_LIMB_T(0x3e2f585da55c9ad1), TO_LIMB_T(0x4294213d86c18183), TO_LIMB_T(0x382844c88b623732), TO_LIMB_T(0x92ad2afd19103e18), TO_LIMB_T(0x1d794e4fac7cf0b9), TO_LIMB_T(0x0bd592fc7d825ec8) }, { TO_LIMB_T(0x7bcfa7a25aa30fda), TO_LIMB_T(0xdc17dec12a927e7c), TO_LIMB_T(0x2f088dd86b4ebef1), TO_LIMB_T(0xd1ca2087da74d4a7), TO_LIMB_T(0x2da2596696cebc1d), TO_LIMB_T(0x0e2b7eedbbfd87d2) } }; static const vec384x sqrt_minus_sqrt_minus_1 = { { TO_LIMB_T(0x7bcfa7a25aa30fda), TO_LIMB_T(0xdc17dec12a927e7c), TO_LIMB_T(0x2f088dd86b4ebef1), TO_LIMB_T(0xd1ca2087da74d4a7), TO_LIMB_T(0x2da2596696cebc1d), TO_LIMB_T(0x0e2b7eedbbfd87d2) }, { TO_LIMB_T(0x7bcfa7a25aa30fda), TO_LIMB_T(0xdc17dec12a927e7c), TO_LIMB_T(0x2f088dd86b4ebef1), TO_LIMB_T(0xd1ca2087da74d4a7), TO_LIMB_T(0x2da2596696cebc1d), TO_LIMB_T(0x0e2b7eedbbfd87d2) } }; vec384x coeff, t0, t1; bool_t is_sqrt, flag; /* * Instead of multiple trial squarings we can perform just one * and see if the result is "rotated by multiple of 90°" in * relation to |inp|, and "rotate" |ret| accordingly. */ sqr_fp2(t0, sqrt); /* "sqrt(|inp|)"^2 = (a + b*i)^2 = (a^2-b^2) + 2ab*i */ /* (a^2-b^2) + 2ab*i == |inp| ? |ret| is spot on */ sub_fp2(t1, t0, inp); is_sqrt = vec_is_zero(t1, sizeof(t1)); vec_copy(coeff, BLS12_381_Rx.p2, sizeof(coeff)); /* -(a^2-b^2) - 2ab*i == |inp| ? "rotate |ret| by 90°" */ add_fp2(t1, t0, inp); vec_select(coeff, sqrt_minus_1, coeff, sizeof(coeff), flag = vec_is_zero(t1, sizeof(t1))); is_sqrt |= flag; /* 2ab - (a^2-b^2)*i == |inp| ? "rotate |ret| by 135°" */ sub_fp(t1[0], t0[0], inp[1]); add_fp(t1[1], t0[1], inp[0]); vec_select(coeff, sqrt_sqrt_minus_1, coeff, sizeof(coeff), flag = vec_is_zero(t1, sizeof(t1))); is_sqrt |= flag; /* -2ab + (a^2-b^2)*i == |inp| ? "rotate |ret| by 45°" */ add_fp(t1[0], t0[0], inp[1]); sub_fp(t1[1], t0[1], inp[0]); vec_select(coeff, sqrt_minus_sqrt_minus_1, coeff, sizeof(coeff), flag = vec_is_zero(t1, sizeof(t1))); is_sqrt |= flag; /* actual "rotation" */ mul_fp2(out, ret, coeff); return is_sqrt; } /* * |inp| = a + b*i */ static bool_t recip_sqrt_fp2(vec384x out, const vec384x inp, const vec384x recip_ZZZ, const vec384x magic_ZZZ) { vec384 aa, bb, cc; vec384x inp_; bool_t is_sqrt; sqr_fp(aa, inp[0]); sqr_fp(bb, inp[1]); add_fp(aa, aa, bb); is_sqrt = recip_sqrt_fp(cc, aa); /* 1/sqrt(a²+b²) */ /* if |inp| doesn't have quadratic residue, multiply by "1/Z³" ... */ mul_fp2(inp_, inp, recip_ZZZ); /* ... and adjust |aa| and |cc| accordingly */ { vec384 za, zc; mul_fp(za, aa, magic_ZZZ[0]); /* aa*(za² + zb²) */ mul_fp(zc, cc, magic_ZZZ[1]); /* cc*(za² + zb²)^((p-3)/4) */ vec_select(aa, aa, za, sizeof(aa), is_sqrt); vec_select(cc, cc, zc, sizeof(cc), is_sqrt); } vec_select(inp_, inp, inp_, sizeof(inp_), is_sqrt); mul_fp(aa, aa, cc); /* sqrt(a²+b²) */ sub_fp(bb, inp_[0], aa); add_fp(aa, inp_[0], aa); vec_select(aa, bb, aa, sizeof(aa), vec_is_zero(aa, sizeof(aa))); div_by_2_fp(aa, aa); /* (a ± sqrt(a²+b²))/2 */ /* if it says "no sqrt," final "align" will find right one... */ (void)recip_sqrt_fp(out[0], aa); /* 1/sqrt((a ± sqrt(a²+b²))/2) */ div_by_2_fp(out[1], inp_[1]); mul_fp(out[1], out[1], out[0]); /* b/(2*sqrt((a ± sqrt(a²+b²))/2)) */ mul_fp(out[0], out[0], aa); /* sqrt((a ± sqrt(a²+b²))/2) */ /* bound to succeed */ (void)sqrt_align_fp2(out, out, out, inp_); mul_fp(out[0], out[0], cc); /* inverse the result */ mul_fp(out[1], out[1], cc); neg_fp(out[1], out[1]); return is_sqrt; } static bool_t sqrt_fp2(vec384x out, const vec384x inp) { vec384x ret; vec384 aa, bb; sqr_fp(aa, inp[0]); sqr_fp(bb, inp[1]); add_fp(aa, aa, bb); /* don't pay attention to return value, final "align" will tell... */ (void)sqrt_fp(aa, aa); /* sqrt(a²+b²) */ sub_fp(bb, inp[0], aa); add_fp(aa, inp[0], aa); vec_select(aa, bb, aa, sizeof(aa), vec_is_zero(aa, sizeof(aa))); div_by_2_fp(aa, aa); /* (a ± sqrt(a²+b²))/2 */ /* if it says "no sqrt," final "align" will find right one... */ (void)recip_sqrt_fp(ret[0], aa); /* 1/sqrt((a ± sqrt(a²+b²))/2) */ div_by_2_fp(ret[1], inp[1]); mul_fp(ret[1], ret[1], ret[0]); /* b/(2*sqrt((a ± sqrt(a²+b²))/2)) */ mul_fp(ret[0], ret[0], aa); /* sqrt((a ± sqrt(a²+b²))/2) */ /* * Now see if |ret| is or can be made sqrt(|inp|)... */ return sqrt_align_fp2(out, ret, ret, inp); } int blst_fp2_sqrt(vec384x out, const vec384x inp) { return (int)sqrt_fp2(out, inp); } int blst_fp2_is_square(const vec384x inp) { vec384 aa, bb; sqr_fp(aa, inp[0]); sqr_fp(bb, inp[1]); add_fp(aa, aa, bb); return (int)ct_is_square_mod_384(aa, BLS12_381_P); }