/*
 * Copyright Supranational LLC
 * Licensed under the Apache License, Version 2.0, see LICENSE for details.
 * SPDX-License-Identifier: Apache-2.0
 */

#include "fields.h"

/*
 * Fp2  = Fp[u]  / (u^2 + 1)
 * Fp6  = Fp2[v] / (v^3 - u - 1)
 * Fp12 = Fp6[w] / (w^2 - v)
 */

static inline void mul_by_u_plus_1_fp2(vec384x ret, const vec384x a)
{   mul_by_1_plus_i_mod_384x(ret, a, BLS12_381_P);   }

#if 1 && !defined(__BLST_NO_ASM__)
#define __FP2x2__
/*
 * Fp2x2 is a "widened" version of Fp2, which makes it possible to
 * consolidate reductions from several multiplications. In other words,
 * instead of "mul_redc-mul_redc-add" we get "mul-mul-add-redc," where
 * the latter addition is double-width. To be more specific, this gives
 * a ~7-10% faster pairing, depending on the platform.
 */
typedef vec768 vec768x[2];

static inline void add_fp2x2(vec768x ret, const vec768x a, const vec768x b)
{
    add_mod_384x384(ret[0], a[0], b[0], BLS12_381_P);
    add_mod_384x384(ret[1], a[1], b[1], BLS12_381_P);
}

static inline void sub_fp2x2(vec768x ret, const vec768x a, const vec768x b)
{
    sub_mod_384x384(ret[0], a[0], b[0], BLS12_381_P);
    sub_mod_384x384(ret[1], a[1], b[1], BLS12_381_P);
}

static inline void mul_by_u_plus_1_fp2x2(vec768x ret, const vec768x a)
{
    /* caveat lector! |ret| may not be same as |a| */
    sub_mod_384x384(ret[0], a[0], a[1], BLS12_381_P);
    add_mod_384x384(ret[1], a[0], a[1], BLS12_381_P);
}

static inline void redc_fp2x2(vec384x ret, const vec768x a)
{
    redc_mont_384(ret[0], a[0], BLS12_381_P, p0);
    redc_mont_384(ret[1], a[1], BLS12_381_P, p0);
}

static void mul_fp2x2(vec768x ret, const vec384x a, const vec384x b)
{
#if 1
    mul_382x(ret, a, b, BLS12_381_P);   /* +~6% in Miller loop */
#else
    union { vec384 x[2]; vec768 x2; } t;

    add_mod_384(t.x[0], a[0], a[1], BLS12_381_P);
    add_mod_384(t.x[1], b[0], b[1], BLS12_381_P);
    mul_384(ret[1], t.x[0], t.x[1]);

    mul_384(ret[0], a[0], b[0]);
    mul_384(t.x2, a[1], b[1]);

    sub_mod_384x384(ret[1], ret[1], ret[0], BLS12_381_P);
    sub_mod_384x384(ret[1], ret[1], t.x2, BLS12_381_P);

    sub_mod_384x384(ret[0], ret[0], t.x2, BLS12_381_P);
#endif
}

static void sqr_fp2x2(vec768x ret, const vec384x a)
{
#if 1
    sqr_382x(ret, a, BLS12_381_P);      /* +~5% in final exponentiation */
#else
    vec384 t0, t1;

    add_mod_384(t0, a[0], a[1], BLS12_381_P);
    sub_mod_384(t1, a[0], a[1], BLS12_381_P);

    mul_384(ret[1], a[0], a[1]);
    add_mod_384x384(ret[1], ret[1], ret[1], BLS12_381_P);

    mul_384(ret[0], t0, t1);
#endif
}
#endif  /* __FP2x2__ */
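
/*
 * For illustration only (hence #if 0): the "mul-mul-add-redc" pattern
 * described above, computing a0*b0 + a1*b1 in Fp2 with a single
 * Montgomery reduction instead of one per product. The helper name is
 * hypothetical, not part of the library's interface.
 */
#if 0
static void mul_mul_add_redc_fp2(vec384x ret,
                                 const vec384x a0, const vec384x b0,
                                 const vec384x a1, const vec384x b1)
{
    vec768x t0, t1;

    mul_fp2x2(t0, a0, b0);      /* double-width product, no reduction */
    mul_fp2x2(t1, a1, b1);      /* ditto */
    add_fp2x2(t0, t0, t1);      /* double-width addition */
    redc_fp2x2(ret, t0);        /* one reduction instead of two */
}
#endif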

/*
 * Fp6 extension
 */
#if defined(__FP2x2__) /* ~10-13% improvement for mul_fp12 and sqr_fp12 */
typedef vec768x vec768fp6[3];

static inline void sub_fp6x2(vec768fp6 ret, const vec768fp6 a,
                                            const vec768fp6 b)
{
    sub_fp2x2(ret[0], a[0], b[0]);
    sub_fp2x2(ret[1], a[1], b[1]);
    sub_fp2x2(ret[2], a[2], b[2]);
}

static void mul_fp6x2(vec768fp6 ret, const vec384fp6 a, const vec384fp6 b)
{
    vec768x t0, t1, t2;
    vec384x aa, bb;

    mul_fp2x2(t0, a[0], b[0]);
    mul_fp2x2(t1, a[1], b[1]);
    mul_fp2x2(t2, a[2], b[2]);

    /* ret[0] = ((a1 + a2)*(b1 + b2) - a1*b1 - a2*b2)*(u+1) + a0*b0
              = (a1*b2 + a2*b1)*(u+1) + a0*b0 */
    add_fp2(aa, a[1], a[2]);
    add_fp2(bb, b[1], b[2]);
    mul_fp2x2(ret[0], aa, bb);
    sub_fp2x2(ret[0], ret[0], t1);
    sub_fp2x2(ret[0], ret[0], t2);
    mul_by_u_plus_1_fp2x2(ret[1], ret[0]);  /* borrow ret[1] for a moment */
    add_fp2x2(ret[0], ret[1], t0);

    /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1 + a2*b2*(u+1)
              = a0*b1 + a1*b0 + a2*b2*(u+1) */
    add_fp2(aa, a[0], a[1]);
    add_fp2(bb, b[0], b[1]);
    mul_fp2x2(ret[1], aa, bb);
    sub_fp2x2(ret[1], ret[1], t0);
    sub_fp2x2(ret[1], ret[1], t1);
    mul_by_u_plus_1_fp2x2(ret[2], t2);      /* borrow ret[2] for a moment */
    add_fp2x2(ret[1], ret[1], ret[2]);

    /* ret[2] = (a0 + a2)*(b0 + b2) - a0*b0 - a2*b2 + a1*b1
              = a0*b2 + a2*b0 + a1*b1 */
    add_fp2(aa, a[0], a[2]);
    add_fp2(bb, b[0], b[2]);
    mul_fp2x2(ret[2], aa, bb);
    sub_fp2x2(ret[2], ret[2], t0);
    sub_fp2x2(ret[2], ret[2], t2);
    add_fp2x2(ret[2], ret[2], t1);
}

static inline void redc_fp6x2(vec384fp6 ret, const vec768fp6 a)
{
    redc_fp2x2(ret[0], a[0]);
    redc_fp2x2(ret[1], a[1]);
    redc_fp2x2(ret[2], a[2]);
}

static void mul_fp6(vec384fp6 ret, const vec384fp6 a, const vec384fp6 b)
{
    vec768fp6 r;

    mul_fp6x2(r, a, b);
    redc_fp6x2(ret, r);     /* narrow to normal width */
}

static void sqr_fp6(vec384fp6 ret, const vec384fp6 a)
{
    vec768x s0, m01, m12, s2, rx;

    sqr_fp2x2(s0, a[0]);

    mul_fp2x2(m01, a[0], a[1]);
    add_fp2x2(m01, m01, m01);

    mul_fp2x2(m12, a[1], a[2]);
    add_fp2x2(m12, m12, m12);

    sqr_fp2x2(s2, a[2]);

    /* ret[2] = (a0 + a1 + a2)^2 - a0^2 - a2^2 - 2*(a0*a1) - 2*(a1*a2)
              = a1^2 + 2*(a0*a2) */
    add_fp2(ret[2], a[2], a[1]);
    add_fp2(ret[2], ret[2], a[0]);
    sqr_fp2x2(rx, ret[2]);
    sub_fp2x2(rx, rx, s0);
    sub_fp2x2(rx, rx, s2);
    sub_fp2x2(rx, rx, m01);
    sub_fp2x2(rx, rx, m12);
    redc_fp2x2(ret[2], rx);

    /* ret[0] = a0^2 + 2*(a1*a2)*(u+1) */
    mul_by_u_plus_1_fp2x2(rx, m12);
    add_fp2x2(rx, rx, s0);
    redc_fp2x2(ret[0], rx);

    /* ret[1] = a2^2*(u+1) + 2*(a0*a1) */
    mul_by_u_plus_1_fp2x2(rx, s2);
    add_fp2x2(rx, rx, m01);
    redc_fp2x2(ret[1], rx);
}
#else
static void mul_fp6(vec384fp6 ret, const vec384fp6 a, const vec384fp6 b)
{
    vec384x t0, t1, t2, t3, t4, t5;

    mul_fp2(t0, a[0], b[0]);
    mul_fp2(t1, a[1], b[1]);
    mul_fp2(t2, a[2], b[2]);

    /* ret[0] = ((a1 + a2)*(b1 + b2) - a1*b1 - a2*b2)*(u+1) + a0*b0
              = (a1*b2 + a2*b1)*(u+1) + a0*b0 */
    add_fp2(t4, a[1], a[2]);
    add_fp2(t5, b[1], b[2]);
    mul_fp2(t3, t4, t5);
    sub_fp2(t3, t3, t1);
    sub_fp2(t3, t3, t2);
    mul_by_u_plus_1_fp2(t3, t3);
    /* add_fp2(ret[0], t3, t0); considering possible aliasing... */

    /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1 + a2*b2*(u+1)
              = a0*b1 + a1*b0 + a2*b2*(u+1) */
    add_fp2(t4, a[0], a[1]);
    add_fp2(t5, b[0], b[1]);
    mul_fp2(ret[1], t4, t5);
    sub_fp2(ret[1], ret[1], t0);
    sub_fp2(ret[1], ret[1], t1);
    mul_by_u_plus_1_fp2(t4, t2);
    add_fp2(ret[1], ret[1], t4);

    /* ret[2] = (a0 + a2)*(b0 + b2) - a0*b0 - a2*b2 + a1*b1
              = a0*b2 + a2*b0 + a1*b1 */
    add_fp2(t4, a[0], a[2]);
    add_fp2(t5, b[0], b[2]);
    mul_fp2(ret[2], t4, t5);
    sub_fp2(ret[2], ret[2], t0);
    sub_fp2(ret[2], ret[2], t2);
    add_fp2(ret[2], ret[2], t1);

    add_fp2(ret[0], t3, t0);    /* ... moved from above */
}

static void sqr_fp6(vec384fp6 ret, const vec384fp6 a)
{
    vec384x s0, m01, m12, s2;

    sqr_fp2(s0, a[0]);

    mul_fp2(m01, a[0], a[1]);
    add_fp2(m01, m01, m01);

    mul_fp2(m12, a[1], a[2]);
    add_fp2(m12, m12, m12);

    sqr_fp2(s2, a[2]);

    /* ret[2] = (a0 + a1 + a2)^2 - a0^2 - a2^2 - 2*(a0*a1) - 2*(a1*a2)
              = a1^2 + 2*(a0*a2) */
    add_fp2(ret[2], a[2], a[1]);
    add_fp2(ret[2], ret[2], a[0]);
    sqr_fp2(ret[2], ret[2]);
    sub_fp2(ret[2], ret[2], s0);
    sub_fp2(ret[2], ret[2], s2);
    sub_fp2(ret[2], ret[2], m01);
    sub_fp2(ret[2], ret[2], m12);

    /* ret[0] = a0^2 + 2*(a1*a2)*(u+1) */
    mul_by_u_plus_1_fp2(ret[0], m12);
    add_fp2(ret[0], ret[0], s0);

    /* ret[1] = a2^2*(u+1) + 2*(a0*a1) */
    mul_by_u_plus_1_fp2(ret[1], s2);
    add_fp2(ret[1], ret[1], m01);
}
#endif
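
/*
 * For illustration only (hence #if 0): the interpolation above evaluates
 * each cross term a_i*b_j + a_j*b_i Karatsuba-style as
 * (a_i + a_j)*(b_i + b_j) - a_i*b_i - a_j*b_j, for 6 Fp2 multiplications
 * per Fp6 multiplication. A direct schoolbook rendition of the same
 * reduction by v^3 = u + 1 takes 9, as sketched below; the helper name
 * is hypothetical.
 */
#if 0
static void mul_fp6_schoolbook(vec384fp6 ret, const vec384fp6 a,
                                              const vec384fp6 b)
{
    vec384x r0, r1, r2, t;

    /* ret[0] = a0*b0 + (a1*b2 + a2*b1)*(u+1) */
    mul_fp2(r0, a[1], b[2]);
    mul_fp2(t, a[2], b[1]);
    add_fp2(r0, r0, t);
    mul_by_u_plus_1_fp2(r0, r0);
    mul_fp2(t, a[0], b[0]);
    add_fp2(r0, r0, t);

    /* ret[1] = a0*b1 + a1*b0 + a2*b2*(u+1) */
    mul_fp2(r1, a[2], b[2]);
    mul_by_u_plus_1_fp2(r1, r1);
    mul_fp2(t, a[0], b[1]);
    add_fp2(r1, r1, t);
    mul_fp2(t, a[1], b[0]);
    add_fp2(r1, r1, t);

    /* ret[2] = a0*b2 + a2*b0 + a1*b1 */
    mul_fp2(r2, a[0], b[2]);
    mul_fp2(t, a[2], b[0]);
    add_fp2(r2, r2, t);
    mul_fp2(t, a[1], b[1]);
    add_fp2(r2, r2, t);

    vec_copy(ret[0], r0, sizeof(r0));
    vec_copy(ret[1], r1, sizeof(r1));
    vec_copy(ret[2], r2, sizeof(r2));
}
#endif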

static void add_fp6(vec384fp6 ret, const vec384fp6 a, const vec384fp6 b)
{
    add_fp2(ret[0], a[0], b[0]);
    add_fp2(ret[1], a[1], b[1]);
    add_fp2(ret[2], a[2], b[2]);
}

static void sub_fp6(vec384fp6 ret, const vec384fp6 a, const vec384fp6 b)
{
    sub_fp2(ret[0], a[0], b[0]);
    sub_fp2(ret[1], a[1], b[1]);
    sub_fp2(ret[2], a[2], b[2]);
}

static void neg_fp6(vec384fp6 ret, const vec384fp6 a)
{
    neg_fp2(ret[0], a[0]);
    neg_fp2(ret[1], a[1]);
    neg_fp2(ret[2], a[2]);
}

#if 0
#define mul_by_v_fp6 mul_by_v_fp6
static void mul_by_v_fp6(vec384fp6 ret, const vec384fp6 a)
{
    vec384x t;

    mul_by_u_plus_1_fp2(t, a[2]);
    vec_copy(ret[2], a[1], sizeof(a[1]));
    vec_copy(ret[1], a[0], sizeof(a[0]));
    vec_copy(ret[0], t, sizeof(t));
}
#endif

/*
 * Fp12 extension
 */
#if defined(__FP2x2__)
static void mul_fp12(vec384fp12 ret, const vec384fp12 a, const vec384fp12 b)
{
    vec768fp6 t0, t1, rx;
    vec384fp6 t2;

    mul_fp6x2(t0, a[0], b[0]);
    mul_fp6x2(t1, a[1], b[1]);

    /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1
              = a0*b1 + a1*b0 */
    add_fp6(t2, a[0], a[1]);
    add_fp6(ret[1], b[0], b[1]);
    mul_fp6x2(rx, ret[1], t2);
    sub_fp6x2(rx, rx, t0);
    sub_fp6x2(rx, rx, t1);
    redc_fp6x2(ret[1], rx);

    /* ret[0] = a0*b0 + a1*b1*v */
    mul_by_u_plus_1_fp2x2(rx[0], t1[2]);
    add_fp2x2(rx[0], t0[0], rx[0]);
    add_fp2x2(rx[1], t0[1], t1[0]);
    add_fp2x2(rx[2], t0[2], t1[1]);
    redc_fp6x2(ret[0], rx);
}
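
/*
 * A note on the "a1*b1*v" step above: since v^3 = u + 1, multiplying an
 * Fp6 element (c0, c1, c2) by v rotates its Fp2 coefficients into
 * (c2*(u+1), c0, c1), cf. mul_by_v_fp6 above. The three add_fp2x2 calls
 * fold that rotation straight into the accumulation, which is why t1[2]
 * is the coefficient that picks up the (u+1) factor.
 */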

static inline void mul_by_0y0_fp6x2(vec768fp6 ret, const vec384fp6 a,
                                                   const vec384fp2 b)
{
    mul_fp2x2(ret[1], a[2], b);         /* borrow ret[1] for a moment */
    mul_by_u_plus_1_fp2x2(ret[0], ret[1]);
    mul_fp2x2(ret[1], a[0], b);
    mul_fp2x2(ret[2], a[1], b);
}

static void mul_by_xy0_fp6x2(vec768fp6 ret, const vec384fp6 a,
                                            const vec384fp6 b)
{
    vec768x t0, t1;
    vec384x aa, bb;

    mul_fp2x2(t0, a[0], b[0]);
    mul_fp2x2(t1, a[1], b[1]);

    /* ret[0] = ((a1 + a2)*(b1 + 0) - a1*b1 - a2*0)*(u+1) + a0*b0
              = (a1*0 + a2*b1)*(u+1) + a0*b0 */
    mul_fp2x2(ret[1], a[2], b[1]);      /* borrow ret[1] for a moment */
    mul_by_u_plus_1_fp2x2(ret[0], ret[1]);
    add_fp2x2(ret[0], ret[0], t0);

    /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1 + a2*0*(u+1)
              = a0*b1 + a1*b0 + a2*0*(u+1) */
    add_fp2(aa, a[0], a[1]);
    add_fp2(bb, b[0], b[1]);
    mul_fp2x2(ret[1], aa, bb);
    sub_fp2x2(ret[1], ret[1], t0);
    sub_fp2x2(ret[1], ret[1], t1);

    /* ret[2] = (a0 + a2)*(b0 + 0) - a0*b0 - a2*0 + a1*b1
              = a0*0 + a2*b0 + a1*b1 */
    mul_fp2x2(ret[2], a[2], b[0]);
    add_fp2x2(ret[2], ret[2], t1);
}
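
/*
 * The |xy00z0| argument below packs the three non-zero Fp2 coefficients
 * of a sparse Fp12 operand b = (x + y*v + 0*v^2) + (0 + z*v + 0*v^2)*w;
 * spelled out coefficient by coefficient that is "x, y, 0, 0, z, 0",
 * whence the name. Line evaluations in the Miller loop produce operands
 * of exactly this shape, which is what makes the dedicated routine
 * worthwhile.
 */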
static void mul_by_xy00z0_fp12(vec384fp12 ret, const vec384fp12 a,
                                               const vec384fp6 xy00z0)
{
    vec768fp6 t0, t1, rr;
    vec384fp6 t2;

    mul_by_xy0_fp6x2(t0, a[0], xy00z0);
    mul_by_0y0_fp6x2(t1, a[1], xy00z0[2]);

    /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1
              = a0*b1 + a1*b0 */
    vec_copy(t2[0], xy00z0[0], sizeof(t2[0]));
    add_fp2(t2[1], xy00z0[1], xy00z0[2]);
    add_fp6(ret[1], a[0], a[1]);
    mul_by_xy0_fp6x2(rr, ret[1], t2);
    sub_fp6x2(rr, rr, t0);
    sub_fp6x2(rr, rr, t1);
    redc_fp6x2(ret[1], rr);

    /* ret[0] = a0*b0 + a1*b1*v */
    mul_by_u_plus_1_fp2x2(rr[0], t1[2]);
    add_fp2x2(rr[0], t0[0], rr[0]);
    add_fp2x2(rr[1], t0[1], t1[0]);
    add_fp2x2(rr[2], t0[2], t1[1]);
    redc_fp6x2(ret[0], rr);
}
#else
static void mul_fp12(vec384fp12 ret, const vec384fp12 a, const vec384fp12 b)
{
    vec384fp6 t0, t1, t2;

    mul_fp6(t0, a[0], b[0]);
    mul_fp6(t1, a[1], b[1]);

    /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1
              = a0*b1 + a1*b0 */
    add_fp6(t2, a[0], a[1]);
    add_fp6(ret[1], b[0], b[1]);
    mul_fp6(ret[1], ret[1], t2);
    sub_fp6(ret[1], ret[1], t0);
    sub_fp6(ret[1], ret[1], t1);

    /* ret[0] = a0*b0 + a1*b1*v */
#ifdef mul_by_v_fp6
    mul_by_v_fp6(t1, t1);
    add_fp6(ret[0], t0, t1);
#else
    mul_by_u_plus_1_fp2(t1[2], t1[2]);
    add_fp2(ret[0][0], t0[0], t1[2]);
    add_fp2(ret[0][1], t0[1], t1[0]);
    add_fp2(ret[0][2], t0[2], t1[1]);
#endif
}

static inline void mul_by_0y0_fp6(vec384fp6 ret, const vec384fp6 a,
                                                 const vec384fp2 b)
{
    vec384x t;

    mul_fp2(t, a[2], b);
    mul_fp2(ret[2], a[1], b);
    mul_fp2(ret[1], a[0], b);
    mul_by_u_plus_1_fp2(ret[0], t);
}

static void mul_by_xy0_fp6(vec384fp6 ret, const vec384fp6 a,
                                          const vec384fp6 b)
{
    vec384x t0, t1, /*t2,*/ t3, t4, t5;

    mul_fp2(t0, a[0], b[0]);
    mul_fp2(t1, a[1], b[1]);

    /* ret[0] = ((a1 + a2)*(b1 + 0) - a1*b1 - a2*0)*(u+1) + a0*b0
              = (a1*0 + a2*b1)*(u+1) + a0*b0 */
    mul_fp2(t3, a[2], b[1]);
    mul_by_u_plus_1_fp2(t3, t3);
    /* add_fp2(ret[0], t3, t0); considering possible aliasing... */

    /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1 + a2*0*(u+1)
              = a0*b1 + a1*b0 + a2*0*(u+1) */
    add_fp2(t4, a[0], a[1]);
    add_fp2(t5, b[0], b[1]);
    mul_fp2(ret[1], t4, t5);
    sub_fp2(ret[1], ret[1], t0);
    sub_fp2(ret[1], ret[1], t1);

    /* ret[2] = (a0 + a2)*(b0 + 0) - a0*b0 - a2*0 + a1*b1
              = a0*0 + a2*b0 + a1*b1 */
    mul_fp2(ret[2], a[2], b[0]);
    add_fp2(ret[2], ret[2], t1);

    add_fp2(ret[0], t3, t0);    /* ... moved from above */
}

static void mul_by_xy00z0_fp12(vec384fp12 ret, const vec384fp12 a,
                                               const vec384fp6 xy00z0)
{
    vec384fp6 t0, t1, t2;

    mul_by_xy0_fp6(t0, a[0], xy00z0);
    mul_by_0y0_fp6(t1, a[1], xy00z0[2]);

    /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1
              = a0*b1 + a1*b0 */
    vec_copy(t2[0], xy00z0[0], sizeof(t2[0]));
    add_fp2(t2[1], xy00z0[1], xy00z0[2]);
    add_fp6(ret[1], a[0], a[1]);
    mul_by_xy0_fp6(ret[1], ret[1], t2);
    sub_fp6(ret[1], ret[1], t0);
    sub_fp6(ret[1], ret[1], t1);

    /* ret[0] = a0*b0 + a1*b1*v */
#ifdef mul_by_v_fp6
    mul_by_v_fp6(t1, t1);
    add_fp6(ret[0], t0, t1);
#else
    mul_by_u_plus_1_fp2(t1[2], t1[2]);
    add_fp2(ret[0][0], t0[0], t1[2]);
    add_fp2(ret[0][1], t0[1], t1[0]);
    add_fp2(ret[0][2], t0[2], t1[1]);
#endif
}
#endif

static void sqr_fp12(vec384fp12 ret, const vec384fp12 a)
{
    vec384fp6 t0, t1;

    add_fp6(t0, a[0], a[1]);
#ifdef mul_by_v_fp6
    mul_by_v_fp6(t1, a[1]);
    add_fp6(t1, a[0], t1);
#else
    mul_by_u_plus_1_fp2(t1[2], a[1][2]);
    add_fp2(t1[0], a[0][0], t1[2]);
    add_fp2(t1[1], a[0][1], a[1][0]);
    add_fp2(t1[2], a[0][2], a[1][1]);
#endif
    mul_fp6(t0, t0, t1);
    mul_fp6(t1, a[0], a[1]);

    /* ret[1] = 2*(a0*a1) */
    add_fp6(ret[1], t1, t1);

    /* ret[0] = (a0 + a1)*(a0 + a1*v) - a0*a1 - a0*a1*v
              = a0^2 + a1^2*v */
    sub_fp6(ret[0], t0, t1);
#ifdef mul_by_v_fp6
    mul_by_v_fp6(t1, t1);
    sub_fp6(ret[0], ret[0], t1);
#else
    mul_by_u_plus_1_fp2(t1[2], t1[2]);
    sub_fp2(ret[0][0], ret[0][0], t1[2]);
    sub_fp2(ret[0][1], ret[0][1], t1[0]);
    sub_fp2(ret[0][2], ret[0][2], t1[1]);
#endif
}

static void conjugate_fp12(vec384fp12 a)
{   neg_fp6(a[1], a[1]);   }

static void inverse_fp6(vec384fp6 ret, const vec384fp6 a)
{
    vec384x c0, c1, c2, t0, t1;

    /* c0 = a0^2 - (a1*a2)*(u+1) */
    sqr_fp2(c0, a[0]);
    mul_fp2(t0, a[1], a[2]);
    mul_by_u_plus_1_fp2(t0, t0);
    sub_fp2(c0, c0, t0);

    /* c1 = a2^2*(u+1) - (a0*a1) */
    sqr_fp2(c1, a[2]);
    mul_by_u_plus_1_fp2(c1, c1);
    mul_fp2(t0, a[0], a[1]);
    sub_fp2(c1, c1, t0);

    /* c2 = a1^2 - a0*a2 */
    sqr_fp2(c2, a[1]);
    mul_fp2(t0, a[0], a[2]);
    sub_fp2(c2, c2, t0);

    /* (a2*c1 + a1*c2)*(u+1) + a0*c0 */
    mul_fp2(t0, c1, a[2]);
    mul_fp2(t1, c2, a[1]);
    add_fp2(t0, t0, t1);
    mul_by_u_plus_1_fp2(t0, t0);
    mul_fp2(t1, c0, a[0]);
    add_fp2(t0, t0, t1);

    reciprocal_fp2(t1, t0);

    mul_fp2(ret[0], c0, t1);
    mul_fp2(ret[1], c1, t1);
    mul_fp2(ret[2], c2, t1);
}

static void inverse_fp12(vec384fp12 ret, const vec384fp12 a)
{
    vec384fp6 t0, t1;

    sqr_fp6(t0, a[0]);
    sqr_fp6(t1, a[1]);
#ifdef mul_by_v_fp6
    mul_by_v_fp6(t1, t1);
    sub_fp6(t0, t0, t1);
#else
    mul_by_u_plus_1_fp2(t1[2], t1[2]);
    sub_fp2(t0[0], t0[0], t1[2]);
    sub_fp2(t0[1], t0[1], t1[0]);
    sub_fp2(t0[2], t0[2], t1[1]);
#endif

    inverse_fp6(t1, t0);

    mul_fp6(ret[0], a[0], t1);
    mul_fp6(ret[1], a[1], t1);
    neg_fp6(ret[1], ret[1]);
}
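
/*
 * A note on the two inversions above: both use the norm trick of
 * reducing inversion in an extension to inversion one level down.
 * inverse_fp6 computes the adjugate coefficients c0, c1, c2, so that
 * a * (c0 + c1*v + c2*v^2) collapses to the Fp2 "norm" handed to
 * reciprocal_fp2; inverse_fp12 uses the quadratic version,
 * (a0 + a1*w)^-1 = (a0 - a1*w) / (a0^2 - a1^2*v).
 */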

typedef vec384x vec384fp4[2];

#if defined(__FP2x2__)
static void sqr_fp4(vec384fp4 ret, const vec384x a0, const vec384x a1)
{
    vec768x t0, t1, t2;

    sqr_fp2x2(t0, a0);
    sqr_fp2x2(t1, a1);
    add_fp2(ret[1], a0, a1);

    mul_by_u_plus_1_fp2x2(t2, t1);
    add_fp2x2(t2, t2, t0);
    redc_fp2x2(ret[0], t2);

    sqr_fp2x2(t2, ret[1]);
    sub_fp2x2(t2, t2, t0);
    sub_fp2x2(t2, t2, t1);
    redc_fp2x2(ret[1], t2);
}
#else
static void sqr_fp4(vec384fp4 ret, const vec384x a0, const vec384x a1)
{
    vec384x t0, t1;

    sqr_fp2(t0, a0);
    sqr_fp2(t1, a1);
    add_fp2(ret[1], a0, a1);

    mul_by_u_plus_1_fp2(ret[0], t1);
    add_fp2(ret[0], ret[0], t0);

    sqr_fp2(ret[1], ret[1]);
    sub_fp2(ret[1], ret[1], t0);
    sub_fp2(ret[1], ret[1], t1);
}
#endif
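
/*
 * The cyclotomic squaring below appears to follow the Granger-Scott
 * compressed-squaring idea: for elements of the cyclotomic subgroup,
 * viewed over Fp4, a full square is recoverable from just three Fp4
 * squarings, each output coefficient having the form 3*t - 2*a or
 * 3*t + 2*a, hence the sub/add, double, add triplets below. This is
 * why it pays off for the repeated squarings of the final
 * exponentiation, where full sqr_fp12 would be wasteful.
 */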
static void cyclotomic_sqr_fp12(vec384fp12 ret, const vec384fp12 a)
{
    vec384fp4 t0, t1, t2;

    sqr_fp4(t0, a[0][0], a[1][1]);
    sqr_fp4(t1, a[1][0], a[0][2]);
    sqr_fp4(t2, a[0][1], a[1][2]);

    sub_fp2(ret[0][0], t0[0], a[0][0]);
    add_fp2(ret[0][0], ret[0][0], ret[0][0]);
    add_fp2(ret[0][0], ret[0][0], t0[0]);

    sub_fp2(ret[0][1], t1[0], a[0][1]);
    add_fp2(ret[0][1], ret[0][1], ret[0][1]);
    add_fp2(ret[0][1], ret[0][1], t1[0]);

    sub_fp2(ret[0][2], t2[0], a[0][2]);
    add_fp2(ret[0][2], ret[0][2], ret[0][2]);
    add_fp2(ret[0][2], ret[0][2], t2[0]);

    mul_by_u_plus_1_fp2(t2[1], t2[1]);
    add_fp2(ret[1][0], t2[1], a[1][0]);
    add_fp2(ret[1][0], ret[1][0], ret[1][0]);
    add_fp2(ret[1][0], ret[1][0], t2[1]);

    add_fp2(ret[1][1], t0[1], a[1][1]);
    add_fp2(ret[1][1], ret[1][1], ret[1][1]);
    add_fp2(ret[1][1], ret[1][1], t0[1]);

    add_fp2(ret[1][2], t1[1], a[1][2]);
    add_fp2(ret[1][2], ret[1][2], ret[1][2]);
    add_fp2(ret[1][2], ret[1][2], t1[1]);
}

/*
 * caveat lector! |n| has to be non-zero and not more than 3!
 */
static inline void frobenius_map_fp2(vec384x ret, const vec384x a, size_t n)
{
    vec_copy(ret[0], a[0], sizeof(ret[0]));
    cneg_fp(ret[1], a[1], n & 1);
}

static void frobenius_map_fp6(vec384fp6 ret, const vec384fp6 a, size_t n)
{
    static const vec384x coeffs1[] = {  /* (u + 1)^((P^n - 1) / 3) */
      { { 0 },
        { TO_LIMB_T(0xcd03c9e48671f071), TO_LIMB_T(0x5dab22461fcda5d2),
          TO_LIMB_T(0x587042afd3851b95), TO_LIMB_T(0x8eb60ebe01bacb9e),
          TO_LIMB_T(0x03f97d6e83d050d2), TO_LIMB_T(0x18f0206554638741) } },
      { { TO_LIMB_T(0x30f1361b798a64e8), TO_LIMB_T(0xf3b8ddab7ece5a2a),
          TO_LIMB_T(0x16a8ca3ac61577f7), TO_LIMB_T(0xc26a2ff874fd029b),
          TO_LIMB_T(0x3636b76660701c6e), TO_LIMB_T(0x051ba4ab241b6160) } },
      { { 0 }, { ONE_MONT_P } }
    };
    static const vec384 coeffs2[] = {   /* (u + 1)^((2P^n - 2) / 3) */
        { TO_LIMB_T(0x890dc9e4867545c3), TO_LIMB_T(0x2af322533285a5d5),
          TO_LIMB_T(0x50880866309b7e2c), TO_LIMB_T(0xa20d1b8c7e881024),
          TO_LIMB_T(0x14e4f04fe2db9068), TO_LIMB_T(0x14e56d3f1564853a) },
        { TO_LIMB_T(0xcd03c9e48671f071), TO_LIMB_T(0x5dab22461fcda5d2),
          TO_LIMB_T(0x587042afd3851b95), TO_LIMB_T(0x8eb60ebe01bacb9e),
          TO_LIMB_T(0x03f97d6e83d050d2), TO_LIMB_T(0x18f0206554638741) },
        { TO_LIMB_T(0x43f5fffffffcaaae), TO_LIMB_T(0x32b7fff2ed47fffd),
          TO_LIMB_T(0x07e83a49a2e99d69), TO_LIMB_T(0xeca8f3318332bb7a),
          TO_LIMB_T(0xef148d1ea0f4c069), TO_LIMB_T(0x040ab3263eff0206) }
    };

    frobenius_map_fp2(ret[0], a[0], n);
    frobenius_map_fp2(ret[1], a[1], n);
    frobenius_map_fp2(ret[2], a[2], n);
    --n; /* implied ONE_MONT_P at index 0 */
    mul_fp2(ret[1], ret[1], coeffs1[n]);
    mul_fp(ret[2][0], ret[2][0], coeffs2[n]);
    mul_fp(ret[2][1], ret[2][1], coeffs2[n]);
}

static void frobenius_map_fp12(vec384fp12 ret, const vec384fp12 a, size_t n)
{
    static const vec384x coeffs[] = {   /* (u + 1)^((P^n - 1) / 6) */
      { { TO_LIMB_T(0x07089552b319d465), TO_LIMB_T(0xc6695f92b50a8313),
          TO_LIMB_T(0x97e83cccd117228f), TO_LIMB_T(0xa35baecab2dc29ee),
          TO_LIMB_T(0x1ce393ea5daace4d), TO_LIMB_T(0x08f2220fb0fb66eb) },
        { TO_LIMB_T(0xb2f66aad4ce5d646), TO_LIMB_T(0x5842a06bfc497cec),
          TO_LIMB_T(0xcf4895d42599d394), TO_LIMB_T(0xc11b9cba40a8e8d0),
          TO_LIMB_T(0x2e3813cbe5a0de89), TO_LIMB_T(0x110eefda88847faf) } },
      { { TO_LIMB_T(0xecfb361b798dba3a), TO_LIMB_T(0xc100ddb891865a2c),
          TO_LIMB_T(0x0ec08ff1232bda8e), TO_LIMB_T(0xd5c13cc6f1ca4721),
          TO_LIMB_T(0x47222a47bf7b5c04), TO_LIMB_T(0x0110f184e51c5f59) } },
      { { TO_LIMB_T(0x3e2f585da55c9ad1), TO_LIMB_T(0x4294213d86c18183),
          TO_LIMB_T(0x382844c88b623732), TO_LIMB_T(0x92ad2afd19103e18),
          TO_LIMB_T(0x1d794e4fac7cf0b9), TO_LIMB_T(0x0bd592fc7d825ec8) },
        { TO_LIMB_T(0x7bcfa7a25aa30fda), TO_LIMB_T(0xdc17dec12a927e7c),
          TO_LIMB_T(0x2f088dd86b4ebef1), TO_LIMB_T(0xd1ca2087da74d4a7),
          TO_LIMB_T(0x2da2596696cebc1d), TO_LIMB_T(0x0e2b7eedbbfd87d2) } },
    };

    frobenius_map_fp6(ret[0], a[0], n);
    frobenius_map_fp6(ret[1], a[1], n);
    --n; /* implied ONE_MONT_P at index 0 */
    mul_fp2(ret[1][0], ret[1][0], coeffs[n]);
    mul_fp2(ret[1][1], ret[1][1], coeffs[n]);
    mul_fp2(ret[1][2], ret[1][2], coeffs[n]);
}

/*
 * BLS12-381-specific Fp12 shortcuts.
 */
void blst_fp12_sqr(vec384fp12 ret, const vec384fp12 a)
{   sqr_fp12(ret, a);   }

void blst_fp12_cyclotomic_sqr(vec384fp12 ret, const vec384fp12 a)
{   cyclotomic_sqr_fp12(ret, a);   }

void blst_fp12_mul(vec384fp12 ret, const vec384fp12 a, const vec384fp12 b)
{   mul_fp12(ret, a, b);   }

void blst_fp12_mul_by_xy00z0(vec384fp12 ret, const vec384fp12 a,
                                             const vec384fp6 xy00z0)
{   mul_by_xy00z0_fp12(ret, a, xy00z0);   }

void blst_fp12_conjugate(vec384fp12 a)
{   conjugate_fp12(a);   }

void blst_fp12_inverse(vec384fp12 ret, const vec384fp12 a)
{   inverse_fp12(ret, a);   }

/* caveat lector! |n| has to be non-zero and not more than 3! */
void blst_fp12_frobenius_map(vec384fp12 ret, const vec384fp12 a, size_t n)
{   frobenius_map_fp12(ret, a, n);   }

int blst_fp12_is_equal(const vec384fp12 a, const vec384fp12 b)
{   return (int)vec_is_equal(a, b, sizeof(vec384fp12));   }

int blst_fp12_is_one(const vec384fp12 a)
{
    return (int)(vec_is_equal(a[0][0], BLS12_381_Rx.p2, sizeof(a[0][0])) &
                 vec_is_zero(a[0][1], sizeof(vec384fp12) - sizeof(a[0][0])));
}

const vec384fp12 *blst_fp12_one(void)
{   return (const vec384fp12 *)BLS12_381_Rx.p12;   }

void blst_bendian_from_fp12(unsigned char ret[48*12], const vec384fp12 a)
{
    size_t i, j;
    vec384 out;

    for (i = 0; i < 3; i++) {
        for (j = 0; j < 2; j++) {
            from_fp(out, a[j][i][0]);
            be_bytes_from_limbs(ret, out, sizeof(vec384));  ret += 48;
            from_fp(out, a[j][i][1]);
            be_bytes_from_limbs(ret, out, sizeof(vec384));  ret += 48;
        }
    }
}

size_t blst_fp12_sizeof(void)
{   return sizeof(vec384fp12);   }
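
/*
 * For illustration only (hence #if 0): a self-check built purely from
 * the public shortcuts above. For any invertible |a|, a * a^-1 must
 * compare equal to the unit element. The function name is hypothetical.
 */
#if 0
static int fp12_inverse_sanity(const vec384fp12 a)
{
    vec384fp12 inv, prod;

    blst_fp12_inverse(inv, a);
    blst_fp12_mul(prod, a, inv);

    return blst_fp12_is_one(prod);
}
#endif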