/* * Lowest Level MPI Algorithms * (C) 1999-2010 Jack Lloyd * 2006 Luca Piccarreta * * Botan is released under the Simplified BSD License (see license.txt) */ #ifndef BOTAN_MP_ASM_INTERNAL_H_ #define BOTAN_MP_ASM_INTERNAL_H_ #include namespace Botan { #if defined(BOTAN_MP_USE_X86_32_ASM) #define ADDSUB2_OP(OPERATION, INDEX) \ ASM("movl 4*" #INDEX "(%[y]), %[carry]") \ ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])") \ #define ADDSUB3_OP(OPERATION, INDEX) \ ASM("movl 4*" #INDEX "(%[x]), %[carry]") \ ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]") \ ASM("movl %[carry], 4*" #INDEX "(%[z])") \ #define LINMUL_OP(WRITE_TO, INDEX) \ ASM("movl 4*" #INDEX "(%[x]),%%eax") \ ASM("mull %[y]") \ ASM("addl %[carry],%%eax") \ ASM("adcl $0,%%edx") \ ASM("movl %%edx,%[carry]") \ ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])") #define MULADD_OP(IGNORED, INDEX) \ ASM("movl 4*" #INDEX "(%[x]),%%eax") \ ASM("mull %[y]") \ ASM("addl %[carry],%%eax") \ ASM("adcl $0,%%edx") \ ASM("addl 4*" #INDEX "(%[z]),%%eax") \ ASM("adcl $0,%%edx") \ ASM("movl %%edx,%[carry]") \ ASM("movl %%eax, 4*" #INDEX " (%[z])") #define ADD_OR_SUBTRACT(CORE_CODE) \ ASM("rorl %[carry]") \ CORE_CODE \ ASM("sbbl %[carry],%[carry]") \ ASM("negl %[carry]") #elif defined(BOTAN_MP_USE_X86_64_ASM) #define ADDSUB2_OP(OPERATION, INDEX) \ ASM("movq 8*" #INDEX "(%[y]), %[carry]") \ ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])") \ #define ADDSUB3_OP(OPERATION, INDEX) \ ASM("movq 8*" #INDEX "(%[x]), %[carry]") \ ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \ ASM("movq %[carry], 8*" #INDEX "(%[z])") \ #define LINMUL_OP(WRITE_TO, INDEX) \ ASM("movq 8*" #INDEX "(%[x]),%%rax") \ ASM("mulq %[y]") \ ASM("addq %[carry],%%rax") \ ASM("adcq $0,%%rdx") \ ASM("movq %%rdx,%[carry]") \ ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])") #define MULADD_OP(IGNORED, INDEX) \ ASM("movq 8*" #INDEX "(%[x]),%%rax") \ ASM("mulq %[y]") \ ASM("addq %[carry],%%rax") \ ASM("adcq $0,%%rdx") \ ASM("addq 8*" #INDEX "(%[z]),%%rax") \ ASM("adcq $0,%%rdx") \ ASM("movq %%rdx,%[carry]") \ ASM("movq %%rax, 8*" #INDEX " (%[z])") #define ADD_OR_SUBTRACT(CORE_CODE) \ ASM("rorq %[carry]") \ CORE_CODE \ ASM("sbbq %[carry],%[carry]") \ ASM("negq %[carry]") #endif #if defined(ADD_OR_SUBTRACT) #define ASM(x) x "\n\t" #define DO_8_TIMES(MACRO, ARG) \ MACRO(ARG, 0) \ MACRO(ARG, 1) \ MACRO(ARG, 2) \ MACRO(ARG, 3) \ MACRO(ARG, 4) \ MACRO(ARG, 5) \ MACRO(ARG, 6) \ MACRO(ARG, 7) #endif /* * Word Addition */ inline word word_add(word x, word y, word* carry) { #if defined(BOTAN_MP_USE_X86_32_ASM) asm( ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]")) : [x]"=r"(x), [carry]"=r"(*carry) : "0"(x), [y]"rm"(y), "1"(*carry) : "cc"); return x; #elif defined(BOTAN_MP_USE_X86_64_ASM) asm( ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]")) : [x]"=r"(x), [carry]"=r"(*carry) : "0"(x), [y]"rm"(y), "1"(*carry) : "cc"); return x; #else word z = x + y; word c1 = (z < x); z += *carry; *carry = c1 | (z < *carry); return z; #endif } /* * Eight Word Block Addition, Two Argument */ inline word word8_add2(word x[8], const word y[8], word carry) { #if defined(BOTAN_MP_USE_X86_32_ASM) asm( ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl")) : [carry]"=r"(carry) : [x]"r"(x), [y]"r"(y), "0"(carry) : "cc", "memory"); return carry; #elif defined(BOTAN_MP_USE_X86_64_ASM) asm( ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq")) : [carry]"=r"(carry) : [x]"r"(x), [y]"r"(y), "0"(carry) : "cc", "memory"); return carry; #else x[0] = word_add(x[0], y[0], &carry); x[1] = word_add(x[1], y[1], &carry); x[2] = word_add(x[2], y[2], &carry); x[3] = word_add(x[3], y[3], &carry); x[4] = word_add(x[4], y[4], &carry); x[5] = word_add(x[5], y[5], &carry); x[6] = word_add(x[6], y[6], &carry); x[7] = word_add(x[7], y[7], &carry); return carry; #endif } /* * Eight Word Block Addition, Three Argument */ inline word word8_add3(word z[8], const word x[8], const word y[8], word carry) { #if defined(BOTAN_MP_USE_X86_32_ASM) asm( ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl")) : [carry]"=r"(carry) : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) : "cc", "memory"); return carry; #elif defined(BOTAN_MP_USE_X86_64_ASM) asm( ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq")) : [carry]"=r"(carry) : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) : "cc", "memory"); return carry; #else z[0] = word_add(x[0], y[0], &carry); z[1] = word_add(x[1], y[1], &carry); z[2] = word_add(x[2], y[2], &carry); z[3] = word_add(x[3], y[3], &carry); z[4] = word_add(x[4], y[4], &carry); z[5] = word_add(x[5], y[5], &carry); z[6] = word_add(x[6], y[6], &carry); z[7] = word_add(x[7], y[7], &carry); return carry; #endif } /* * Word Subtraction */ inline word word_sub(word x, word y, word* carry) { #if defined(BOTAN_MP_USE_X86_32_ASM) asm( ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]")) : [x]"=r"(x), [carry]"=r"(*carry) : "0"(x), [y]"rm"(y), "1"(*carry) : "cc"); return x; #elif defined(BOTAN_MP_USE_X86_64_ASM) asm( ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]")) : [x]"=r"(x), [carry]"=r"(*carry) : "0"(x), [y]"rm"(y), "1"(*carry) : "cc"); return x; #else word t0 = x - y; word c1 = (t0 > x); word z = t0 - *carry; *carry = c1 | (z > t0); return z; #endif } /* * Eight Word Block Subtraction, Two Argument */ inline word word8_sub2(word x[8], const word y[8], word carry) { #if defined(BOTAN_MP_USE_X86_32_ASM) asm( ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl")) : [carry]"=r"(carry) : [x]"r"(x), [y]"r"(y), "0"(carry) : "cc", "memory"); return carry; #elif defined(BOTAN_MP_USE_X86_64_ASM) asm( ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq")) : [carry]"=r"(carry) : [x]"r"(x), [y]"r"(y), "0"(carry) : "cc", "memory"); return carry; #else x[0] = word_sub(x[0], y[0], &carry); x[1] = word_sub(x[1], y[1], &carry); x[2] = word_sub(x[2], y[2], &carry); x[3] = word_sub(x[3], y[3], &carry); x[4] = word_sub(x[4], y[4], &carry); x[5] = word_sub(x[5], y[5], &carry); x[6] = word_sub(x[6], y[6], &carry); x[7] = word_sub(x[7], y[7], &carry); return carry; #endif } /* * Eight Word Block Subtraction, Two Argument */ inline word word8_sub2_rev(word x[8], const word y[8], word carry) { #if defined(BOTAN_MP_USE_X86_32_ASM) asm( ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) : [carry]"=r"(carry) : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) : "cc", "memory"); return carry; #elif defined(BOTAN_MP_USE_X86_64_ASM) asm( ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) : [carry]"=r"(carry) : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) : "cc", "memory"); return carry; #else x[0] = word_sub(y[0], x[0], &carry); x[1] = word_sub(y[1], x[1], &carry); x[2] = word_sub(y[2], x[2], &carry); x[3] = word_sub(y[3], x[3], &carry); x[4] = word_sub(y[4], x[4], &carry); x[5] = word_sub(y[5], x[5], &carry); x[6] = word_sub(y[6], x[6], &carry); x[7] = word_sub(y[7], x[7], &carry); return carry; #endif } /* * Eight Word Block Subtraction, Three Argument */ inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry) { #if defined(BOTAN_MP_USE_X86_32_ASM) asm( ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) : [carry]"=r"(carry) : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) : "cc", "memory"); return carry; #elif defined(BOTAN_MP_USE_X86_64_ASM) asm( ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) : [carry]"=r"(carry) : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) : "cc", "memory"); return carry; #else z[0] = word_sub(x[0], y[0], &carry); z[1] = word_sub(x[1], y[1], &carry); z[2] = word_sub(x[2], y[2], &carry); z[3] = word_sub(x[3], y[3], &carry); z[4] = word_sub(x[4], y[4], &carry); z[5] = word_sub(x[5], y[5], &carry); z[6] = word_sub(x[6], y[6], &carry); z[7] = word_sub(x[7], y[7], &carry); return carry; #endif } /* * Eight Word Block Linear Multiplication */ inline word word8_linmul2(word x[8], word y, word carry) { #if defined(BOTAN_MP_USE_X86_32_ASM) asm( DO_8_TIMES(LINMUL_OP, "x") : [carry]"=r"(carry) : [x]"r"(x), [y]"rm"(y), "0"(carry) : "cc", "%eax", "%edx"); return carry; #elif defined(BOTAN_MP_USE_X86_64_ASM) asm( DO_8_TIMES(LINMUL_OP, "x") : [carry]"=r"(carry) : [x]"r"(x), [y]"rm"(y), "0"(carry) : "cc", "%rax", "%rdx"); return carry; #else x[0] = word_madd2(x[0], y, &carry); x[1] = word_madd2(x[1], y, &carry); x[2] = word_madd2(x[2], y, &carry); x[3] = word_madd2(x[3], y, &carry); x[4] = word_madd2(x[4], y, &carry); x[5] = word_madd2(x[5], y, &carry); x[6] = word_madd2(x[6], y, &carry); x[7] = word_madd2(x[7], y, &carry); return carry; #endif } /* * Eight Word Block Linear Multiplication */ inline word word8_linmul3(word z[8], const word x[8], word y, word carry) { #if defined(BOTAN_MP_USE_X86_32_ASM) asm( DO_8_TIMES(LINMUL_OP, "z") : [carry]"=r"(carry) : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) : "cc", "%eax", "%edx"); return carry; #elif defined(BOTAN_MP_USE_X86_64_ASM) asm( DO_8_TIMES(LINMUL_OP, "z") : [carry]"=r"(carry) : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) : "cc", "%rax", "%rdx"); return carry; #else z[0] = word_madd2(x[0], y, &carry); z[1] = word_madd2(x[1], y, &carry); z[2] = word_madd2(x[2], y, &carry); z[3] = word_madd2(x[3], y, &carry); z[4] = word_madd2(x[4], y, &carry); z[5] = word_madd2(x[5], y, &carry); z[6] = word_madd2(x[6], y, &carry); z[7] = word_madd2(x[7], y, &carry); return carry; #endif } /* * Eight Word Block Multiply/Add */ inline word word8_madd3(word z[8], const word x[8], word y, word carry) { #if defined(BOTAN_MP_USE_X86_32_ASM) asm( DO_8_TIMES(MULADD_OP, "") : [carry]"=r"(carry) : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) : "cc", "%eax", "%edx"); return carry; #elif defined(BOTAN_MP_USE_X86_64_ASM) asm( DO_8_TIMES(MULADD_OP, "") : [carry]"=r"(carry) : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) : "cc", "%rax", "%rdx"); return carry; #else z[0] = word_madd3(x[0], y, z[0], &carry); z[1] = word_madd3(x[1], y, z[1], &carry); z[2] = word_madd3(x[2], y, z[2], &carry); z[3] = word_madd3(x[3], y, z[3], &carry); z[4] = word_madd3(x[4], y, z[4], &carry); z[5] = word_madd3(x[5], y, z[5], &carry); z[6] = word_madd3(x[6], y, z[6], &carry); z[7] = word_madd3(x[7], y, z[7], &carry); return carry; #endif } /* * Multiply-Add Accumulator * (w2,w1,w0) += x * y */ inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) { #if defined(BOTAN_MP_USE_X86_32_ASM) word z0 = 0, z1 = 0; asm("mull %[y]" : "=a"(z0),"=d"(z1) : "a"(x), [y]"rm"(y) : "cc"); asm(R"( addl %[z0],%[w0] adcl %[z1],%[w1] adcl $0,%[w2] )" : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) : "cc"); #elif defined(BOTAN_MP_USE_X86_64_ASM) word z0 = 0, z1 = 0; asm("mulq %[y]" : "=a"(z0),"=d"(z1) : "a"(x), [y]"rm"(y) : "cc"); asm(R"( addq %[z0],%[w0] adcq %[z1],%[w1] adcq $0,%[w2] )" : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) : "cc"); #else word carry = *w0; *w0 = word_madd2(x, y, &carry); *w1 += carry; *w2 += (*w1 < carry); #endif } /* * 3-word addition * (w2,w1,w0) += x */ inline void word3_add(word* w2, word* w1, word* w0, word x) { #if defined(BOTAN_MP_USE_X86_32_ASM) asm(R"( addl %[x],%[w0] adcl $0,%[w1] adcl $0,%[w2] )" : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2) : "cc"); #elif defined(BOTAN_MP_USE_X86_64_ASM) asm(R"( addq %[x],%[w0] adcq $0,%[w1] adcq $0,%[w2] )" : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2) : "cc"); #else *w0 += x; word c1 = (*w0 < x); *w1 += c1; word c2 = (*w1 < c1); *w2 += c2; #endif } /* * Multiply-Add Accumulator * (w2,w1,w0) += 2 * x * y */ inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) { #if defined(BOTAN_MP_USE_X86_32_ASM) word z0 = 0, z1 = 0; asm("mull %[y]" : "=a"(z0),"=d"(z1) : "a"(x), [y]"rm"(y) : "cc"); asm(R"( addl %[z0],%[w0] adcl %[z1],%[w1] adcl $0,%[w2] addl %[z0],%[w0] adcl %[z1],%[w1] adcl $0,%[w2] )" : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) : "cc"); #elif defined(BOTAN_MP_USE_X86_64_ASM) word z0 = 0, z1 = 0; asm("mulq %[y]" : "=a"(z0),"=d"(z1) : "a"(x), [y]"rm"(y) : "cc"); asm(R"( addq %[z0],%[w0] adcq %[z1],%[w1] adcq $0,%[w2] addq %[z0],%[w0] adcq %[z1],%[w1] adcq $0,%[w2] )" : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) : "cc"); #else word carry = 0; x = word_madd2(x, y, &carry); y = carry; word top = (y >> (BOTAN_MP_WORD_BITS-1)); y <<= 1; y |= (x >> (BOTAN_MP_WORD_BITS-1)); x <<= 1; carry = 0; *w0 = word_add(*w0, x, &carry); *w1 = word_add(*w1, y, &carry); *w2 = word_add(*w2, top, &carry); #endif } #if defined(ASM) #undef ASM #undef DO_8_TIMES #undef ADD_OR_SUBTRACT #undef ADDSUB2_OP #undef ADDSUB3_OP #undef LINMUL_OP #undef MULADD_OP #endif } #endif