/* Assembly for fixed base scalar multiplication */

#include "crypto_asm_hidden.h"

/*
 * base(x0 = out, x1 = digits, x2 = table)
 * Target: AArch64 (AAPCS64), GNU assembler syntax, preprocessed with cpp.
 *
 *   out     128 bytes are written: four 32-byte values at offsets
 *           0, 32, 64 and 96.
 *   digits  64 bytes, each read as a signed 8-bit digit d.  |d| in 1..8
 *           picks one table entry; any other value keeps the default
 *           entry (1, 1, 0).  d < 0 swaps the first two fields of the
 *           entry and replaces the third by its negation.
 *   table   64 blocks (block i serves digit i) of 768 bytes each.  A block
 *           holds 8 entries of 96 bytes; an entry is three 32-byte fields
 *           of four little-endian 64-bit limbs.
 *
 * Arithmetic folds 2^256 to 38 and bit 255 to 19, i.e. it works modulo
 * 2^255 - 19.  The comments "nielsadd2" and "p1p1 to p3" and the slot names
 * r.x, r.y, r.z, r.t follow the usual ge25519 vocabulary (presumably an
 * extended-coordinates point; confirm against the C reference).
 *
 * Every table entry of a block is loaded for every digit and the choice is
 * made with csel only: no branch and no address depends on a digit value.
 *
 * Register roles while computing:
 *   x0              constant 38 (the out pointer is spilled and reloaded)
 *   x2              current digit, sign-extended
 *   x19             constant 19
 *   x21             1 << 63 (mask for bit 255 in the top limb)
 *   x22,x23,x24     limbs of 256 * (2^255 - 19): x22, x23, x23, x23, x24
 *   x25             table base            x26  address of current digit
 *   x27             digit index i         x28  base of the entries in use
 *   x29             |digit|  (x29 is NOT a frame pointer in here)
 *   x1, x3-x17, x20, x30   scratch
 *
 * Clobbers: x1-x17, NZCV.  x19-x30 are saved and restored.  x0 holds the
 * out pointer again on return.  x18 (platform register) is never touched.
 * Stack: 512 bytes, sp stays 16-byte aligned.  Leaf routine.
 */

/* Stack frame layout (byte offsets from sp). */
#define STK_SIZE     512
#define STK_OUT       96        /* saved x0                                  */
#define STK_DIGITS   104        /* saved x1                                  */
#define STK_TABLE    112        /* saved x2                                  */
#define STK_RX       120        /* r.x, 5 limbs                              */
#define STK_RY       160        /* r.y, 5 limbs                              */
#define STK_RZ       200        /* r.z, 5 limbs                              */
#define STK_RT       240        /* r.t, 4 limbs                              */
#define STK_T0       272        /* selected entry, first field,  4 limbs     */
#define STK_T1       304        /* selected entry, second field, 4 limbs     */
#define STK_T2       336        /* selected entry, third field,  4 limbs     */
#define STK_TMP0     368        /* scratch, up to 5 limbs                    */
#define STK_TMP1     408        /* scratch, 4 limbs                          */
#define STK_TMP2     448        /* scratch, 4 limbs                          */
#define STK_TMP3     480        /* scratch, 4 limbs                          */

/* ------------------------------------------------------------------ */
/* Digit decoding and table lookup                                     */
/* ------------------------------------------------------------------ */

/* x2 = sign-extended digit at [\ptr], x29 = |digit|.  Scratch: x1, x30. */
.macro DECODE_DIGIT ptr
        ldrb    w29, [\ptr]
        uxtb    w30, w29
        sxtb    x2, w30
        mov     x1, x2
        asr     x1, x1, #7              /* 0 or -1: sign mask           */
        mov     x29, x2
        add     x29, x29, x1
        eor     x29, x29, x1            /* (d + mask) ^ mask = |d|      */
.endm

/* If x29 == \idx, replace x3..x6 / x7..x10 by the first / second field of
   the entry at x28 + \off.  The loads happen either way.  Scratch: x11, x12. */
.macro SELECT_PAIR idx, off
        cmp     x29, #\idx
        ldp     x11, x12, [x28, #\off]
        csel    x3, x11, x3, eq
        csel    x4, x12, x4, eq
        ldp     x11, x12, [x28, #(\off + 16)]
        csel    x5, x11, x5, eq
        csel    x6, x12, x6, eq
        ldp     x11, x12, [x28, #(\off + 32)]
        csel    x7, x11, x7, eq
        csel    x8, x12, x8, eq
        ldp     x11, x12, [x28, #(\off + 48)]
        csel    x9, x11, x9, eq
        csel    x10, x12, x10, eq
.endm

/* If x29 == \idx, replace x3..x6 by the third field of the entry at
   x28 + \off.  The loads happen either way.  Scratch: x11, x12. */
.macro SELECT_THIRD idx, off
        cmp     x29, #\idx
        ldp     x11, x12, [x28, #(\off + 64)]
        csel    x3, x11, x3, eq
        csel    x4, x12, x4, eq
        ldp     x11, x12, [x28, #(\off + 80)]
        csel    x5, x11, x5, eq
        csel    x6, x12, x6, eq
.endm

/* x7..x10 = x7..x10 - x3..x6.  Each borrow out of 2^256 is compensated by
   subtracting 38 (x0).  Scratch: x30. */
.macro FE_SUB4
        subs    x7, x7, x3
        sbcs    x8, x8, x4
        sbcs    x9, x9, x5
        sbcs    x10, x10, x6
        csel    x30, xzr, x0, cs        /* 38 if the subtraction borrowed */
        subs    x7, x7, x30
        sbcs    x8, x8, xzr
        sbcs    x9, x9, xzr
        sbcs    x10, x10, xzr
        csel    x30, xzr, x0, cs        /* 38 if the correction borrowed  */
        sub     x7, x7, x30
.endm

/* In:  x28 = block base, x29 = |d|, x2 = d.
   Out: x3..x6 and x7..x10 = first and second field of entry |d|, swapped
        when d < 0; (1, 1) when |d| is outside 1..8.
   x28 is advanced by 384 half way so that every ldp offset stays small
   (the ldp immediate cannot exceed 504).  Scratch: x11, x12. */
.macro LOOKUP_PAIR
        mov     x3, #1
        mov     x4, xzr
        mov     x5, xzr
        mov     x6, xzr
        mov     x7, #1
        mov     x8, xzr
        mov     x9, xzr
        mov     x10, xzr
        SELECT_PAIR 1, 0
        SELECT_PAIR 2, 96
        SELECT_PAIR 3, 192
        SELECT_PAIR 4, 288
        add     x28, x28, #384
        SELECT_PAIR 5, 0
        SELECT_PAIR 6, 96
        SELECT_PAIR 7, 192
        SELECT_PAIR 8, 288
        /* swap the two fields when the digit is negative */
        cmp     x2, xzr
        mov     x11, x3
        csel    x3, x7, x3, lt
        csel    x7, x11, x7, lt
        mov     x11, x4
        csel    x4, x8, x4, lt
        csel    x8, x11, x8, lt
        mov     x11, x5
        csel    x5, x9, x5, lt
        csel    x9, x11, x9, lt
        mov     x11, x6
        csel    x6, x10, x6, lt
        csel    x10, x11, x10, lt
.endm

/* In:  x28 = block base, x29 = |d|, x2 = d.
   Out: x3..x6 = third field of entry |d|, negated when d < 0; 0 when |d|
        is outside 1..8.  Scratch: x7..x12, x30. */
.macro LOOKUP_THIRD
        mov     x3, xzr
        mov     x4, xzr
        mov     x5, xzr
        mov     x6, xzr
        SELECT_THIRD 1, 0
        SELECT_THIRD 2, 96
        SELECT_THIRD 3, 192
        SELECT_THIRD 4, 288
        add     x28, x28, #384
        SELECT_THIRD 5, 0
        SELECT_THIRD 6, 96
        SELECT_THIRD 7, 192
        SELECT_THIRD 8, 288
        /* x7..x10 = 0 - field */
        mov     x7, xzr
        mov     x8, xzr
        mov     x9, xzr
        mov     x10, xzr
        FE_SUB4
        cmp     x2, xzr
        csel    x3, x7, x3, lt
        csel    x4, x8, x4, lt
        csel    x5, x9, x5, lt
        csel    x6, x10, x6, lt
.endm

/* ------------------------------------------------------------------ */
/* Field arithmetic on 64-bit limbs                                    */
/* ------------------------------------------------------------------ */

/* Fold the 5-limb value a0..a4 into 4 limbs a0..a3: everything from bit
   255 upwards is multiplied by 19 (x19) and added to the low part.
   a4 is destroyed. */
.macro FE_FOLD a0, a1, a2, a3, a4
        cmn     \a3, \a3                /* C = bit 63 of a3 (bit 255)    */
        adc     \a4, \a4, \a4           /* a4 = 2 * a4 + C               */
        mul     \a4, \a4, x19
        bic     \a3, \a3, x21           /* clear bit 255                 */
        adds    \a0, \a0, \a4
        adcs    \a1, \a1, xzr
        adcs    \a2, \a2, xzr
        adc     \a3, \a3, xzr
.endm

/* a0..a3 = fold(a + b), both operands 5 limbs.  a4 is destroyed. */
.macro FE_ADD5 a0, a1, a2, a3, a4, b0, b1, b2, b3, b4
        adds    \a0, \a0, \b0
        adcs    \a1, \a1, \b1
        adcs    \a2, \a2, \b2
        adcs    \a3, \a3, \b3
        adc     \a4, \a4, \b4
        FE_FOLD \a0, \a1, \a2, \a3, \a4
.endm

/* a0..a3 = fold(a + 256 * (2^255 - 19) - b), both operands 5 limbs.
   The multiple of the modulus is added first, presumably so that the
   subtraction cannot go negative.  a4 is destroyed. */
.macro FE_SUB5 a0, a1, a2, a3, a4, b0, b1, b2, b3, b4
        adds    \a0, \a0, x22
        adcs    \a1, \a1, x23
        adcs    \a2, \a2, x23
        adcs    \a3, \a3, x23
        adc     \a4, \a4, x24
        subs    \a0, \a0, \b0
        sbcs    \a1, \a1, \b1
        sbcs    \a2, \a2, \b2
        sbcs    \a3, \a3, \b3
        sbc     \a4, \a4, \b4
        FE_FOLD \a0, \a1, \a2, \a3, \a4
.endm

/* Load the multiplication operands from the stack:
   a0..a3 = x3, x4, x5, x6   from sp + \lhs
   b0..b3 = x7, x16, x17, x30 from sp + \rhs */
.macro FE_MUL_LOAD lhs, rhs
        ldp     x3, x4, [sp, #\lhs]
        ldp     x5, x6, [sp, #(\lhs + 16)]
        ldp     x7, x16, [sp, #\rhs]
        ldp     x17, x30, [sp, #(\rhs + 16)]
.endm

/* 4 x 4 limb product with the upper four columns multiplied by 38 (x0)
   and merged into the lower four.
   In:  a = x3..x6, b = x7, x16, x17, x30.
   Out: four (value, carry word) pairs, not yet chained together:
        limb 0 in x8  (carry word x9),   limb 1 in x10 (carry word x11),
        limb 2 in x12 (carry word x13),  limb 3 in x14 (carry word x15).
   Scratch: x1. */
.macro FE_MUL_COLUMNS
        /* column 2^256: lo(a1b3, a2b2, a3b1) + hi(a0b3, a1b2, a2b1, a3b0) */
        mul     x8, x4, x30
        mul     x1, x5, x17
        adds    x8, x8, x1
        cset    x9, cs
        mul     x1, x6, x16
        adds    x8, x8, x1
        adcs    x9, x9, xzr
        umulh   x1, x3, x30
        adds    x8, x8, x1
        adcs    x9, x9, xzr
        umulh   x1, x4, x17
        adds    x8, x8, x1
        adcs    x9, x9, xzr
        umulh   x1, x5, x16
        adds    x8, x8, x1
        adcs    x9, x9, xzr
        umulh   x1, x6, x7
        adds    x10, x8, x1
        adcs    x9, x9, xzr
        /* times 38, plus lo(a0b0) */
        mul     x8, x0, x10
        umulh   x10, x0, x10
        mul     x9, x0, x9
        add     x9, x9, x10
        mul     x1, x3, x7
        adds    x8, x8, x1
        adcs    x9, x9, xzr

        /* column 2^320: lo(a2b3, a3b2) + hi(a1b3, a2b2, a3b1) */
        mul     x10, x5, x30
        mul     x1, x6, x17
        adds    x10, x10, x1
        cset    x11, cs
        umulh   x1, x4, x30
        adds    x10, x10, x1
        adcs    x11, x11, xzr
        umulh   x1, x5, x17
        adds    x10, x10, x1
        adcs    x11, x11, xzr
        umulh   x1, x6, x16
        adds    x12, x10, x1
        adcs    x11, x11, xzr
        /* times 38, plus lo(a0b1, a1b0) + hi(a0b0) */
        mul     x10, x0, x12
        umulh   x12, x0, x12
        mul     x11, x0, x11
        add     x11, x11, x12
        mul     x1, x3, x16
        adds    x10, x10, x1
        adcs    x11, x11, xzr
        mul     x1, x4, x7
        adds    x10, x10, x1
        adcs    x11, x11, xzr
        umulh   x1, x3, x7
        adds    x10, x10, x1
        adcs    x11, x11, xzr

        /* column 2^384: lo(a3b3) + hi(a2b3, a3b2) */
        mul     x12, x6, x30
        umulh   x1, x5, x30
        adds    x12, x12, x1
        cset    x13, cs
        umulh   x1, x6, x17
        adds    x14, x12, x1
        adcs    x13, x13, xzr
        /* times 38, plus lo(a0b2, a1b1, a2b0) + hi(a0b1, a1b0) */
        mul     x12, x0, x14
        umulh   x14, x0, x14
        mul     x13, x0, x13
        add     x13, x13, x14
        mul     x1, x3, x17
        adds    x12, x12, x1
        adcs    x13, x13, xzr
        mul     x1, x4, x16
        adds    x12, x12, x1
        adcs    x13, x13, xzr
        mul     x1, x5, x7
        adds    x12, x12, x1
        adcs    x13, x13, xzr
        umulh   x1, x3, x16
        adds    x12, x12, x1
        adcs    x13, x13, xzr
        umulh   x1, x4, x7
        adds    x12, x12, x1
        adcs    x13, x13, xzr

        /* column 2^448: hi(a3b3), times 38,
           plus lo(a0b3, a1b2, a2b1, a3b0) + hi(a0b2, a1b1, a2b0) */
        umulh   x15, x6, x30
        mul     x14, x0, x15
        umulh   x15, x0, x15
        mul     x1, x3, x30
        adds    x14, x14, x1
        adcs    x15, x15, xzr
        mul     x1, x4, x17
        adds    x14, x14, x1
        adcs    x15, x15, xzr
        mul     x1, x5, x16
        adds    x14, x14, x1
        adcs    x15, x15, xzr
        mul     x1, x6, x7
        adds    x14, x14, x1
        adcs    x15, x15, xzr
        umulh   x1, x3, x17
        adds    x14, x14, x1
        adcs    x15, x15, xzr
        umulh   x1, x4, x16
        adds    x14, x14, x1
        adcs    x15, x15, xzr
        umulh   x1, x5, x7
        adds    x14, x14, x1
        adcs    x15, x15, xzr
.endm

/* (x8, x9, x10, x11, x7) = [sp + \lhs] * [sp + \rhs], left as 5 limbs. */
.macro FE_MUL5 lhs, rhs
        FE_MUL_LOAD \lhs, \rhs
        FE_MUL_COLUMNS
        adds    x9, x10, x9
        adcs    x10, x12, x11
        adcs    x11, x14, x13
        adcs    x7, x15, xzr
.endm

/* (x8, x9, x10, x11) = [sp + \lhs] * [sp + \rhs], folded to 4 limbs. */
.macro FE_MUL4 lhs, rhs
        FE_MUL_LOAD \lhs, \rhs
        FE_MUL_COLUMNS
        adds    x10, x10, x9
        adcs    x12, x12, x11
        adcs    x14, x14, x13
        adc     x15, x15, xzr
        cmn     x14, x14                /* C = bit 255                   */
        adc     x15, x15, x15
        mul     x15, x15, x19
        bic     x14, x14, x21
        adds    x8, x8, x15
        adcs    x9, x10, xzr
        adcs    x10, x12, xzr
        adc     x11, x14, xzr
.endm

/* Store the 5-limb value (x8, x9, x10, x11, x7) at sp + \dst. */
.macro FE_STORE5 dst
        stp     x8, x9, [sp, #\dst]
        stp     x10, x11, [sp, #(\dst + 16)]
        str     x7, [sp, #(\dst + 32)]
.endm

/* Fold the 5-limb stack value at sp + \src and write 4 limbs to
   [x0 + \dst].  x0 must hold the out pointer here.  Scratch: x3..x7. */
.macro STORE_FOLDED src, dst
        ldp     x3, x4, [sp, #\src]
        ldp     x5, x6, [sp, #(\src + 16)]
        ldr     x7, [sp, #(\src + 32)]
        FE_FOLD x3, x4, x5, x6, x7
        stp     x3, x4, [x0, #\dst]
        stp     x5, x6, [x0, #(\dst + 16)]
.endm

/* ------------------------------------------------------------------ */
/* Entry point                                                         */
/* ------------------------------------------------------------------ */

.p2align 4
ASM_HIDDEN _CRYPTO_SHARED_NAMESPACE(base)
.globl _CRYPTO_SHARED_NAMESPACE(base)
ASM_HIDDEN CRYPTO_SHARED_NAMESPACE(base)
.globl CRYPTO_SHARED_NAMESPACE(base)
_CRYPTO_SHARED_NAMESPACE(base):
CRYPTO_SHARED_NAMESPACE(base):

        /* prologue: save callee-saved registers and the three arguments */
        sub     sp, sp, #STK_SIZE
        stp     x19, x20, [sp, #0]
        stp     x21, x22, [sp, #16]
        stp     x23, x24, [sp, #32]
        stp     x25, x26, [sp, #48]
        stp     x27, x28, [sp, #64]
        stp     x29, x30, [sp, #80]
        stp     x0, x1, [sp, #STK_OUT]  /* x1 lands in STK_DIGITS        */
        str     x2, [sp, #STK_TABLE]

        /* constants; 38 lives in x0 (free until the epilogue) so that the
           platform register x18 is left alone */
        mov     x0, #38
        lsr     x19, x0, #1             /* 19                            */
        mov     x21, #0x8000000000000000
        mov     x22, #0xFFFFFFFFFFFFED00
        mov     x23, #-1
        mov     x24, #0x7F

        /* choose t and initialize r */
        mov     x25, x2                 /* table base, before x2 is reused */
        DECODE_DIGIT x1

        mov     x28, x25
        LOOKUP_PAIR

        mov     x11, x7
        mov     x12, x8
        mov     x13, x9
        mov     x14, x10

        /* r.x = second - first */
        FE_SUB4
        stp     x7, x8, [sp, #STK_RX]
        stp     x9, x10, [sp, #(STK_RX + 16)]
        str     xzr, [sp, #(STK_RX + 32)]

        /* r.y = second + first; each carry out of 2^256 adds 38 */
        adds    x3, x11, x3
        adcs    x4, x12, x4
        adcs    x5, x13, x5
        adcs    x6, x14, x6
        csel    x30, x0, xzr, cs
        adds    x3, x3, x30
        adcs    x4, x4, xzr
        adcs    x5, x5, xzr
        adcs    x6, x6, xzr
        csel    x30, x0, xzr, cs
        add     x3, x3, x30
        stp     x3, x4, [sp, #STK_RY]
        stp     x5, x6, [sp, #(STK_RY + 16)]
        str     xzr, [sp, #(STK_RY + 32)]

        /* r.t = third field (negated for a negative digit) */
        mov     x28, x25
        LOOKUP_THIRD
        stp     x3, x4, [sp, #STK_RT]
        stp     x5, x6, [sp, #(STK_RT + 16)]

        /* r.z = 2 */
        mov     x3, #2
        stp     x3, xzr, [sp, #STK_RZ]
        stp     xzr, xzr, [sp, #(STK_RZ + 16)]
        str     xzr, [sp, #(STK_RZ + 32)]

        /* loop: for (i = 1; i < 64; i = i + 1) */
        mov     w27, #1

.Lbase_loop:

        /* choose t */
        ldr     x25, [sp, #STK_TABLE]
        ldr     x26, [sp, #STK_DIGITS]
        add     x26, x26, x27
        DECODE_DIGIT x26

        mov     x20, #768
        mul     x28, x27, x20
        add     x28, x28, x25           /* block i = table + 768 * i     */
        mov     x15, x28                /* kept for the third-field pass */

        LOOKUP_PAIR
        stp     x3, x4, [sp, #STK_T0]
        stp     x5, x6, [sp, #(STK_T0 + 16)]
        stp     x7, x8, [sp, #STK_T1]
        stp     x9, x10, [sp, #(STK_T1 + 16)]

        mov     x28, x15
        LOOKUP_THIRD
        stp     x3, x4, [sp, #STK_T2]
        stp     x5, x6, [sp, #(STK_T2 + 16)]

        /* nielsadd2 */

        /* TMP0 = r.y - r.x, TMP1 = r.y + r.x */
        ldp     x3, x4, [sp, #STK_RY]
        ldp     x5, x6, [sp, #(STK_RY + 16)]
        ldr     x7, [sp, #(STK_RY + 32)]
        mov     x8, x3
        mov     x9, x4
        mov     x10, x5
        mov     x11, x6
        mov     x12, x7
        ldp     x13, x14, [sp, #STK_RX]
        ldp     x15, x16, [sp, #(STK_RX + 16)]
        ldr     x17, [sp, #(STK_RX + 32)]
        FE_SUB5 x8, x9, x10, x11, x12, x13, x14, x15, x16, x17
        stp     x8, x9, [sp, #STK_TMP0]
        stp     x10, x11, [sp, #(STK_TMP0 + 16)]
        FE_ADD5 x3, x4, x5, x6, x7, x13, x14, x15, x16, x17
        stp     x3, x4, [sp, #STK_TMP1]
        stp     x5, x6, [sp, #(STK_TMP1 + 16)]

        /* TMP0 = T0 * TMP0 (5 limbs) */
        FE_MUL5 STK_T0, STK_TMP0
        FE_STORE5 STK_TMP0

        /* product = T1 * TMP1, kept in registers */
        FE_MUL5 STK_T1, STK_TMP1
        mov     x3, x8
        mov     x4, x9
        mov     x5, x10
        mov     x6, x11
        mov     x12, x7
        ldp     x13, x14, [sp, #STK_TMP0]
        ldp     x15, x16, [sp, #(STK_TMP0 + 16)]
        ldr     x17, [sp, #(STK_TMP0 + 32)]
        /* TMP1 = product - TMP0, TMP0 = product + TMP0 */
        FE_SUB5 x8, x9, x10, x11, x12, x13, x14, x15, x16, x17
        stp     x8, x9, [sp, #STK_TMP1]
        stp     x10, x11, [sp, #(STK_TMP1 + 16)]
        FE_ADD5 x3, x4, x5, x6, x7, x13, x14, x15, x16, x17
        stp     x3, x4, [sp, #STK_TMP0]
        stp     x5, x6, [sp, #(STK_TMP0 + 16)]

        /* c = r.t * T2, kept in registers */
        FE_MUL5 STK_RT, STK_T2

        /* d = 2 * r.z */
        ldp     x3, x4, [sp, #STK_RZ]
        ldp     x5, x6, [sp, #(STK_RZ + 16)]
        ldr     x12, [sp, #(STK_RZ + 32)]
        adds    x3, x3, x3
        adcs    x4, x4, x4
        adcs    x5, x5, x5
        adcs    x6, x6, x6
        adc     x12, x12, x12
        mov     x13, x3
        mov     x14, x4
        mov     x15, x5
        mov     x16, x6
        mov     x17, x12
        /* TMP3 = d - c, TMP2 = d + c */
        FE_SUB5 x3, x4, x5, x6, x12, x8, x9, x10, x11, x7
        stp     x3, x4, [sp, #STK_TMP3]
        stp     x5, x6, [sp, #(STK_TMP3 + 16)]
        FE_ADD5 x8, x9, x10, x11, x7, x13, x14, x15, x16, x17
        stp     x8, x9, [sp, #STK_TMP2]
        stp     x10, x11, [sp, #(STK_TMP2 + 16)]

        /* p1p1 to p3 */

        /* r.x = TMP1 * TMP3 */
        FE_MUL5 STK_TMP1, STK_TMP3
        FE_STORE5 STK_RX

        /* r.y = TMP0 * TMP2 */
        FE_MUL5 STK_TMP0, STK_TMP2
        FE_STORE5 STK_RY

        /* r.z = TMP2 * TMP3 */
        FE_MUL5 STK_TMP2, STK_TMP3
        FE_STORE5 STK_RZ

        /* r.t = TMP0 * TMP1, folded to 4 limbs */
        FE_MUL4 STK_TMP0, STK_TMP1
        stp     x8, x9, [sp, #STK_RT]
        stp     x10, x11, [sp, #(STK_RT + 16)]

        add     w27, w27, #1
        cmp     w27, #63
        ble     .Lbase_loop

        /* store final value of r */
        ldr     x0, [sp, #STK_OUT]      /* x0 stops being the constant 38 */
        STORE_FOLDED STK_RX, 0
        STORE_FOLDED STK_RY, 32
        STORE_FOLDED STK_RZ, 64
        ldp     x3, x4, [sp, #STK_RT]
        ldp     x5, x6, [sp, #(STK_RT + 16)]
        stp     x3, x4, [x0, #96]
        stp     x5, x6, [x0, #112]

        /* epilogue */
        ldp     x29, x30, [sp, #80]
        ldp     x27, x28, [sp, #64]
        ldp     x25, x26, [sp, #48]
        ldp     x23, x24, [sp, #32]
        ldp     x21, x22, [sp, #16]
        ldp     x19, x20, [sp, #0]
        add     sp, sp, #STK_SIZE
        ret