-rw-r--r-- 73224 lib25519-20240321/crypto_nP/montgomery25519/amd64-maa4/mladder.S raw
#include "crypto_asm_hidden.h"
// linker define mladder
// linker use mask63
// linker use clamp012
// linker use clamp254
// linker use twoexp8_p0
// linker use twoexp8_p123
// linker use twoexp8_p4
/* Assembly for Montgomery ladder. */
#define mask63 CRYPTO_SHARED_NAMESPACE(mask63)
#define clamp012 CRYPTO_SHARED_NAMESPACE(clamp012)
#define clamp254 CRYPTO_SHARED_NAMESPACE(clamp254)
#define twoexp8_p0 CRYPTO_SHARED_NAMESPACE(twoexp8_p0)
#define twoexp8_p123 CRYPTO_SHARED_NAMESPACE(twoexp8_p123)
#define twoexp8_p4 CRYPTO_SHARED_NAMESPACE(twoexp8_p4)
.p2align 5
ASM_HIDDEN _CRYPTO_SHARED_NAMESPACE(mladder)
.globl _CRYPTO_SHARED_NAMESPACE(mladder)
ASM_HIDDEN CRYPTO_SHARED_NAMESPACE(mladder)
.globl CRYPTO_SHARED_NAMESPACE(mladder)
_CRYPTO_SHARED_NAMESPACE(mladder):
CRYPTO_SHARED_NAMESPACE(mladder):
movq %rsp,%r11
andq $-32,%rsp
subq $568,%rsp
movq %r11,0(%rsp)
movq %r12,8(%rsp)
movq %r13,16(%rsp)
movq %r14,24(%rsp)
movq %r15,32(%rsp)
movq %rbx,40(%rsp)
movq %rbp,48(%rsp)
movq %rdi,56(%rsp)
movq %rdx,64(%rsp)
// clamp scalar
movq 0(%rdx),%r8
movq 24(%rdx),%r9
andq clamp012(%rip),%r8
orq clamp254(%rip),%r9
movq %r8,0(%rdx)
movq %r9,24(%rdx)
// X1 = XP,X3 = XP
movq 0(%rsi),%rax
movq %rax,72(%rsp)
movq %rax,184(%rsp)
movq 8(%rsi),%rbx
movq %rbx,80(%rsp)
movq %rbx,192(%rsp)
movq 16(%rsi),%rbp
movq %rbp,88(%rsp)
movq %rbp,200(%rsp)
movq 24(%rsi),%rsi
movq %rsi,96(%rsp)
movq %rsi,208(%rsp)
movq $0,216(%rsp)
// Z3 = 1
movq $1,224(%rsp)
movq $0,232(%rsp)
movq $0,240(%rsp)
movq $0,248(%rsp)
movq $0,256(%rsp)
// pre-process for the bit n[254] = 1
// T2 = 2X3
shld $1,%rbp,%rsi
shld $1,%rbx,%rbp
shld $1,%rax,%rbx
shlq $1,%rax
movq %rax,312(%rsp)
movq %rbx,320(%rsp)
movq %rbp,328(%rsp)
movq %rsi,336(%rsp)
// T1 = 4X3 = 2T2
xorq %rdi,%rdi
shld $1,%rsi,%rdi
shld $1,%rbp,%rsi
shld $1,%rbx,%rbp
shld $1,%rax,%rbx
shlq $1,%rax
shld $1,%rsi,%rdi
andq mask63(%rip),%rsi
imul $19,%rdi,%rdi
addq %rdi,%rax
adcq $0,%rbx
adcq $0,%rbp
adcq $0,%rsi
movq %rax,280(%rsp)
movq %rbx,288(%rsp)
movq %rbp,296(%rsp)
movq %rsi,304(%rsp)
// T = X3^2 + 1
movq 184(%rsp),%rbx
movq 192(%rsp),%rbp
movq 200(%rsp),%rcx
movq 208(%rsp),%rsi
movq %rsi,%rax
mulq %rsi
movq %rax,%r12
xorq %r13,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq %rbp,%rax
mulq %rsi
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rcx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rsi
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq %rbx,%rax
mulq %rsi
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq %rbp,%rax
mulq %rcx
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq %rbx,%rax
mulq %rbx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rbx,%rax
mulq %rbp
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq %rbx,%rax
mulq %rcx
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq %rbp,%rax
mulq %rbp
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq $1,%r8
adcq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
// copy = X3^2 + 1
movq %r8,%rax
movq %r10,%rbx
movq %r12,%rbp
movq %r14,%rsi
movq %r15,%rdi
// T3 = (X3 + 1)^2 = X3^2 + 1 + 2X3
addq 312(%rsp),%r8
adcq 320(%rsp),%r10
adcq 328(%rsp),%r12
adcq 336(%rsp),%r14
adcq $0,%r15
shld $1,%r14,%r15
andq mask63(%rip),%r14
imul $19,%r15,%r15
addq %r15,%r8
adcq $0,%r10
adcq $0,%r12
adcq $0,%r14
movq %r8,344(%rsp)
movq %r10,352(%rsp)
movq %r12,360(%rsp)
movq %r14,368(%rsp)
// T4 = (X3 - 1)^2 = X3^2 + 1 - 2X3
addq twoexp8_p0(%rip),%rax
adcq twoexp8_p123(%rip),%rbx
adcq twoexp8_p123(%rip),%rbp
adcq twoexp8_p123(%rip),%rsi
adcq twoexp8_p4(%rip),%rdi
subq 312(%rsp),%rax
sbbq 320(%rsp),%rbx
sbbq 328(%rsp),%rbp
sbbq 336(%rsp),%rsi
sbbq $0,%rdi
shld $1,%rsi,%rdi
andq mask63(%rip),%rsi
imul $19,%rdi,%rdi
addq %rdi,%rax
adcq $0,%rbx
adcq $0,%rbp
adcq $0,%rsi
movq %rax,376(%rsp)
movq %rbx,384(%rsp)
movq %rbp,392(%rsp)
movq %rsi,400(%rsp)
// T2 = ((A + 2)/4) · T1
movq $121666,%rax
mulq 280(%rsp)
movq %rax,%r8
movq %rdx,%r9
movq $0,%r10
movq $121666,%rax
mulq 288(%rsp)
addq %rax,%r9
adcq %rdx,%r10
movq $0,%r11
movq $121666,%rax
mulq 296(%rsp)
addq %rax,%r10
adcq %rdx,%r11
movq $0,%r12
movq $121666,%rax
mulq 304(%rsp)
addq %rax,%r11
adcq %rdx,%r12
// T2 = T2 + T4
addq 376(%rsp),%r8
adcq 384(%rsp),%r9
adcq 392(%rsp),%r10
adcq 400(%rsp),%r11
adcq $0,%r12
shld $1,%r11,%r12
andq mask63(%rip),%r11
imul $19,%r12,%r12
addq %r12,%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
movq %r8,312(%rsp)
movq %r9,320(%rsp)
movq %r10,328(%rsp)
movq %r11,336(%rsp)
// X2 = T3 · T4
movq 352(%rsp),%rax
mulq 400(%rsp)
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
movq 360(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 368(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 360(%rsp),%rax
mulq 400(%rsp)
addq %rax,%r10
adcq $0,%r11
movq %rdx,%r12
xorq %r13,%r13
movq 368(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq 368(%rsp),%rax
mulq 400(%rsp)
addq %rax,%r12
adcq $0,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq 344(%rsp),%rax
mulq 400(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 352(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 360(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 368(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq 344(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 344(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 352(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 344(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 352(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 360(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
// X2
movq %r8,104(%rsp)
movq %r10,112(%rsp)
movq %r12,120(%rsp)
movq %r14,128(%rsp)
movq %r15,136(%rsp)
// Z2 = T1 · T2
movq 288(%rsp),%rax
mulq 336(%rsp)
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
movq 296(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 304(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 296(%rsp),%rax
mulq 336(%rsp)
addq %rax,%r10
adcq $0,%r11
movq %rdx,%r12
xorq %r13,%r13
movq 304(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq 304(%rsp),%rax
mulq 336(%rsp)
addq %rax,%r12
adcq $0,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq 280(%rsp),%rax
mulq 336(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 288(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 296(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 304(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq 280(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 280(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 288(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 280(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 288(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 296(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
// Z2
movq %r8,144(%rsp)
movq %r10,152(%rsp)
movq %r12,160(%rsp)
movq %r14,168(%rsp)
movq %r15,176(%rsp)
movq $253,272(%rsp)
movb $1,264(%rsp)
// ladder loop for the bits n[253..3]
.L0: /*
* Montgomery ladder step
*
* T1 = X2 + Z2
* T2 = X2 - Z2
* T3 = X3 + Z3
* T4 = X3 - Z3
*
* bit = n[i]
* T6 = CSelect(T2,T4,bit,prevbit): if (bit <> prevbit) {T6 = T4} else {T6 = T2}
* T5 = CSelect(T1,T3,bit,prevbit): if (bit <> prevbit) {T5 = T3} else {T5 = T1}
* prevbit = bit
*
* Z3 = T2 · T3
* X3 = T1 · T4
* T6 = T6^2
* T5 = T5^2
* T8 = X3 + Z3
* T7 = X3 - Z3
* T1 = T7^2
* X3 = T8^2
* T7 = T5 - T6
* T8 = ((A + 2)/4) · T7
* T8 = T8 + T6
* X2 = T5 · T6
* Z2 = T7 · T8
* Z3 = T1 · X1
*
*/
// X2
movq 104(%rsp),%r8
movq 112(%rsp),%r9
movq 120(%rsp),%r10
movq 128(%rsp),%r11
movq 136(%rsp),%r12
// copy X2
movq %r8,%rax
movq %r9,%rbx
movq %r10,%rbp
movq %r11,%rsi
movq %r12,%rdi
// T1 = X2 + Z2
addq 144(%rsp),%r8
adcq 152(%rsp),%r9
adcq 160(%rsp),%r10
adcq 168(%rsp),%r11
adcq 176(%rsp),%r12
shld $1,%r11,%r12
andq mask63(%rip),%r11
imul $19,%r12,%r12
addq %r12,%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
movq %r8,280(%rsp)
movq %r9,288(%rsp)
movq %r10,296(%rsp)
movq %r11,304(%rsp)
// T2 = X2 - Z2
addq twoexp8_p0(%rip),%rax
adcq twoexp8_p123(%rip),%rbx
adcq twoexp8_p123(%rip),%rbp
adcq twoexp8_p123(%rip),%rsi
adcq twoexp8_p4(%rip),%rdi
subq 144(%rsp),%rax
sbbq 152(%rsp),%rbx
sbbq 160(%rsp),%rbp
sbbq 168(%rsp),%rsi
sbbq 176(%rsp),%rdi
shld $1,%rsi,%rdi
andq mask63(%rip),%rsi
imul $19,%rdi,%rdi
addq %rdi,%rax
adcq $0,%rbx
adcq $0,%rbp
adcq $0,%rsi
movq %rax,312(%rsp)
movq %rbx,320(%rsp)
movq %rbp,328(%rsp)
movq %rsi,336(%rsp)
// X3
movq 184(%rsp),%r8
movq 192(%rsp),%r9
movq 200(%rsp),%r10
movq 208(%rsp),%r11
movq 216(%rsp),%r12
// copy X3
movq %r8,%rax
movq %r9,%rbx
movq %r10,%rbp
movq %r11,%rsi
movq %r12,%rdi
// T3 = X3 + Z3
addq 224(%rsp),%rax
adcq 232(%rsp),%rbx
adcq 240(%rsp),%rbp
adcq 248(%rsp),%rsi
adcq 256(%rsp),%rdi
shld $1,%rsi,%rdi
andq mask63(%rip),%rsi
imul $19,%rdi,%rdi
addq %rdi,%rax
adcq $0,%rbx
adcq $0,%rbp
adcq $0,%rsi
movq %rax,344(%rsp)
movq %rbx,352(%rsp)
movq %rbp,360(%rsp)
movq %rsi,368(%rsp)
// T4 = X3 - Z3
addq twoexp8_p0(%rip),%r8
adcq twoexp8_p123(%rip),%r9
adcq twoexp8_p123(%rip),%r10
adcq twoexp8_p123(%rip),%r11
adcq twoexp8_p4(%rip),%r12
subq 224(%rsp),%r8
sbbq 232(%rsp),%r9
sbbq 240(%rsp),%r10
sbbq 248(%rsp),%r11
sbbq 256(%rsp),%r12
shld $1,%r11,%r12
andq mask63(%rip),%r11
imul $19,%r12,%r12
addq %r12,%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
movq %r8,376(%rsp)
movq %r9,384(%rsp)
movq %r10,392(%rsp)
movq %r11,400(%rsp)
// get current scalar bit
movq 272(%rsp),%rbx
movq %rbx,%rcx
shrq $6,%rbx
movq 64(%rsp),%rax
movq 0(%rax,%rbx,8),%rbx
shrq %rcx,%rbx
andb $1,%bl
// compare current with previous scalar bit
cmpb 264(%rsp),%bl
// update previous scalar bit
movb %bl,264(%rsp)
// T6 = CSelect(T2,T4,bit,prevbit)
movq 312(%rsp),%rax
movq 320(%rsp),%rbx
movq 328(%rsp),%rbp
movq 336(%rsp),%rsi
cmovne %r8,%rax
cmovne %r9,%rbx
cmovne %r10,%rbp
cmovne %r11,%rsi
movq %rax,448(%rsp)
movq %rbx,456(%rsp)
movq %rbp,464(%rsp)
movq %rsi,472(%rsp)
// T5 = CSelect(T1,T3,bit,prevbit)
movq 280(%rsp),%r8
movq 288(%rsp),%r9
movq 296(%rsp),%r10
movq 304(%rsp),%r11
movq 344(%rsp),%r12
movq 352(%rsp),%r13
movq 360(%rsp),%r14
movq 368(%rsp),%r15
cmovne %r12,%r8
cmovne %r13,%r9
cmovne %r14,%r10
cmovne %r15,%r11
movq %r8,408(%rsp)
movq %r9,416(%rsp)
movq %r10,424(%rsp)
movq %r11,432(%rsp)
// Z3 = T2 · T3
movq 352(%rsp),%rax
mulq 336(%rsp)
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
movq 360(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 368(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 360(%rsp),%rax
mulq 336(%rsp)
addq %rax,%r10
adcq $0,%r11
movq %rdx,%r12
xorq %r13,%r13
movq 368(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq 368(%rsp),%rax
mulq 336(%rsp)
addq %rax,%r12
adcq $0,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq 344(%rsp),%rax
mulq 336(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 352(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 360(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 368(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq 344(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 344(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 352(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 344(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 352(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 360(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
movq %r8,224(%rsp)
movq %r10,232(%rsp)
movq %r12,240(%rsp)
movq %r14,248(%rsp)
movq %r15,256(%rsp)
// X3 = T1 · T4
movq 288(%rsp),%rax
mulq 400(%rsp)
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
movq 296(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 304(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 296(%rsp),%rax
mulq 400(%rsp)
addq %rax,%r10
adcq $0,%r11
movq %rdx,%r12
xorq %r13,%r13
movq 304(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq 304(%rsp),%rax
mulq 400(%rsp)
addq %rax,%r12
adcq $0,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq 280(%rsp),%rax
mulq 400(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 288(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 296(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 304(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq 280(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 280(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 288(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 280(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 288(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 296(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
movq %r8,184(%rsp)
movq %r10,192(%rsp)
movq %r12,200(%rsp)
movq %r14,208(%rsp)
movq %r15,216(%rsp)
// T6 = T6^2
movq 448(%rsp),%rbx
movq 456(%rsp),%rbp
movq 464(%rsp),%rcx
movq 472(%rsp),%rsi
movq %rsi,%rax
mulq %rsi
movq %rax,%r12
xorq %r13,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq %rbp,%rax
mulq %rsi
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rcx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rsi
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq %rbx,%rax
mulq %rsi
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq %rbp,%rax
mulq %rcx
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq %rbx,%rax
mulq %rbx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rbx,%rax
mulq %rbp
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq %rbx,%rax
mulq %rcx
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq %rbp,%rax
mulq %rbp
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
shld $1,%r14,%r15
imul $19,%r15,%r15
andq mask63(%rip),%r14
addq %r15,%r8
adcq $0,%r10
adcq $0,%r12
adcq $0,%r14
movq %r8,448(%rsp)
movq %r10,456(%rsp)
movq %r12,464(%rsp)
movq %r14,472(%rsp)
// T5 = T5^2
movq 408(%rsp),%rbx
movq 416(%rsp),%rbp
movq 424(%rsp),%rcx
movq 432(%rsp),%rsi
movq %rsi,%rax
mulq %rsi
movq %rax,%r12
xorq %r13,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq %rbp,%rax
mulq %rsi
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rcx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rsi
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq %rbx,%rax
mulq %rsi
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq %rbp,%rax
mulq %rcx
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq %rbx,%rax
mulq %rbx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rbx,%rax
mulq %rbp
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq %rbx,%rax
mulq %rcx
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq %rbp,%rax
mulq %rbp
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
shld $1,%r14,%r15
imul $19,%r15,%r15
andq mask63(%rip),%r14
addq %r15,%r8
adcq $0,%r10
adcq $0,%r12
adcq $0,%r14
movq %r8,408(%rsp)
movq %r10,416(%rsp)
movq %r12,424(%rsp)
movq %r14,432(%rsp)
// X3
movq 184(%rsp),%r8
movq 192(%rsp),%r9
movq 200(%rsp),%r10
movq 208(%rsp),%r11
movq 216(%rsp),%r12
// copy X3
movq %r8,%rbx
movq %r9,%rbp
movq %r10,%rcx
movq %r11,%rsi
movq %r12,%rdi
// T8 = X3 + Z3
addq 224(%rsp),%r8
adcq 232(%rsp),%r9
adcq 240(%rsp),%r10
adcq 248(%rsp),%r11
adcq 256(%rsp),%r12
shld $1,%r11,%r12
andq mask63(%rip),%r11
imul $19,%r12,%r12
addq %r12,%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
movq %r8,528(%rsp)
movq %r9,536(%rsp)
movq %r10,544(%rsp)
movq %r11,552(%rsp)
// T7 = X3 - Z3
addq twoexp8_p0(%rip),%rbx
adcq twoexp8_p123(%rip),%rbp
adcq twoexp8_p123(%rip),%rcx
adcq twoexp8_p123(%rip),%rsi
adcq twoexp8_p4(%rip),%rdi
subq 224(%rsp),%rbx
sbbq 232(%rsp),%rbp
sbbq 240(%rsp),%rcx
sbbq 248(%rsp),%rsi
sbbq 256(%rsp),%rdi
shld $1,%rsi,%rdi
andq mask63(%rip),%rsi
imul $19,%rdi,%rdi
addq %rdi,%rbx
adcq $0,%rbp
adcq $0,%rcx
adcq $0,%rsi
// T1 = T7^2
movq %rsi,%rax
mulq %rsi
movq %rax,%r12
xorq %r13,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq %rbp,%rax
mulq %rsi
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rcx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rsi
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq %rbx,%rax
mulq %rsi
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq %rbp,%rax
mulq %rcx
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq %rbx,%rax
mulq %rbx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rbx,%rax
mulq %rbp
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq %rbx,%rax
mulq %rcx
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq %rbp,%rax
mulq %rbp
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
shld $1,%r14,%r15
imul $19,%r15,%r15
andq mask63(%rip),%r14
addq %r15,%r8
adcq $0,%r10
adcq $0,%r12
adcq $0,%r14
movq %r8,280(%rsp)
movq %r10,288(%rsp)
movq %r12,296(%rsp)
movq %r14,304(%rsp)
// X3 = T8^2
movq 528(%rsp),%rbx
movq 536(%rsp),%rbp
movq 544(%rsp),%rcx
movq 552(%rsp),%rsi
movq %rsi,%rax
mulq %rsi
movq %rax,%r12
xorq %r13,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq %rbp,%rax
mulq %rsi
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rcx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rsi
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq %rbx,%rax
mulq %rsi
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq %rbp,%rax
mulq %rcx
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq %rbx,%rax
mulq %rbx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rbx,%rax
mulq %rbp
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq %rbx,%rax
mulq %rcx
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq %rbp,%rax
mulq %rbp
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
// update X3
movq %r8,184(%rsp)
movq %r10,192(%rsp)
movq %r12,200(%rsp)
movq %r14,208(%rsp)
movq %r15,216(%rsp)
// T7 = T5 - T6
movq 408(%rsp),%r8
movq 416(%rsp),%r9
movq 424(%rsp),%r10
movq 432(%rsp),%r11
subq 448(%rsp),%r8
sbbq 456(%rsp),%r9
sbbq 464(%rsp),%r10
sbbq 472(%rsp),%r11
movq $0,%rdi
movq $38,%rcx
cmovae %rdi,%rcx
subq %rcx,%r8
sbbq %rdi,%r9
sbbq %rdi,%r10
sbbq %rdi,%r11
cmovc %rcx,%rdi
subq %rdi,%r8
movq %r8,488(%rsp)
movq %r9,496(%rsp)
movq %r10,504(%rsp)
movq %r11,512(%rsp)
// T8 = ((A + 2)/4) · T7
movq $121666,%rax
mulq %r8
movq %rax,%rbx
movq %rdx,%rsi
movq $0,%rdi
movq $121666,%rax
mulq %r9
addq %rax,%rsi
adcq %rdx,%rdi
movq $0,%rcx
movq $121666,%rax
mulq %r10
addq %rax,%rdi
adcq %rdx,%rcx
movq $121666,%rax
mulq %r11
addq %rax,%rcx
adcq $0,%rdx
// T8 = T8 + T6
addq 448(%rsp),%rbx
adcq 456(%rsp),%rsi
adcq 464(%rsp),%rdi
adcq 472(%rsp),%rcx
adcq $0,%rdx
shld $1,%rcx,%rdx
andq mask63(%rip),%rcx
imul $19,%rdx,%rdx
addq %rdx,%rbx
adcq $0,%rsi
adcq $0,%rdi
adcq $0,%rcx
movq %rbx,528(%rsp)
movq %rsi,536(%rsp)
movq %rdi,544(%rsp)
movq %rcx,552(%rsp)
// X2 = T5 · T6
movq 416(%rsp),%rax
mulq 472(%rsp)
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
movq 424(%rsp),%rax
mulq 464(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 432(%rsp),%rax
mulq 456(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 424(%rsp),%rax
mulq 472(%rsp)
addq %rax,%r10
adcq $0,%r11
movq %rdx,%r12
xorq %r13,%r13
movq 432(%rsp),%rax
mulq 464(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq 432(%rsp),%rax
mulq 472(%rsp)
addq %rax,%r12
adcq $0,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq 408(%rsp),%rax
mulq 472(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 416(%rsp),%rax
mulq 464(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 424(%rsp),%rax
mulq 456(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 432(%rsp),%rax
mulq 448(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq 408(%rsp),%rax
mulq 448(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 408(%rsp),%rax
mulq 456(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 416(%rsp),%rax
mulq 448(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 408(%rsp),%rax
mulq 464(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 416(%rsp),%rax
mulq 456(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 424(%rsp),%rax
mulq 448(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
// update X2
movq %r8,104(%rsp)
movq %r10,112(%rsp)
movq %r12,120(%rsp)
movq %r14,128(%rsp)
movq %r15,136(%rsp)
// Z2 = T7 · T8
movq 496(%rsp),%rax
mulq 552(%rsp)
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
movq 504(%rsp),%rax
mulq 544(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 512(%rsp),%rax
mulq 536(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 504(%rsp),%rax
mulq 552(%rsp)
addq %rax,%r10
adcq $0,%r11
movq %rdx,%r12
xorq %r13,%r13
movq 512(%rsp),%rax
mulq 544(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq 512(%rsp),%rax
mulq 552(%rsp)
addq %rax,%r12
adcq $0,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq 488(%rsp),%rax
mulq 552(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 496(%rsp),%rax
mulq 544(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 504(%rsp),%rax
mulq 536(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 512(%rsp),%rax
mulq 528(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq 488(%rsp),%rax
mulq 528(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 488(%rsp),%rax
mulq 536(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 496(%rsp),%rax
mulq 528(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 488(%rsp),%rax
mulq 544(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 496(%rsp),%rax
mulq 536(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 504(%rsp),%rax
mulq 528(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
// update Z2
movq %r8,144(%rsp)
movq %r10,152(%rsp)
movq %r12,160(%rsp)
movq %r14,168(%rsp)
movq %r15,176(%rsp)
// Z3 = T1 · X1
movq 80(%rsp),%rax
mulq 304(%rsp)
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
movq 88(%rsp),%rax
mulq 296(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 96(%rsp),%rax
mulq 288(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 88(%rsp),%rax
mulq 304(%rsp)
addq %rax,%r10
adcq $0,%r11
movq %rdx,%r12
xorq %r13,%r13
movq 96(%rsp),%rax
mulq 296(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq 96(%rsp),%rax
mulq 304(%rsp)
addq %rax,%r12
adcq $0,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq 72(%rsp),%rax
mulq 304(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 80(%rsp),%rax
mulq 296(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 88(%rsp),%rax
mulq 288(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 96(%rsp),%rax
mulq 280(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq 72(%rsp),%rax
mulq 280(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 72(%rsp),%rax
mulq 288(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 80(%rsp),%rax
mulq 280(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 72(%rsp),%rax
mulq 296(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 80(%rsp),%rax
mulq 288(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 88(%rsp),%rax
mulq 280(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
// update Z3
movq %r8,224(%rsp)
movq %r10,232(%rsp)
movq %r12,240(%rsp)
movq %r14,248(%rsp)
movq %r15,256(%rsp)
movq 272(%rsp),%rax
subq $1,%rax
movq %rax,272(%rsp)
cmpq $3,%rax
jge .L0
cmpb $0,264(%rsp)
// Z2 = CSelect(Z2,Z3,0,prevbit)
movq 144(%rsp),%rax
movq 152(%rsp),%rbx
movq 160(%rsp),%rcx
movq 168(%rsp),%rdx
movq 176(%rsp),%rsi
cmovne %r8,%rax
cmovne %r10,%rbx
cmovne %r12,%rcx
cmovne %r14,%rdx
cmovne %r15,%rsi
movq %rax,144(%rsp)
movq %rbx,152(%rsp)
movq %rcx,160(%rsp)
movq %rdx,168(%rsp)
movq %rsi,176(%rsp)
// X2 = CSelect(X2,X3,0,prevbit)
movq 104(%rsp),%r8
movq 112(%rsp),%r10
movq 120(%rsp),%r12
movq 128(%rsp),%r14
movq 136(%rsp),%r15
movq 184(%rsp),%rax
movq 192(%rsp),%rbx
movq 200(%rsp),%rcx
movq 208(%rsp),%rdx
movq 216(%rsp),%rsi
cmovne %rax,%r8
cmovne %rbx,%r10
cmovne %rcx,%r12
cmovne %rdx,%r14
cmovne %rsi,%r15
// post-process for the bit n[2] = 0
// copy X2
movq %r8,%rbx
movq %r10,%rbp
movq %r12,%rcx
movq %r14,%rsi
movq %r15,%rdi
// T1 = X2 + Z2
addq 144(%rsp),%r8
adcq 152(%rsp),%r10
adcq 160(%rsp),%r12
adcq 168(%rsp),%r14
adcq 176(%rsp),%r15
shld $1,%r14,%r15
andq mask63(%rip),%r14
imul $19,%r15,%r15
addq %r15,%r8
adcq $0,%r10
adcq $0,%r12
adcq $0,%r14
movq %r8,280(%rsp)
movq %r10,288(%rsp)
movq %r12,296(%rsp)
movq %r14,304(%rsp)
// T2 = X2 - Z2
addq twoexp8_p0(%rip),%rbx
adcq twoexp8_p123(%rip),%rbp
adcq twoexp8_p123(%rip),%rcx
adcq twoexp8_p123(%rip),%rsi
adcq twoexp8_p4(%rip),%rdi
subq 144(%rsp),%rbx
sbbq 152(%rsp),%rbp
sbbq 160(%rsp),%rcx
sbbq 168(%rsp),%rsi
sbbq 176(%rsp),%rdi
shld $1,%rsi,%rdi
andq mask63(%rip),%rsi
imul $19,%rdi,%rdi
addq %rdi,%rbx
adcq $0,%rbp
adcq $0,%rcx
adcq $0,%rsi
// T2 = T2^2
movq %rsi,%rax
mulq %rsi
movq %rax,%r12
xorq %r13,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq %rbp,%rax
mulq %rsi
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rcx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rsi
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq %rbx,%rax
mulq %rsi
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq %rbp,%rax
mulq %rcx
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq %rbx,%rax
mulq %rbx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rbx,%rax
mulq %rbp
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq %rbx,%rax
mulq %rcx
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq %rbp,%rax
mulq %rbp
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
shld $1,%r14,%r15
imul $19,%r15,%r15
andq mask63(%rip),%r14
addq %r15,%r8
adcq $0,%r10
adcq $0,%r12
adcq $0,%r14
movq %r8,312(%rsp)
movq %r10,320(%rsp)
movq %r12,328(%rsp)
movq %r14,336(%rsp)
// T1 = T1^2
movq 280(%rsp),%rbx
movq 288(%rsp),%rbp
movq 296(%rsp),%rcx
movq 304(%rsp),%rsi
movq %rsi,%rax
mulq %rsi
movq %rax,%r12
xorq %r13,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq %rbp,%rax
mulq %rsi
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rcx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rsi
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq %rbx,%rax
mulq %rsi
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq %rbp,%rax
mulq %rcx
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq %rbx,%rax
mulq %rbx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rbx,%rax
mulq %rbp
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq %rbx,%rax
mulq %rcx
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq %rbp,%rax
mulq %rbp
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
shld $1,%r14,%r15
imul $19,%r15,%r15
andq mask63(%rip),%r14
addq %r15,%r8
adcq $0,%r10
adcq $0,%r12
adcq $0,%r14
movq %r8,280(%rsp)
movq %r10,288(%rsp)
movq %r12,296(%rsp)
movq %r14,304(%rsp)
// T3 = T1 - T2
subq 312(%rsp),%r8
sbbq 320(%rsp),%r10
sbbq 328(%rsp),%r12
sbbq 336(%rsp),%r14
movq $0,%rdi
movq $38,%rcx
cmovae %rdi,%rcx
subq %rcx,%r8
sbbq %rdi,%r10
sbbq %rdi,%r12
sbbq %rdi,%r14
cmovc %rcx,%rdi
subq %rdi,%r8
movq %r8,344(%rsp)
movq %r10,352(%rsp)
movq %r12,360(%rsp)
movq %r14,368(%rsp)
// T4 = ((A + 2)/4) · T3
movq $121666,%rax
mulq %r8
movq %rax,%rbx
movq %rdx,%rsi
movq $0,%rdi
movq $121666,%rax
mulq %r10
addq %rax,%rsi
adcq %rdx,%rdi
movq $0,%rcx
movq $121666,%rax
mulq %r12
addq %rax,%rdi
adcq %rdx,%rcx
movq $121666,%rax
mulq %r14
addq %rax,%rcx
adcq $0,%rdx
// T4 = T4 + T2
addq 312(%rsp),%rbx
adcq 320(%rsp),%rsi
adcq 328(%rsp),%rdi
adcq 336(%rsp),%rcx
adcq $0,%rdx
shld $1,%rcx,%rdx
andq mask63(%rip),%rcx
imul $19,%rdx,%rdx
addq %rdx,%rbx
adcq $0,%rsi
adcq $0,%rdi
adcq $0,%rcx
movq %rbx,376(%rsp)
movq %rsi,384(%rsp)
movq %rdi,392(%rsp)
movq %rcx,400(%rsp)
// Z2 = T3 · T4
movq 352(%rsp),%rax
mulq 400(%rsp)
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
movq 360(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 368(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 360(%rsp),%rax
mulq 400(%rsp)
addq %rax,%r10
adcq $0,%r11
movq %rdx,%r12
xorq %r13,%r13
movq 368(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq 368(%rsp),%rax
mulq 400(%rsp)
addq %rax,%r12
adcq $0,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq 344(%rsp),%rax
mulq 400(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 352(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 360(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 368(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq 344(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 344(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 352(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 344(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 352(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 360(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
// update Z2
movq %r8,144(%rsp)
movq %r10,152(%rsp)
movq %r12,160(%rsp)
movq %r14,168(%rsp)
movq %r15,176(%rsp)
// X2 = T1 · T2
movq 288(%rsp),%rax
mulq 336(%rsp)
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
movq 296(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 304(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 296(%rsp),%rax
mulq 336(%rsp)
addq %rax,%r10
adcq $0,%r11
movq %rdx,%r12
xorq %r13,%r13
movq 304(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq 304(%rsp),%rax
mulq 336(%rsp)
addq %rax,%r12
adcq $0,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq 280(%rsp),%rax
mulq 336(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 288(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 296(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 304(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq 280(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 280(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 288(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 280(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 288(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 296(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
// post-process for the bit n[1] = 0
// copy X2
movq %r8,%rbx
movq %r10,%rbp
movq %r12,%rcx
movq %r14,%rsi
movq %r15,%rdi
// T1 = X2 + Z2
addq 144(%rsp),%r8
adcq 152(%rsp),%r10
adcq 160(%rsp),%r12
adcq 168(%rsp),%r14
adcq 176(%rsp),%r15
shld $1,%r14,%r15
andq mask63(%rip),%r14
imul $19,%r15,%r15
addq %r15,%r8
adcq $0,%r10
adcq $0,%r12
adcq $0,%r14
movq %r8,280(%rsp)
movq %r10,288(%rsp)
movq %r12,296(%rsp)
movq %r14,304(%rsp)
// T2 = X2 - Z2
addq twoexp8_p0(%rip),%rbx
adcq twoexp8_p123(%rip),%rbp
adcq twoexp8_p123(%rip),%rcx
adcq twoexp8_p123(%rip),%rsi
adcq twoexp8_p4(%rip),%rdi
subq 144(%rsp),%rbx
sbbq 152(%rsp),%rbp
sbbq 160(%rsp),%rcx
sbbq 168(%rsp),%rsi
sbbq 176(%rsp),%rdi
shld $1,%rsi,%rdi
andq mask63(%rip),%rsi
imul $19,%rdi,%rdi
addq %rdi,%rbx
adcq $0,%rbp
adcq $0,%rcx
adcq $0,%rsi
// T2 = T2^2
movq %rsi,%rax
mulq %rsi
movq %rax,%r12
xorq %r13,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq %rbp,%rax
mulq %rsi
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rcx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rsi
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq %rbx,%rax
mulq %rsi
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq %rbp,%rax
mulq %rcx
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq %rbx,%rax
mulq %rbx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rbx,%rax
mulq %rbp
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq %rbx,%rax
mulq %rcx
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq %rbp,%rax
mulq %rbp
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
shld $1,%r14,%r15
imul $19,%r15,%r15
andq mask63(%rip),%r14
addq %r15,%r8
adcq $0,%r10
adcq $0,%r12
adcq $0,%r14
movq %r8,312(%rsp)
movq %r10,320(%rsp)
movq %r12,328(%rsp)
movq %r14,336(%rsp)
// T1 = T1^2
movq 280(%rsp),%rbx
movq 288(%rsp),%rbp
movq 296(%rsp),%rcx
movq 304(%rsp),%rsi
movq %rsi,%rax
mulq %rsi
movq %rax,%r12
xorq %r13,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq %rbp,%rax
mulq %rsi
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rcx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rsi
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq %rbx,%rax
mulq %rsi
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq %rbp,%rax
mulq %rcx
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq %rbx,%rax
mulq %rbx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rbx,%rax
mulq %rbp
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq %rbx,%rax
mulq %rcx
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq %rbp,%rax
mulq %rbp
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
shld $1,%r14,%r15
imul $19,%r15,%r15
andq mask63(%rip),%r14
addq %r15,%r8
adcq $0,%r10
adcq $0,%r12
adcq $0,%r14
movq %r8,280(%rsp)
movq %r10,288(%rsp)
movq %r12,296(%rsp)
movq %r14,304(%rsp)
// T3 = T1 - T2
subq 312(%rsp),%r8
sbbq 320(%rsp),%r10
sbbq 328(%rsp),%r12
sbbq 336(%rsp),%r14
movq $0,%rdi
movq $38,%rcx
cmovae %rdi,%rcx
subq %rcx,%r8
sbbq %rdi,%r10
sbbq %rdi,%r12
sbbq %rdi,%r14
cmovc %rcx,%rdi
subq %rdi,%r8
movq %r8,344(%rsp)
movq %r10,352(%rsp)
movq %r12,360(%rsp)
movq %r14,368(%rsp)
// T4 = ((A + 2)/4) · T3
movq $121666,%rax
mulq %r8
movq %rax,%rbx
movq %rdx,%rsi
movq $0,%rdi
movq $121666,%rax
mulq %r10
addq %rax,%rsi
adcq %rdx,%rdi
movq $0,%rcx
movq $121666,%rax
mulq %r12
addq %rax,%rdi
adcq %rdx,%rcx
movq $121666,%rax
mulq %r14
addq %rax,%rcx
adcq $0,%rdx
// T4 = T4 + T2
addq 312(%rsp),%rbx
adcq 320(%rsp),%rsi
adcq 328(%rsp),%rdi
adcq 336(%rsp),%rcx
adcq $0,%rdx
shld $1,%rcx,%rdx
andq mask63(%rip),%rcx
imul $19,%rdx,%rdx
addq %rdx,%rbx
adcq $0,%rsi
adcq $0,%rdi
adcq $0,%rcx
movq %rbx,376(%rsp)
movq %rsi,384(%rsp)
movq %rdi,392(%rsp)
movq %rcx,400(%rsp)
// Z2 = T3 · T4
movq 352(%rsp),%rax
mulq 400(%rsp)
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
movq 360(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 368(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 360(%rsp),%rax
mulq 400(%rsp)
addq %rax,%r10
adcq $0,%r11
movq %rdx,%r12
xorq %r13,%r13
movq 368(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq 368(%rsp),%rax
mulq 400(%rsp)
addq %rax,%r12
adcq $0,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq 344(%rsp),%rax
mulq 400(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 352(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 360(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 368(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq 344(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 344(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 352(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 344(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 352(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 360(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
// update Z2
movq %r8,144(%rsp)
movq %r10,152(%rsp)
movq %r12,160(%rsp)
movq %r14,168(%rsp)
movq %r15,176(%rsp)
// X2 = T1 · T2
movq 288(%rsp),%rax
mulq 336(%rsp)
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
movq 296(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 304(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 296(%rsp),%rax
mulq 336(%rsp)
addq %rax,%r10
adcq $0,%r11
movq %rdx,%r12
xorq %r13,%r13
movq 304(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq 304(%rsp),%rax
mulq 336(%rsp)
addq %rax,%r12
adcq $0,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq 280(%rsp),%rax
mulq 336(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 288(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 296(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 304(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq 280(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 280(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 288(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 280(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 288(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 296(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
// post-process for the bit n[0] = 0
// copy X2
movq %r8,%rbx
movq %r10,%rbp
movq %r12,%rcx
movq %r14,%rsi
movq %r15,%rdi
// T1 = X2 + Z2
addq 144(%rsp),%r8
adcq 152(%rsp),%r10
adcq 160(%rsp),%r12
adcq 168(%rsp),%r14
adcq 176(%rsp),%r15
shld $1,%r14,%r15
andq mask63(%rip),%r14
imul $19,%r15,%r15
addq %r15,%r8
adcq $0,%r10
adcq $0,%r12
adcq $0,%r14
movq %r8,280(%rsp)
movq %r10,288(%rsp)
movq %r12,296(%rsp)
movq %r14,304(%rsp)
// T2 = X2 - Z2
addq twoexp8_p0(%rip),%rbx
adcq twoexp8_p123(%rip),%rbp
adcq twoexp8_p123(%rip),%rcx
adcq twoexp8_p123(%rip),%rsi
adcq twoexp8_p4(%rip),%rdi
subq 144(%rsp),%rbx
sbbq 152(%rsp),%rbp
sbbq 160(%rsp),%rcx
sbbq 168(%rsp),%rsi
sbbq 176(%rsp),%rdi
shld $1,%rsi,%rdi
andq mask63(%rip),%rsi
imul $19,%rdi,%rdi
addq %rdi,%rbx
adcq $0,%rbp
adcq $0,%rcx
adcq $0,%rsi
// T2 = T2^2
movq %rsi,%rax
mulq %rsi
movq %rax,%r12
xorq %r13,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq %rbp,%rax
mulq %rsi
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rcx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rsi
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq %rbx,%rax
mulq %rsi
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq %rbp,%rax
mulq %rcx
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq %rbx,%rax
mulq %rbx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rbx,%rax
mulq %rbp
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq %rbx,%rax
mulq %rcx
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq %rbp,%rax
mulq %rbp
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
shld $1,%r14,%r15
imul $19,%r15,%r15
andq mask63(%rip),%r14
addq %r15,%r8
adcq $0,%r10
adcq $0,%r12
adcq $0,%r14
movq %r8,312(%rsp)
movq %r10,320(%rsp)
movq %r12,328(%rsp)
movq %r14,336(%rsp)
// T1 = T1^2
movq 280(%rsp),%rbx
movq 288(%rsp),%rbp
movq 296(%rsp),%rcx
movq 304(%rsp),%rsi
movq %rsi,%rax
mulq %rsi
movq %rax,%r12
xorq %r13,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq %rbp,%rax
mulq %rsi
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rcx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rcx,%rax
mulq %rsi
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq %rbx,%rax
mulq %rsi
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq %rbp,%rax
mulq %rcx
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq %rbx,%rax
mulq %rbx
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq %rbx,%rax
mulq %rbp
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq %rbx,%rax
mulq %rcx
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq %rbp,%rax
mulq %rbp
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
shld $1,%r14,%r15
imul $19,%r15,%r15
andq mask63(%rip),%r14
addq %r15,%r8
adcq $0,%r10
adcq $0,%r12
adcq $0,%r14
movq %r8,280(%rsp)
movq %r10,288(%rsp)
movq %r12,296(%rsp)
movq %r14,304(%rsp)
// T3 = T1 - T2
subq 312(%rsp),%r8
sbbq 320(%rsp),%r10
sbbq 328(%rsp),%r12
sbbq 336(%rsp),%r14
movq $0,%rdi
movq $38,%rcx
cmovae %rdi,%rcx
subq %rcx,%r8
sbbq %rdi,%r10
sbbq %rdi,%r12
sbbq %rdi,%r14
cmovc %rcx,%rdi
subq %rdi,%r8
movq %r8,344(%rsp)
movq %r10,352(%rsp)
movq %r12,360(%rsp)
movq %r14,368(%rsp)
// T4 = ((A + 2)/4) · T3
movq $121666,%rax
mulq %r8
movq %rax,%rbx
movq %rdx,%rsi
movq $0,%rdi
movq $121666,%rax
mulq %r10
addq %rax,%rsi
adcq %rdx,%rdi
movq $0,%rcx
movq $121666,%rax
mulq %r12
addq %rax,%rdi
adcq %rdx,%rcx
movq $121666,%rax
mulq %r14
addq %rax,%rcx
adcq $0,%rdx
// T4 = T4 + T2
addq 312(%rsp),%rbx
adcq 320(%rsp),%rsi
adcq 328(%rsp),%rdi
adcq 336(%rsp),%rcx
adcq $0,%rdx
shld $1,%rcx,%rdx
andq mask63(%rip),%rcx
imul $19,%rdx,%rdx
addq %rdx,%rbx
adcq $0,%rsi
adcq $0,%rdi
adcq $0,%rcx
movq %rbx,376(%rsp)
movq %rsi,384(%rsp)
movq %rdi,392(%rsp)
movq %rcx,400(%rsp)
// Z2 = T3 · T4
movq 352(%rsp),%rax
mulq 400(%rsp)
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
movq 360(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 368(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 360(%rsp),%rax
mulq 400(%rsp)
addq %rax,%r10
adcq $0,%r11
movq %rdx,%r12
xorq %r13,%r13
movq 368(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq 368(%rsp),%rax
mulq 400(%rsp)
addq %rax,%r12
adcq $0,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq 344(%rsp),%rax
mulq 400(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 352(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 360(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 368(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq 344(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 344(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 352(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 344(%rsp),%rax
mulq 392(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 352(%rsp),%rax
mulq 384(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 360(%rsp),%rax
mulq 376(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
shld $1,%r14,%r15
andq mask63(%rip),%r14
imul $19,%r15,%r15
addq %r15,%r8
adcq $0,%r10
adcq $0,%r12
adcq $0,%r14
// store final value of Z2
movq 56(%rsp),%rdi
movq %r8,32(%rdi)
movq %r10,40(%rdi)
movq %r12,48(%rdi)
movq %r14,56(%rdi)
// X2 = T1 · T2
movq 288(%rsp),%rax
mulq 336(%rsp)
movq %rax,%r8
xorq %r9,%r9
movq %rdx,%r10
xorq %r11,%r11
movq 296(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 304(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 296(%rsp),%rax
mulq 336(%rsp)
addq %rax,%r10
adcq $0,%r11
movq %rdx,%r12
xorq %r13,%r13
movq 304(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq $38,%rax
mulq %r10
imul $38,%r11,%r11
movq %rax,%r10
addq %rdx,%r11
movq 304(%rsp),%rax
mulq 336(%rsp)
addq %rax,%r12
adcq $0,%r13
movq $38,%rax
mulq %rdx
movq %rax,%r14
movq %rdx,%r15
movq $38,%rax
mulq %r12
imul $38,%r13,%r13
movq %rax,%r12
addq %rdx,%r13
movq 280(%rsp),%rax
mulq 336(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 288(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 296(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq 304(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r14
adcq $0,%r15
addq %rdx,%r8
adcq $0,%r9
movq $38,%rax
mulq %r8
imul $38,%r9,%r9
movq %rax,%r8
addq %rdx,%r9
movq 280(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r8
adcq $0,%r9
addq %rdx,%r10
adcq $0,%r11
movq 280(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 288(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r10
adcq $0,%r11
addq %rdx,%r12
adcq $0,%r13
movq 280(%rsp),%rax
mulq 328(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 288(%rsp),%rax
mulq 320(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
movq 296(%rsp),%rax
mulq 312(%rsp)
addq %rax,%r12
adcq $0,%r13
addq %rdx,%r14
adcq $0,%r15
addq %r9,%r10
adcq $0,%r11
addq %r11,%r12
adcq $0,%r13
addq %r13,%r14
adcq $0,%r15
shld $1,%r14,%r15
andq mask63(%rip),%r14
imul $19,%r15,%r15
addq %r15,%r8
adcq $0,%r10
adcq $0,%r12
adcq $0,%r14
// store final value of X2
movq %r8,0(%rdi)
movq %r10,8(%rdi)
movq %r12,16(%rdi)
movq %r14,24(%rdi)
movq 0(%rsp),%r11
movq 8(%rsp),%r12
movq 16(%rsp),%r13
movq 24(%rsp),%r14
movq 32(%rsp),%r15
movq 40(%rsp),%rbx
movq 48(%rsp),%rbp
movq %r11,%rsp
ret