-rw-r--r-- 31111 lib25519-20240321/crypto_nG/merged25519/amd64-maax-opt/ge25519_base.S raw
#include "crypto_asm_hidden.h"
// linker define base
// linker use mask63
// linker use twoexp8_p0
// linker use twoexp8_p123
// linker use twoexp8_p4
/* Assembly for fixed base scalar multiplication.
*
* This assembly has been developed after studying the
* amd64-64-24k implementation of the work "High speed
* high security signatures" by Bernstein et al.
*/
#define mask63 CRYPTO_SHARED_NAMESPACE(mask63)
#define twoexp8_p0 CRYPTO_SHARED_NAMESPACE(twoexp8_p0)
#define twoexp8_p123 CRYPTO_SHARED_NAMESPACE(twoexp8_p123)
#define twoexp8_p4 CRYPTO_SHARED_NAMESPACE(twoexp8_p4)
.p2align 5
ASM_HIDDEN _CRYPTO_SHARED_NAMESPACE(base)
.globl _CRYPTO_SHARED_NAMESPACE(base)
ASM_HIDDEN CRYPTO_SHARED_NAMESPACE(base)
.globl CRYPTO_SHARED_NAMESPACE(base)
_CRYPTO_SHARED_NAMESPACE(base):
CRYPTO_SHARED_NAMESPACE(base):
movq %rsp,%r11
andq $-32,%rsp
subq $496,%rsp
movq %r11,0(%rsp)
movq %r12,8(%rsp)
movq %r13,16(%rsp)
movq %r14,24(%rsp)
movq %r15,32(%rsp)
movq %rbx,40(%rsp)
movq %rbp,48(%rsp)
movq %rdi,56(%rsp)
movq %rsi,64(%rsp)
movq %rdx,72(%rsp)
/* choose t and initialize r */
movq %rdx,%rcx
movzbl 0(%rsi),%eax
movsbq %al,%rdx
movq $0,%rsi
imulq $768,%rsi,%rdi
movq %rdx,%rsi
sarq $7,%rsi
movq %rdx,%r8
addq %rsi,%r8
xorq %rsi,%r8
movq $1,%rsi
movq $0,%r9
movq $0,%rax
movq $0,%r10
movq $1,%r11
movq $0,%r12
movq $0,%r13
movq $0,%r14
cmpq $1,%r8
movq 0(%rcx,%rdi),%r15
cmove %r15,%rsi
movq 8(%rcx,%rdi),%r15
cmove %r15,%r9
movq 16(%rcx,%rdi),%r15
cmove %r15,%rax
movq 24(%rcx,%rdi),%r15
cmove %r15,%r10
movq 32(%rcx,%rdi),%r15
cmove %r15,%r11
movq 40(%rcx,%rdi),%r15
cmove %r15,%r12
movq 48(%rcx,%rdi),%r15
cmove %r15,%r13
movq 56(%rcx,%rdi),%r15
cmove %r15,%r14
cmpq $2,%r8
movq 96(%rcx,%rdi),%r15
cmove %r15,%rsi
movq 104(%rcx,%rdi),%r15
cmove %r15,%r9
movq 112(%rcx,%rdi),%r15
cmove %r15,%rax
movq 120(%rcx,%rdi),%r15
cmove %r15,%r10
movq 128(%rcx,%rdi),%r15
cmove %r15,%r11
movq 136(%rcx,%rdi),%r15
cmove %r15,%r12
movq 144(%rcx,%rdi),%r15
cmove %r15,%r13
movq 152(%rcx,%rdi),%r15
cmove %r15,%r14
cmpq $3,%r8
movq 192(%rcx,%rdi),%r15
cmove %r15,%rsi
movq 200(%rcx,%rdi),%r15
cmove %r15,%r9
movq 208(%rcx,%rdi),%r15
cmove %r15,%rax
movq 216(%rcx,%rdi),%r15
cmove %r15,%r10
movq 224(%rcx,%rdi),%r15
cmove %r15,%r11
movq 232(%rcx,%rdi),%r15
cmove %r15,%r12
movq 240(%rcx,%rdi),%r15
cmove %r15,%r13
movq 248(%rcx,%rdi),%r15
cmove %r15,%r14
cmpq $4,%r8
movq 288(%rcx,%rdi),%r15
cmove %r15,%rsi
movq 296(%rcx,%rdi),%r15
cmove %r15,%r9
movq 304(%rcx,%rdi),%r15
cmove %r15,%rax
movq 312(%rcx,%rdi),%r15
cmove %r15,%r10
movq 320(%rcx,%rdi),%r15
cmove %r15,%r11
movq 328(%rcx,%rdi),%r15
cmove %r15,%r12
movq 336(%rcx,%rdi),%r15
cmove %r15,%r13
movq 344(%rcx,%rdi),%r15
cmove %r15,%r14
cmpq $5,%r8
movq 384(%rcx,%rdi),%r15
cmove %r15,%rsi
movq 392(%rcx,%rdi),%r15
cmove %r15,%r9
movq 400(%rcx,%rdi),%r15
cmove %r15,%rax
movq 408(%rcx,%rdi),%r15
cmove %r15,%r10
movq 416(%rcx,%rdi),%r15
cmove %r15,%r11
movq 424(%rcx,%rdi),%r15
cmove %r15,%r12
movq 432(%rcx,%rdi),%r15
cmove %r15,%r13
movq 440(%rcx,%rdi),%r15
cmove %r15,%r14
cmpq $6,%r8
movq 480(%rcx,%rdi),%r15
cmove %r15,%rsi
movq 488(%rcx,%rdi),%r15
cmove %r15,%r9
movq 496(%rcx,%rdi),%r15
cmove %r15,%rax
movq 504(%rcx,%rdi),%r15
cmove %r15,%r10
movq 512(%rcx,%rdi),%r15
cmove %r15,%r11
movq 520(%rcx,%rdi),%r15
cmove %r15,%r12
movq 528(%rcx,%rdi),%r15
cmove %r15,%r13
movq 536(%rcx,%rdi),%r15
cmove %r15,%r14
cmpq $7,%r8
movq 576(%rcx,%rdi),%r15
cmove %r15,%rsi
movq 584(%rcx,%rdi),%r15
cmove %r15,%r9
movq 592(%rcx,%rdi),%r15
cmove %r15,%rax
movq 600(%rcx,%rdi),%r15
cmove %r15,%r10
movq 608(%rcx,%rdi),%r15
cmove %r15,%r11
movq 616(%rcx,%rdi),%r15
cmove %r15,%r12
movq 624(%rcx,%rdi),%r15
cmove %r15,%r13
movq 632(%rcx,%rdi),%r15
cmove %r15,%r14
cmpq $8,%r8
movq 672(%rcx,%rdi),%r15
cmove %r15,%rsi
movq 680(%rcx,%rdi),%r15
cmove %r15,%r9
movq 688(%rcx,%rdi),%r15
cmove %r15,%rax
movq 696(%rcx,%rdi),%r15
cmove %r15,%r10
movq 704(%rcx,%rdi),%r15
cmove %r15,%r11
movq 712(%rcx,%rdi),%r15
cmove %r15,%r12
movq 720(%rcx,%rdi),%r15
cmove %r15,%r13
movq 728(%rcx,%rdi),%r15
cmove %r15,%r14
cmpq $0,%rdx
movq %rsi,%r15
cmovl %r11,%rsi
cmovl %r15,%r11
movq %r9,%r15
cmovl %r12,%r9
cmovl %r15,%r12
movq %rax,%r15
cmovl %r13,%rax
cmovl %r15,%r13
movq %r10,%r15
cmovl %r14,%r10
cmovl %r15,%r14
movq %r11,%r15
movq %r12,80(%rsp)
movq %r13,88(%rsp)
movq %r14,96(%rsp)
// sub
subq %rsi,%r11
sbbq %r9,%r12
sbbq %rax,%r13
sbbq %r10,%r14
movq $0,%rbx
movq $38,%rbp
cmovae %rbx,%rbp
subq %rbp,%r11
sbbq %rbx,%r12
sbbq %rbx,%r13
sbbq %rbx,%r14
cmovc %rbp,%rbx
subq %rbx,%r11
movq %r11,104(%rsp)
movq %r12,112(%rsp)
movq %r13,120(%rsp)
movq %r14,128(%rsp)
movq $0,136(%rsp)
// add
addq %r15,%rsi
adcq 80(%rsp),%r9
adcq 88(%rsp),%rax
adcq 96(%rsp),%r10
movq $0,%rbx
movq $38,%rbp
cmovae %rbx,%rbp
addq %rbp,%rsi
adcq %rbx,%r9
adcq %rbx,%rax
adcq %rbx,%r10
cmovc %rbp,%rbx
addq %rbx,%rsi
movq %rsi,144(%rsp)
movq %r9,152(%rsp)
movq %rax,160(%rsp)
movq %r10,168(%rsp)
movq $0,176(%rsp)
movq $0,%rsi
movq $0,%r9
movq $0,%rax
movq $0,%r10
cmpq $1,%r8
movq 64(%rcx,%rdi),%r11
cmove %r11,%rsi
movq 72(%rcx,%rdi),%r11
cmove %r11,%r9
movq 80(%rcx,%rdi),%r11
cmove %r11,%rax
movq 88(%rcx,%rdi),%r11
cmove %r11,%r10
cmpq $2,%r8
movq 160(%rcx,%rdi),%r11
cmove %r11,%rsi
movq 168(%rcx,%rdi),%r11
cmove %r11,%r9
movq 176(%rcx,%rdi),%r11
cmove %r11,%rax
movq 184(%rcx,%rdi),%r11
cmove %r11,%r10
cmpq $3,%r8
movq 256(%rcx,%rdi),%r11
cmove %r11,%rsi
movq 264(%rcx,%rdi),%r11
cmove %r11,%r9
movq 272(%rcx,%rdi),%r11
cmove %r11,%rax
movq 280(%rcx,%rdi),%r11
cmove %r11,%r10
cmpq $4,%r8
movq 352(%rcx,%rdi),%r11
cmove %r11,%rsi
movq 360(%rcx,%rdi),%r11
cmove %r11,%r9
movq 368(%rcx,%rdi),%r11
cmove %r11,%rax
movq 376(%rcx,%rdi),%r11
cmove %r11,%r10
cmpq $5,%r8
movq 448(%rcx,%rdi),%r11
cmove %r11,%rsi
movq 456(%rcx,%rdi),%r11
cmove %r11,%r9
movq 464(%rcx,%rdi),%r11
cmove %r11,%rax
movq 472(%rcx,%rdi),%r11
cmove %r11,%r10
cmpq $6,%r8
movq 544(%rcx,%rdi),%r11
cmove %r11,%rsi
movq 552(%rcx,%rdi),%r11
cmove %r11,%r9
movq 560(%rcx,%rdi),%r11
cmove %r11,%rax
movq 568(%rcx,%rdi),%r11
cmove %r11,%r10
cmpq $7,%r8
movq 640(%rcx,%rdi),%r11
cmove %r11,%rsi
movq 648(%rcx,%rdi),%r11
cmove %r11,%r9
movq 656(%rcx,%rdi),%r11
cmove %r11,%rax
movq 664(%rcx,%rdi),%r11
cmove %r11,%r10
cmpq $8,%r8
movq 736(%rcx,%rdi),%r8
cmove %r8,%rsi
movq 744(%rcx,%rdi),%r8
cmove %r8,%r9
movq 752(%rcx,%rdi),%r8
cmove %r8,%rax
movq 760(%rcx,%rdi),%rdi
cmove %rdi,%r10
movq $0,%rdi
movq $0,%rcx
movq $0,%r8
movq $0,%r11
subq %rsi,%rdi
sbbq %r9,%rcx
sbbq %rax,%r8
sbbq %r10,%r11
movq $0,%r12
movq $38,%r13
cmovae %r12,%r13
subq %r13,%rdi
sbbq %r12,%rcx
sbbq %r12,%r8
sbbq %r12,%r11
cmovc %r13,%r12
subq %r12,%rdi
cmpq $0,%rdx
cmovl %rdi,%rsi
cmovl %rcx,%r9
cmovl %r8,%rax
cmovl %r11,%r10
movq %rsi,224(%rsp)
movq %r9,232(%rsp)
movq %rax,240(%rsp)
movq %r10,248(%rsp)
movq $2,184(%rsp)
movq $0,192(%rsp)
movq $0,200(%rsp)
movq $0,208(%rsp)
movq $0,216(%rsp)
/* loop: i=1,i<64,i=i+1 */
movq $1,256(%rsp)
.L:
/* choose t */
movq 72(%rsp),%rcx
movq 256(%rsp),%rsi
movq 64(%rsp),%rdx
movzbl 0(%rdx,%rsi),%eax
movsbq %al,%rdx
imulq $768,%rsi,%rdi
movq %rdx,%rsi
sarq $7,%rsi
movq %rdx,%r8
addq %rsi,%r8
xorq %rsi,%r8
movq $1,%rsi
movq $0,%r9
movq $0,%rax
movq $0,%r10
movq $1,%r11
movq $0,%r12
movq $0,%r13
movq $0,%r14
cmpq $1,%r8
movq 0(%rcx,%rdi),%r15
cmove %r15,%rsi
movq 8(%rcx,%rdi),%r15
cmove %r15,%r9
movq 16(%rcx,%rdi),%r15
cmove %r15,%rax
movq 24(%rcx,%rdi),%r15
cmove %r15,%r10
movq 32(%rcx,%rdi),%r15
cmove %r15,%r11
movq 40(%rcx,%rdi),%r15
cmove %r15,%r12
movq 48(%rcx,%rdi),%r15
cmove %r15,%r13
movq 56(%rcx,%rdi),%r15
cmove %r15,%r14
cmpq $2,%r8
movq 96(%rcx,%rdi),%r15
cmove %r15,%rsi
movq 104(%rcx,%rdi),%r15
cmove %r15,%r9
movq 112(%rcx,%rdi),%r15
cmove %r15,%rax
movq 120(%rcx,%rdi),%r15
cmove %r15,%r10
movq 128(%rcx,%rdi),%r15
cmove %r15,%r11
movq 136(%rcx,%rdi),%r15
cmove %r15,%r12
movq 144(%rcx,%rdi),%r15
cmove %r15,%r13
movq 152(%rcx,%rdi),%r15
cmove %r15,%r14
cmpq $3,%r8
movq 192(%rcx,%rdi),%r15
cmove %r15,%rsi
movq 200(%rcx,%rdi),%r15
cmove %r15,%r9
movq 208(%rcx,%rdi),%r15
cmove %r15,%rax
movq 216(%rcx,%rdi),%r15
cmove %r15,%r10
movq 224(%rcx,%rdi),%r15
cmove %r15,%r11
movq 232(%rcx,%rdi),%r15
cmove %r15,%r12
movq 240(%rcx,%rdi),%r15
cmove %r15,%r13
movq 248(%rcx,%rdi),%r15
cmove %r15,%r14
cmpq $4,%r8
movq 288(%rcx,%rdi),%r15
cmove %r15,%rsi
movq 296(%rcx,%rdi),%r15
cmove %r15,%r9
movq 304(%rcx,%rdi),%r15
cmove %r15,%rax
movq 312(%rcx,%rdi),%r15
cmove %r15,%r10
movq 320(%rcx,%rdi),%r15
cmove %r15,%r11
movq 328(%rcx,%rdi),%r15
cmove %r15,%r12
movq 336(%rcx,%rdi),%r15
cmove %r15,%r13
movq 344(%rcx,%rdi),%r15
cmove %r15,%r14
cmpq $5,%r8
movq 384(%rcx,%rdi),%r15
cmove %r15,%rsi
movq 392(%rcx,%rdi),%r15
cmove %r15,%r9
movq 400(%rcx,%rdi),%r15
cmove %r15,%rax
movq 408(%rcx,%rdi),%r15
cmove %r15,%r10
movq 416(%rcx,%rdi),%r15
cmove %r15,%r11
movq 424(%rcx,%rdi),%r15
cmove %r15,%r12
movq 432(%rcx,%rdi),%r15
cmove %r15,%r13
movq 440(%rcx,%rdi),%r15
cmove %r15,%r14
cmpq $6,%r8
movq 480(%rcx,%rdi),%r15
cmove %r15,%rsi
movq 488(%rcx,%rdi),%r15
cmove %r15,%r9
movq 496(%rcx,%rdi),%r15
cmove %r15,%rax
movq 504(%rcx,%rdi),%r15
cmove %r15,%r10
movq 512(%rcx,%rdi),%r15
cmove %r15,%r11
movq 520(%rcx,%rdi),%r15
cmove %r15,%r12
movq 528(%rcx,%rdi),%r15
cmove %r15,%r13
movq 536(%rcx,%rdi),%r15
cmove %r15,%r14
cmpq $7,%r8
movq 576(%rcx,%rdi),%r15
cmove %r15,%rsi
movq 584(%rcx,%rdi),%r15
cmove %r15,%r9
movq 592(%rcx,%rdi),%r15
cmove %r15,%rax
movq 600(%rcx,%rdi),%r15
cmove %r15,%r10
movq 608(%rcx,%rdi),%r15
cmove %r15,%r11
movq 616(%rcx,%rdi),%r15
cmove %r15,%r12
movq 624(%rcx,%rdi),%r15
cmove %r15,%r13
movq 632(%rcx,%rdi),%r15
cmove %r15,%r14
cmpq $8,%r8
movq 672(%rcx,%rdi),%r15
cmove %r15,%rsi
movq 680(%rcx,%rdi),%r15
cmove %r15,%r9
movq 688(%rcx,%rdi),%r15
cmove %r15,%rax
movq 696(%rcx,%rdi),%r15
cmove %r15,%r10
movq 704(%rcx,%rdi),%r15
cmove %r15,%r11
movq 712(%rcx,%rdi),%r15
cmove %r15,%r12
movq 720(%rcx,%rdi),%r15
cmove %r15,%r13
movq 728(%rcx,%rdi),%r15
cmove %r15,%r14
cmpq $0,%rdx
movq %rsi,%r15
cmovl %r11,%rsi
cmovl %r15,%r11
movq %r9,%r15
cmovl %r12,%r9
cmovl %r15,%r12
movq %rax,%r15
cmovl %r13,%rax
cmovl %r15,%r13
movq %r10,%r15
cmovl %r14,%r10
cmovl %r15,%r14
movq %rsi,264(%rsp)
movq %r9,272(%rsp)
movq %rax,280(%rsp)
movq %r10,288(%rsp)
movq %r11,296(%rsp)
movq %r12,304(%rsp)
movq %r13,312(%rsp)
movq %r14,320(%rsp)
movq $0,%rsi
movq $0,%r9
movq $0,%rax
movq $0,%r10
cmpq $1,%r8
movq 64(%rcx,%rdi),%r11
cmove %r11,%rsi
movq 72(%rcx,%rdi),%r11
cmove %r11,%r9
movq 80(%rcx,%rdi),%r11
cmove %r11,%rax
movq 88(%rcx,%rdi),%r11
cmove %r11,%r10
cmpq $2,%r8
movq 160(%rcx,%rdi),%r11
cmove %r11,%rsi
movq 168(%rcx,%rdi),%r11
cmove %r11,%r9
movq 176(%rcx,%rdi),%r11
cmove %r11,%rax
movq 184(%rcx,%rdi),%r11
cmove %r11,%r10
cmpq $3,%r8
movq 256(%rcx,%rdi),%r11
cmove %r11,%rsi
movq 264(%rcx,%rdi),%r11
cmove %r11,%r9
movq 272(%rcx,%rdi),%r11
cmove %r11,%rax
movq 280(%rcx,%rdi),%r11
cmove %r11,%r10
cmpq $4,%r8
movq 352(%rcx,%rdi),%r11
cmove %r11,%rsi
movq 360(%rcx,%rdi),%r11
cmove %r11,%r9
movq 368(%rcx,%rdi),%r11
cmove %r11,%rax
movq 376(%rcx,%rdi),%r11
cmove %r11,%r10
cmpq $5,%r8
movq 448(%rcx,%rdi),%r11
cmove %r11,%rsi
movq 456(%rcx,%rdi),%r11
cmove %r11,%r9
movq 464(%rcx,%rdi),%r11
cmove %r11,%rax
movq 472(%rcx,%rdi),%r11
cmove %r11,%r10
cmpq $6,%r8
movq 544(%rcx,%rdi),%r11
cmove %r11,%rsi
movq 552(%rcx,%rdi),%r11
cmove %r11,%r9
movq 560(%rcx,%rdi),%r11
cmove %r11,%rax
movq 568(%rcx,%rdi),%r11
cmove %r11,%r10
cmpq $7,%r8
movq 640(%rcx,%rdi),%r11
cmove %r11,%rsi
movq 648(%rcx,%rdi),%r11
cmove %r11,%r9
movq 656(%rcx,%rdi),%r11
cmove %r11,%rax
movq 664(%rcx,%rdi),%r11
cmove %r11,%r10
cmpq $8,%r8
movq 736(%rcx,%rdi),%r8
cmove %r8,%rsi
movq 744(%rcx,%rdi),%r8
cmove %r8,%r9
movq 752(%rcx,%rdi),%r8
cmove %r8,%rax
movq 760(%rcx,%rdi),%rdi
cmove %rdi,%r10
movq $0,%rdi
movq $0,%rcx
movq $0,%r8
movq $0,%r11
subq %rsi,%rdi
sbbq %r9,%rcx
sbbq %rax,%r8
sbbq %r10,%r11
movq $0,%r12
movq $38,%r13
cmovae %r12,%r13
subq %r13,%rdi
sbbq %r12,%rcx
sbbq %r12,%r8
sbbq %r12,%r11
cmovc %r13,%r12
subq %r12,%rdi
cmpq $0,%rdx
cmovl %rdi,%rsi
cmovl %rcx,%r9
cmovl %r8,%rax
cmovl %r11,%r10
movq %rsi,328(%rsp)
movq %r9,336(%rsp)
movq %rax,344(%rsp)
movq %r10,352(%rsp)
/* nielsadd2 */
// load
movq 144(%rsp),%rdx
movq 152(%rsp),%rcx
movq 160(%rsp),%r8
movq 168(%rsp),%r9
movq 176(%rsp),%r13
// copy
movq %rdx,%rax
movq %rcx,%r10
movq %r8,%r11
movq %r9,%r12
movq %r13,%r14
// sub
addq twoexp8_p0(%rip),%rdx
adcq twoexp8_p123(%rip),%rcx
adcq twoexp8_p123(%rip),%r8
adcq twoexp8_p123(%rip),%r9
adcq twoexp8_p4(%rip),%r13
subq 104(%rsp),%rdx
sbbq 112(%rsp),%rcx
sbbq 120(%rsp),%r8
sbbq 128(%rsp),%r9
sbbq 136(%rsp),%r13
shld $1,%r9,%r13
andq mask63(%rip),%r9
imul $19,%r13,%r13
addq %r13,%rdx
adcq $0,%rcx
adcq $0,%r8
adcq $0,%r9
movq %rdx,360(%rsp)
movq %rcx,368(%rsp)
movq %r8,376(%rsp)
movq %r9,384(%rsp)
// add
addq 104(%rsp),%rax
adcq 112(%rsp),%r10
adcq 120(%rsp),%r11
adcq 128(%rsp),%r12
adcq 136(%rsp),%r14
shld $1,%r12,%r14
andq mask63(%rip),%r12
imul $19,%r14,%r14
addq %r14,%rax
adcq $0,%r10
adcq $0,%r11
adcq $0,%r12
movq %rax,400(%rsp)
movq %r10,408(%rsp)
movq %r11,416(%rsp)
movq %r12,424(%rsp)
// mul
xorq %r13,%r13
movq 264(%rsp),%rdx
mulx 360(%rsp),%r8,%r9
mulx 368(%rsp),%rcx,%r10
adcx %rcx,%r9
mulx 376(%rsp),%rcx,%r11
adcx %rcx,%r10
mulx 384(%rsp),%rcx,%r12
adcx %rcx,%r11
adcx %r13,%r12
xorq %r14,%r14
movq 272(%rsp),%rdx
mulx 360(%rsp),%rcx,%rbp
adcx %rcx,%r9
adox %rbp,%r10
mulx 368(%rsp),%rcx,%rbp
adcx %rcx,%r10
adox %rbp,%r11
mulx 376(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 384(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
adcx %r14,%r13
xorq %r15,%r15
movq 280(%rsp),%rdx
mulx 360(%rsp),%rcx,%rbp
adcx %rcx,%r10
adox %rbp,%r11
mulx 368(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 376(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
mulx 384(%rsp),%rcx,%rbp
adcx %rcx,%r13
adox %rbp,%r14
adcx %r15,%r14
xorq %rax,%rax
movq 288(%rsp),%rdx
mulx 360(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 368(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
mulx 376(%rsp),%rcx,%rbp
adcx %rcx,%r13
adox %rbp,%r14
mulx 384(%rsp),%rcx,%rbp
adcx %rcx,%r14
adox %rbp,%r15
adcx %rax,%r15
xorq %rbp,%rbp
movq $38,%rdx
mulx %r12,%rax,%r12
adcx %rax,%r8
adox %r12,%r9
mulx %r13,%rcx,%r13
adcx %rcx,%r9
adox %r13,%r10
mulx %r14,%rcx,%r14
adcx %rcx,%r10
adox %r14,%r11
mulx %r15,%rcx,%r15
adcx %rcx,%r11
adox %rbp,%r15
adcx %rbp,%r15
movq %r8,360(%rsp)
movq %r9,368(%rsp)
movq %r10,376(%rsp)
movq %r11,384(%rsp)
movq %r15,392(%rsp)
// mul
xorq %r13,%r13
movq 296(%rsp),%rdx
mulx 400(%rsp),%r8,%r9
mulx 408(%rsp),%rcx,%r10
adcx %rcx,%r9
mulx 416(%rsp),%rcx,%r11
adcx %rcx,%r10
mulx 424(%rsp),%rcx,%r12
adcx %rcx,%r11
adcx %r13,%r12
xorq %r14,%r14
movq 304(%rsp),%rdx
mulx 400(%rsp),%rcx,%rbp
adcx %rcx,%r9
adox %rbp,%r10
mulx 408(%rsp),%rcx,%rbp
adcx %rcx,%r10
adox %rbp,%r11
mulx 416(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 424(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
adcx %r14,%r13
xorq %r15,%r15
movq 312(%rsp),%rdx
mulx 400(%rsp),%rcx,%rbp
adcx %rcx,%r10
adox %rbp,%r11
mulx 408(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 416(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
mulx 424(%rsp),%rcx,%rbp
adcx %rcx,%r13
adox %rbp,%r14
adcx %r15,%r14
xorq %rax,%rax
movq 320(%rsp),%rdx
mulx 400(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 408(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
mulx 416(%rsp),%rcx,%rbp
adcx %rcx,%r13
adox %rbp,%r14
mulx 424(%rsp),%rcx,%rbp
adcx %rcx,%r14
adox %rbp,%r15
adcx %rax,%r15
xorq %rbp,%rbp
movq $38,%rdx
mulx %r12,%rax,%r12
adcx %rax,%r8
adox %r12,%r9
mulx %r13,%rcx,%r13
adcx %rcx,%r9
adox %r13,%r10
mulx %r14,%rcx,%r14
adcx %rcx,%r10
adox %r14,%r11
mulx %r15,%rcx,%r15
adcx %rcx,%r11
adox %rbp,%r15
adcx %rbp,%r15
// copy
movq %r8,%r12
movq %r9,%r13
movq %r10,%r14
movq %r11,%rax
movq %r15,%rcx
// sub
addq twoexp8_p0(%rip),%r8
adcq twoexp8_p123(%rip),%r9
adcq twoexp8_p123(%rip),%r10
adcq twoexp8_p123(%rip),%r11
adcq twoexp8_p4(%rip),%r15
subq 360(%rsp),%r8
sbbq 368(%rsp),%r9
sbbq 376(%rsp),%r10
sbbq 384(%rsp),%r11
sbbq 392(%rsp),%r15
shld $1,%r11,%r15
andq mask63(%rip),%r11
imul $19,%r15,%r15
addq %r15,%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
movq %r8,400(%rsp)
movq %r9,408(%rsp)
movq %r10,416(%rsp)
movq %r11,424(%rsp)
// add
addq 360(%rsp),%r12
adcq 368(%rsp),%r13
adcq 376(%rsp),%r14
adcq 384(%rsp),%rax
adcq 392(%rsp),%rcx
shld $1,%rax,%rcx
andq mask63(%rip),%rax
imul $19,%rcx,%rcx
addq %rcx,%r12
adcq $0,%r13
adcq $0,%r14
adcq $0,%rax
movq %r12,360(%rsp)
movq %r13,368(%rsp)
movq %r14,376(%rsp)
movq %rax,384(%rsp)
// mul
xorq %r13,%r13
movq 328(%rsp),%rdx
mulx 224(%rsp),%r8,%r9
mulx 232(%rsp),%rcx,%r10
adcx %rcx,%r9
mulx 240(%rsp),%rcx,%r11
adcx %rcx,%r10
mulx 248(%rsp),%rcx,%r12
adcx %rcx,%r11
adcx %r13,%r12
xorq %r14,%r14
movq 336(%rsp),%rdx
mulx 224(%rsp),%rcx,%rbp
adcx %rcx,%r9
adox %rbp,%r10
mulx 232(%rsp),%rcx,%rbp
adcx %rcx,%r10
adox %rbp,%r11
mulx 240(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 248(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
adcx %r14,%r13
xorq %r15,%r15
movq 344(%rsp),%rdx
mulx 224(%rsp),%rcx,%rbp
adcx %rcx,%r10
adox %rbp,%r11
mulx 232(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 240(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
mulx 248(%rsp),%rcx,%rbp
adcx %rcx,%r13
adox %rbp,%r14
adcx %r15,%r14
xorq %rax,%rax
movq 352(%rsp),%rdx
mulx 224(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 232(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
mulx 240(%rsp),%rcx,%rbp
adcx %rcx,%r13
adox %rbp,%r14
mulx 248(%rsp),%rcx,%rbp
adcx %rcx,%r14
adox %rbp,%r15
adcx %rax,%r15
xorq %rbp,%rbp
movq $38,%rdx
mulx %r12,%rax,%r12
adcx %rax,%r8
adox %r12,%r9
mulx %r13,%rcx,%r13
adcx %rcx,%r9
adox %r13,%r10
mulx %r14,%rcx,%r14
adcx %rcx,%r10
adox %r14,%r11
mulx %r15,%rcx,%r15
adcx %rcx,%r11
adox %rbp,%r15
adcx %rbp,%r15
movq 184(%rsp),%r12
movq 192(%rsp),%r13
movq 200(%rsp),%r14
movq 208(%rsp),%rax
movq 216(%rsp),%rbx
// double
addq %r12,%r12
adcq %r13,%r13
adcq %r14,%r14
adcq %rax,%rax
adcq %rbx,%rbx
// copy
movq %r12,%rsi
movq %r13,%rcx
movq %r14,%rdx
movq %rax,%rbp
movq %rbx,%rdi
// sub
addq twoexp8_p0(%rip),%r12
adcq twoexp8_p123(%rip),%r13
adcq twoexp8_p123(%rip),%r14
adcq twoexp8_p123(%rip),%rax
adcq twoexp8_p4(%rip),%rbx
subq %r8,%r12
sbbq %r9,%r13
sbbq %r10,%r14
sbbq %r11,%rax
sbbq %r15,%rbx
shld $1,%rax,%rbx
andq mask63(%rip),%rax
imul $19,%rbx,%rbx
addq %rbx,%r12
adcq $0,%r13
adcq $0,%r14
adcq $0,%rax
movq %r12,464(%rsp)
movq %r13,472(%rsp)
movq %r14,480(%rsp)
movq %rax,488(%rsp)
// add
addq %r8,%rsi
adcq %r9,%rcx
adcq %r10,%rdx
adcq %r11,%rbp
adcq %r15,%rdi
shld $1,%rbp,%rdi
andq mask63(%rip),%rbp
imul $19,%rdi,%rdi
addq %rdi,%rsi
adcq $0,%rcx
adcq $0,%rdx
adcq $0,%rbp
movq %rsi,432(%rsp)
movq %rcx,440(%rsp)
movq %rdx,448(%rsp)
movq %rbp,456(%rsp)
/* p1p1 to p3 */
// mul
xorq %r13,%r13
movq 400(%rsp),%rdx
mulx 464(%rsp),%r8,%r9
mulx 472(%rsp),%rcx,%r10
adcx %rcx,%r9
mulx 480(%rsp),%rcx,%r11
adcx %rcx,%r10
mulx 488(%rsp),%rcx,%r12
adcx %rcx,%r11
adcx %r13,%r12
xorq %r14,%r14
movq 408(%rsp),%rdx
mulx 464(%rsp),%rcx,%rbp
adcx %rcx,%r9
adox %rbp,%r10
mulx 472(%rsp),%rcx,%rbp
adcx %rcx,%r10
adox %rbp,%r11
mulx 480(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 488(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
adcx %r14,%r13
xorq %r15,%r15
movq 416(%rsp),%rdx
mulx 464(%rsp),%rcx,%rbp
adcx %rcx,%r10
adox %rbp,%r11
mulx 472(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 480(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
mulx 488(%rsp),%rcx,%rbp
adcx %rcx,%r13
adox %rbp,%r14
adcx %r15,%r14
xorq %rax,%rax
movq 424(%rsp),%rdx
mulx 464(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 472(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
mulx 480(%rsp),%rcx,%rbp
adcx %rcx,%r13
adox %rbp,%r14
mulx 488(%rsp),%rcx,%rbp
adcx %rcx,%r14
adox %rbp,%r15
adcx %rax,%r15
xorq %rbp,%rbp
movq $38,%rdx
mulx %r12,%rax,%r12
adcx %rax,%r8
adox %r12,%r9
mulx %r13,%rcx,%r13
adcx %rcx,%r9
adox %r13,%r10
mulx %r14,%rcx,%r14
adcx %rcx,%r10
adox %r14,%r11
mulx %r15,%rcx,%r15
adcx %rcx,%r11
adox %rbp,%r15
adcx %rbp,%r15
movq %r8,104(%rsp)
movq %r9,112(%rsp)
movq %r10,120(%rsp)
movq %r11,128(%rsp)
movq %r15,136(%rsp)
// mul
xorq %r13,%r13
movq 360(%rsp),%rdx
mulx 432(%rsp),%r8,%r9
mulx 440(%rsp),%rcx,%r10
adcx %rcx,%r9
mulx 448(%rsp),%rcx,%r11
adcx %rcx,%r10
mulx 456(%rsp),%rcx,%r12
adcx %rcx,%r11
adcx %r13,%r12
xorq %r14,%r14
movq 368(%rsp),%rdx
mulx 432(%rsp),%rcx,%rbp
adcx %rcx,%r9
adox %rbp,%r10
mulx 440(%rsp),%rcx,%rbp
adcx %rcx,%r10
adox %rbp,%r11
mulx 448(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 456(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
adcx %r14,%r13
xorq %r15,%r15
movq 376(%rsp),%rdx
mulx 432(%rsp),%rcx,%rbp
adcx %rcx,%r10
adox %rbp,%r11
mulx 440(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 448(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
mulx 456(%rsp),%rcx,%rbp
adcx %rcx,%r13
adox %rbp,%r14
adcx %r15,%r14
xorq %rax,%rax
movq 384(%rsp),%rdx
mulx 432(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 440(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
mulx 448(%rsp),%rcx,%rbp
adcx %rcx,%r13
adox %rbp,%r14
mulx 456(%rsp),%rcx,%rbp
adcx %rcx,%r14
adox %rbp,%r15
adcx %rax,%r15
xorq %rbp,%rbp
movq $38,%rdx
mulx %r12,%rax,%r12
adcx %rax,%r8
adox %r12,%r9
mulx %r13,%rcx,%r13
adcx %rcx,%r9
adox %r13,%r10
mulx %r14,%rcx,%r14
adcx %rcx,%r10
adox %r14,%r11
mulx %r15,%rcx,%r15
adcx %rcx,%r11
adox %rbp,%r15
adcx %rbp,%r15
movq %r8,144(%rsp)
movq %r9,152(%rsp)
movq %r10,160(%rsp)
movq %r11,168(%rsp)
movq %r15,176(%rsp)
// mul
xorq %r13,%r13
movq 464(%rsp),%rdx
mulx 432(%rsp),%r8,%r9
mulx 440(%rsp),%rcx,%r10
adcx %rcx,%r9
mulx 448(%rsp),%rcx,%r11
adcx %rcx,%r10
mulx 456(%rsp),%rcx,%r12
adcx %rcx,%r11
adcx %r13,%r12
xorq %r14,%r14
movq 472(%rsp),%rdx
mulx 432(%rsp),%rcx,%rbp
adcx %rcx,%r9
adox %rbp,%r10
mulx 440(%rsp),%rcx,%rbp
adcx %rcx,%r10
adox %rbp,%r11
mulx 448(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 456(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
adcx %r14,%r13
xorq %r15,%r15
movq 480(%rsp),%rdx
mulx 432(%rsp),%rcx,%rbp
adcx %rcx,%r10
adox %rbp,%r11
mulx 440(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 448(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
mulx 456(%rsp),%rcx,%rbp
adcx %rcx,%r13
adox %rbp,%r14
adcx %r15,%r14
xorq %rax,%rax
movq 488(%rsp),%rdx
mulx 432(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 440(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
mulx 448(%rsp),%rcx,%rbp
adcx %rcx,%r13
adox %rbp,%r14
mulx 456(%rsp),%rcx,%rbp
adcx %rcx,%r14
adox %rbp,%r15
adcx %rax,%r15
xorq %rbp,%rbp
movq $38,%rdx
mulx %r12,%rax,%r12
adcx %rax,%r8
adox %r12,%r9
mulx %r13,%rcx,%r13
adcx %rcx,%r9
adox %r13,%r10
mulx %r14,%rcx,%r14
adcx %rcx,%r10
adox %r14,%r11
mulx %r15,%rcx,%r15
adcx %rcx,%r11
adox %rbp,%r15
adcx %rbp,%r15
movq %r8,184(%rsp)
movq %r9,192(%rsp)
movq %r10,200(%rsp)
movq %r11,208(%rsp)
movq %r15,216(%rsp)
// mul
xorq %r13,%r13
movq 360(%rsp),%rdx
mulx 400(%rsp),%r8,%r9
mulx 408(%rsp),%rcx,%r10
adcx %rcx,%r9
mulx 416(%rsp),%rcx,%r11
adcx %rcx,%r10
mulx 424(%rsp),%rcx,%r12
adcx %rcx,%r11
adcx %r13,%r12
xorq %r14,%r14
movq 368(%rsp),%rdx
mulx 400(%rsp),%rcx,%rbp
adcx %rcx,%r9
adox %rbp,%r10
mulx 408(%rsp),%rcx,%rbp
adcx %rcx,%r10
adox %rbp,%r11
mulx 416(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 424(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
adcx %r14,%r13
xorq %r15,%r15
movq 376(%rsp),%rdx
mulx 400(%rsp),%rcx,%rbp
adcx %rcx,%r10
adox %rbp,%r11
mulx 408(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 416(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
mulx 424(%rsp),%rcx,%rbp
adcx %rcx,%r13
adox %rbp,%r14
adcx %r15,%r14
xorq %rax,%rax
movq 384(%rsp),%rdx
mulx 400(%rsp),%rcx,%rbp
adcx %rcx,%r11
adox %rbp,%r12
mulx 408(%rsp),%rcx,%rbp
adcx %rcx,%r12
adox %rbp,%r13
mulx 416(%rsp),%rcx,%rbp
adcx %rcx,%r13
adox %rbp,%r14
mulx 424(%rsp),%rcx,%rbp
adcx %rcx,%r14
adox %rbp,%r15
adcx %rax,%r15
xorq %rbp,%rbp
movq $38,%rdx
mulx %r12,%rax,%r12
adcx %rax,%r8
adox %r12,%r9
mulx %r13,%rcx,%r13
adcx %rcx,%r9
adox %r13,%r10
mulx %r14,%rcx,%r14
adcx %rcx,%r10
adox %r14,%r11
mulx %r15,%rcx,%r15
adcx %rcx,%r11
adox %rbp,%r15
adcx %rbp,%r15
shld $1,%r11,%r15
andq mask63(%rip),%r11
imul $19,%r15,%r15
addq %r15,%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
movq %r8,224(%rsp)
movq %r9,232(%rsp)
movq %r10,240(%rsp)
movq %r11,248(%rsp)
movq 256(%rsp),%r15
addq $1,%r15
movq %r15,256(%rsp)
cmpq $63,%r15
jle .L
/* store final value of r */
movq 56(%rsp),%rdi
movq 104(%rsp),%r8
movq 112(%rsp),%r9
movq 120(%rsp),%r10
movq 128(%rsp),%r11
movq 136(%rsp),%r12
shld $1,%r11,%r12
andq mask63(%rip),%r11
imul $19,%r12,%r12
addq %r12,%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq 144(%rsp),%r8
movq 152(%rsp),%r9
movq 160(%rsp),%r10
movq 168(%rsp),%r11
movq 176(%rsp),%r12
shld $1,%r11,%r12
andq mask63(%rip),%r11
imul $19,%r12,%r12
addq %r12,%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
movq %r8,32(%rdi)
movq %r9,40(%rdi)
movq %r10,48(%rdi)
movq %r11,56(%rdi)
movq 184(%rsp),%r8
movq 192(%rsp),%r9
movq 200(%rsp),%r10
movq 208(%rsp),%r11
movq 216(%rsp),%r12
shld $1,%r11,%r12
andq mask63(%rip),%r11
imul $19,%r12,%r12
addq %r12,%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
movq %r8,64(%rdi)
movq %r9,72(%rdi)
movq %r10,80(%rdi)
movq %r11,88(%rdi)
movq 224(%rsp),%r8
movq 232(%rsp),%r9
movq 240(%rsp),%r10
movq 248(%rsp),%r11
movq %r8,96(%rdi)
movq %r9,104(%rdi)
movq %r10,112(%rdi)
movq %r11,120(%rdi)
movq 0(%rsp),%r11
movq 8(%rsp),%r12
movq 16(%rsp),%r13
movq 24(%rsp),%r14
movq 32(%rsp),%r15
movq 40(%rsp),%rbx
movq 48(%rsp),%rbp
movq %r11,%rsp
ret