/*
   This file is adapted from amd64-51/fe25519_square.s:
   Adding loop to perform n squares.
*/
#include "fe51_namespace.h"
#include "consts_namespace.h"

/*
 * fe51_nsquare -- repeated squaring of a field element mod 2^255 - 19
 *
 * Syntax: GAS, AT&T operand order.   ABI: System V AMD64.
 *
 * In:   rdi = result, five 64-bit limbs at byte offsets 0, 8, 16, 24, 32
 *       rsi = input,  five 64-bit limbs at byte offsets 0, 8, 16, 24, 32
 *       rdx = number of squarings
 *       (presumably void fe51_nsquare(fe51 *r, const fe51 *x, int n);
 *        confirm against the C prototype in the calling code)
 * Out:  the five limbs at rdi hold the result, each masked with REDMASK51
 *       plus a small carry into limb 0.
 *
 * Representation: value = x0 + 2^51 x1 + 2^102 x2 + 2^153 x3 + 2^204 x4.
 * All five input limbs are loaded before the first store to the result,
 * so rdi == rsi is handled.
 *
 * The loop is bottom-tested with a signed comparison, so the body runs once
 * even when the count is zero or negative.
 *
 * Register roles inside the loop:
 *   rcx           x0 on entry, doubled, then reused as the low half of r4
 *   r8            x1 on entry, doubled, then multiplied by 19 (38 * x1)
 *   16/24/32(rdi) x2, x3, x4 (kept in memory between iterations)
 *   rsi           remaining iteration count
 *   r9 :r10       r0 accumulator (low:high); r9 later carries the chain
 *   r11:r12       r1 accumulator
 *   r13:r14       r2 accumulator
 *   r15:rbx       r3 accumulator
 *   rcx:rbp       r4 accumulator
 *   rax, rdx      multiply operands and results; rdx later holds REDMASK51
 *
 * Products accumulated per limb (19 folds 2^255 back to the bottom):
 *   r0 = x0^2    + 38 x1 x4 + 38 x2 x3
 *   r1 = 2 x0 x1 + 38 x2 x4 + 19 x3^2
 *   r2 = 2 x0 x2 + x1^2     + 38 x3 x4
 *   r3 = 2 x0 x3 + 2 x1 x2  + 19 x4^2
 *   r4 = 2 x0 x4 + 2 x1 x3  + x2^2
 *
 * Stack: rsp is moved down by (rsp & 31) + 64 bytes; that amount and the
 * callee-saved registers r12-r15, rbx, rbp are stored in the frame and
 * restored before returning.
 * Clobbers: rax, rcx, rdx, rsi, r8-r11, flags.
 */

    .p2align 5
    .global _fe51_nsquare
    .global fe51_nsquare
_fe51_nsquare:
fe51_nsquare:

    /* Carve out a frame; r11 remembers how far rsp was moved. */
    mov     %rsp,%r11
    and     $31,%r11
    add     $64,%r11
    sub     %r11,%rsp

    movq    %r11,0(%rsp)            /* frame size, needed to undo the sub */
    movq    %r12,8(%rsp)
    movq    %r13,16(%rsp)
    movq    %r14,24(%rsp)
    movq    %r15,32(%rsp)
    movq    %rbx,40(%rsp)
    movq    %rbp,48(%rsp)

    /* Load all five input limbs, then seed the result with x2, x3, x4. */
    movq    0(%rsi),%rcx            /* x0 */
    movq    8(%rsi),%r8             /* x1 */
    movq    16(%rsi),%r9            /* x2 */
    movq    24(%rsi),%rax           /* x3 */
    movq    32(%rsi),%rsi           /* x4 (input pointer no longer needed) */
    movq    %r9,16(%rdi)
    movq    %rax,24(%rdi)
    movq    %rsi,32(%rdi)

    mov     %rdx,%rsi               /* iters = count (rdx is needed by mul) */

._loop:
    sub     $1,%rsi                 /* iters -= 1 */

    /* ---- products involving x0 ---- */
    mov     %rcx,%rax
    mul     %rcx                    /* x0 * x0 */
    add     %rcx,%rcx               /* x0 = 2 * x0 for the cross terms */
    mov     %rax,%r9                /* r0 low */
    mov     %rdx,%r10               /* r0 high */

    mov     %rcx,%rax
    mul     %r8                     /* 2 x0 * x1 */
    mov     %rax,%r11               /* r1 low */
    mov     %rdx,%r12               /* r1 high */

    mov     %rcx,%rax
    mulq    16(%rdi)                /* 2 x0 * x2 */
    mov     %rax,%r13               /* r2 low */
    mov     %rdx,%r14               /* r2 high */

    mov     %rcx,%rax
    mulq    24(%rdi)                /* 2 x0 * x3 */
    mov     %rax,%r15               /* r3 low */
    mov     %rdx,%rbx               /* r3 high */

    mov     %rcx,%rax
    mulq    32(%rdi)                /* 2 x0 * x4 */
    mov     %rax,%rcx               /* r4 low (x0 is dead from here) */
    mov     %rdx,%rbp               /* r4 high */

    /* ---- products involving x1 ---- */
    mov     %r8,%rax
    mul     %r8                     /* x1 * x1 */
    add     %r8,%r8                 /* x1 = 2 * x1; done before the add/adc pair */
    add     %rax,%r13
    adc     %rdx,%r14               /* r2 += x1^2 */

    mov     %r8,%rax
    mulq    16(%rdi)                /* 2 x1 * x2 */
    add     %rax,%r15
    adc     %rdx,%rbx               /* r3 += 2 x1 x2 */

    mov     %r8,%rax                /* rax = 2 x1 */
    imulq   $19,%r8,%r8             /* r8  = 38 x1 */
    mulq    24(%rdi)                /* 2 x1 * x3 */
    add     %rax,%rcx
    adc     %rdx,%rbp               /* r4 += 2 x1 x3 */

    mov     %r8,%rax
    mulq    32(%rdi)                /* 38 x1 * x4 */
    add     %rax,%r9
    adc     %rdx,%r10               /* r0 += 38 x1 x4 */

    /* ---- products involving x2 ---- */
    movq    16(%rdi),%rax
    mulq    16(%rdi)                /* x2 * x2 */
    add     %rax,%rcx
    adc     %rdx,%rbp               /* r4 += x2^2 */
    shld    $13,%rcx,%rbp           /* rbp = bits 51 and up of r4 */

    movq    16(%rdi),%rax
    imulq   $38,%rax,%rax
    mulq    24(%rdi)                /* 38 x2 * x3 */
    add     %rax,%r9
    adc     %rdx,%r10               /* r0 += 38 x2 x3 */
    shld    $13,%r9,%r10            /* r10 = bits 51 and up of r0 */

    movq    16(%rdi),%rax
    imulq   $38,%rax,%rax
    mulq    32(%rdi)                /* 38 x2 * x4 */
    add     %rax,%r11
    adc     %rdx,%r12               /* r1 += 38 x2 x4 */

    /* ---- products involving x3 ---- */
    movq    24(%rdi),%rax
    imulq   $19,%rax,%rax
    mulq    24(%rdi)                /* 19 x3 * x3 */
    add     %rax,%r11
    adc     %rdx,%r12               /* r1 += 19 x3^2 */
    shld    $13,%r11,%r12           /* r12 = bits 51 and up of r1 */

    movq    24(%rdi),%rax
    imulq   $38,%rax,%rax
    mulq    32(%rdi)                /* 38 x3 * x4 */
    add     %rax,%r13
    adc     %rdx,%r14               /* r2 += 38 x3 x4 */
    shld    $13,%r13,%r14           /* r14 = bits 51 and up of r2 */

    /* ---- product involving x4 only ---- */
    movq    32(%rdi),%rax
    imulq   $19,%rax,%rax
    mulq    32(%rdi)                /* 19 x4 * x4 */
    add     %rax,%r15
    adc     %rdx,%rbx               /* r3 += 19 x4^2 */
    shld    $13,%r15,%rbx           /* rbx = bits 51 and up of r3 */

    /* ---- keep the low 51 bits of each limb, push the rest one limb up ---- */
    movq    REDMASK51(%rip),%rdx
    and     %rdx,%rcx
    add     %rbx,%rcx               /* r4 = (r4 & mask) + high(r3) */
    and     %rdx,%r9                /* r0 &= mask */
    and     %rdx,%r11
    add     %r10,%r11               /* r1 = (r1 & mask) + high(r0) */
    and     %rdx,%r13
    add     %r12,%r13               /* r2 = (r2 & mask) + high(r1) */
    and     %rdx,%r15
    add     %r14,%r15               /* r3 = (r3 & mask) + high(r2) */
    imulq   $19,%rbp,%rbp           /* high(r4) wraps around times 19 */
    lea     (%r9,%rbp),%r9          /* t = r0 + 19 * high(r4) */

    /* ---- sequential carry chain; r9 holds the running value t ---- */
    mov     %r9,%rax                /* r0 = t */
    shr     $51,%r9
    add     %r11,%r9                /* t = carry + r1 */
    and     %rdx,%rax               /* r0 &= mask */

    mov     %r9,%r8                 /* x1 = t */
    shr     $51,%r9
    add     %r13,%r9                /* t = carry + r2 */
    and     %rdx,%r8                /* x1 &= mask */

    mov     %r9,%r10                /* x2 = t */
    shr     $51,%r9
    add     %r15,%r9                /* t = carry + r3 */
    and     %rdx,%r10
    movq    %r10,16(%rdi)

    mov     %r9,%r10                /* x3 = t */
    shr     $51,%r9
    add     %rcx,%r9                /* t = carry + r4 */
    and     %rdx,%r10
    movq    %r10,24(%rdi)

    mov     %r9,%r10                /* x4 = t */
    shr     $51,%r9
    imulq   $19,%r9,%r9             /* top carry wraps around times 19 */
    lea     (%rax,%r9),%rcx         /* x0 = r0 + 19 * carry */
    and     %rdx,%r10
    movq    %r10,32(%rdi)

    cmp     $0,%rsi
    jg      ._loop                  /* signed: repeat while iters > 0 */

    /* x0 and x1 stayed in registers across iterations; write them out once. */
    movq    %rcx,0(%rdi)
    movq    %r8,8(%rdi)

    /* Restore the saved registers and release the frame. */
    movq    0(%rsp),%r11
    movq    8(%rsp),%r12
    movq    16(%rsp),%r13
    movq    24(%rsp),%r14
    movq    32(%rsp),%r15
    movq    40(%rsp),%rbx
    movq    48(%rsp),%rbp
    add     %r11,%rsp
    ret