-rw-r--r-- 44974 lib25519-20260614/crypto_nG/merged25519/arm64-neonplusuma10l-opt/ge25519_base.S raw
#include "crypto_asm_hidden.h"
// linker define base
/* Assembly for fixed base scalar multiplication.
The code has been optimized using Slothy.
https://github.com/slothy-optimizer/slothy
*/
.p2align 4
ASM_HIDDEN _CRYPTO_SHARED_NAMESPACE(base)
.globl _CRYPTO_SHARED_NAMESPACE(base)
ASM_HIDDEN CRYPTO_SHARED_NAMESPACE(base)
.globl CRYPTO_SHARED_NAMESPACE(base)
/*
 * Fixed-base scalar multiplication entry point.
 *
 * Register arguments, as used by the code below:
 *   x0  output pointer; receives three 40-byte field elements at
 *       offsets 0 (x), 40 (y) and 80 (z)
 *   x1  pointer to 64 signed byte digits (bytes 0..63 are read);
 *       |digit| in 1..8 selects a table entry, any other magnitude
 *       leaves the default entry (1, 1, 0)
 *   x2  pointer to the precomputed table: 960 bytes per digit position
 *       (8 entries of 120 bytes); every entry holds three 40-byte field
 *       elements at offsets 0, 40 and 80
 *   x3  "wantmont" flag: when its low 32 bits equal 1, z+y and z-y are
 *       stored in place of y and z
 *
 * A field element is five 64-bit words; each word packs two 32-bit limbs
 * (26-bit limb in the low half, 25-bit limb in the high half).
 *
 * Table entries are selected with csel over all eight candidates, so every
 * entry of the current digit position is loaded whatever the digit value.
 *
 * Stack frame (608 bytes, plus 64 bytes above it for d8-d15):
 *   sp+0   .. sp+95    saved x19-x30
 *   sp+96  .. sp+119   saved x0, x1, x2
 *   sp+120             loop counter (32 bit)
 *   sp+128 .. sp+143   two constant words added before subtractions
 *                      (twice the limbs of 2^255-19 in this radix)
 *   sp+144 / sp+184 / sp+224 / sp+264
 *                      accumulator: x, y, z and a fourth coordinate
 *                      (presumably t -- confirm against the C reference)
 *   sp+304 .. sp+591   scratch for the loop body
 *   sp+592             saved x3 (wantmont)
 *   sp+608 .. sp+671   saved d8-d15
 *
 * NOTE(review): x18 is used as a scratch register in the loop; AAPCS64
 * reserves it as the platform register on some operating systems.
 */
_CRYPTO_SHARED_NAMESPACE(base):
CRYPTO_SHARED_NAMESPACE(base):
// AAPCS64 makes the low 64 bits of v8-v15 callee-saved and the loop below
// overwrites all of them, so save d8-d15 above the working frame. Pushing
// them first keeps every sp-relative offset in the rest of the routine intact.
stp d8, d9, [sp, #-64]!
stp d10, d11, [sp, #16]
stp d12, d13, [sp, #32]
stp d14, d15, [sp, #48]
sub sp, sp, #608
stp x19, x20, [sp]
stp x21, x22, [sp, #16]
stp x23, x24, [sp, #32]
stp x25, x26, [sp, #48]
stp x27, x28, [sp, #64]
stp x29, x30, [sp, #80]
// keep the arguments on the stack; x0-x3 are reused as scratch below
stp x0, x1, [sp, #96]
str x2, [sp, #112]
str x3, [sp, #592]
// constant words for subtraction: low half 2*(2^26-19) resp. 2*(2^26-1),
// high half 2*(2^25-1)
movz x21, #0xffda
movk x21, #0x07ff, lsl 16
movk x21, #0xfffe, lsl 32
movk x21, #0x03ff, lsl 48
movz x22, #0xfffe
movk x22, #0x07ff, lsl 16
movk x22, #0xfffe, lsl 32
movk x22, #0x03ff, lsl 48
stp x21, x22, [sp, #128]
// 19 is the multiplier for limb products that wrap around 2^255
mov w30, #19
/* choose t and initialize r */
mov x25, x2
// x2 = digit 0 sign-extended, x1 = its sign mask (0 or -1),
// x29 = |digit| computed as (digit + mask) ^ mask
ldrb w29, [x1, #0]
uxtb w28, w29
sxtb x2, w28
mov x28, xzr
mov x1, x2
asr x1, x1, #7
mov x29, x2
add x29, x29, x1
eor x29, x29, x1
// defaults when no entry matches: first and second element = 1
mov x3, #1
mov x8, #1
mov x28, x25
// x3-x7 = element at entry offset 0, x8-x12 = element at entry offset 40
// |digit| == 1: entry 0
cmp x29, #1
ldp x13, x14, [x28,#0]
csel x3, x13, x3, eq
csel x4, x14, xzr, eq
ldp x13, x14, [x28, #16]
csel x5, x13, xzr, eq
csel x6, x14, xzr, eq
ldr x13, [x28, #32]
csel x7, x13, xzr, eq
ldp x13, x14, [x28, #40]
csel x8, x13, x8, eq
csel x9, x14, xzr, eq
ldp x13, x14, [x28, #56]
csel x10, x13, xzr, eq
csel x11, x14, xzr, eq
ldr x13, [x28, #72]
csel x12, x13, xzr, eq
// |digit| == 2: entry 1 (offset 120)
cmp x29, #2
ldp x13, x14, [x28,#120]
csel x3, x13, x3, eq
csel x4, x14, x4, eq
ldp x13, x14, [x28, #136]
csel x5, x13, x5, eq
csel x6, x14, x6, eq
ldr x13, [x28, #152]
csel x7, x13, x7, eq
ldp x13, x14, [x28, #160]
csel x8, x13, x8, eq
csel x9, x14, x9, eq
ldp x13, x14, [x28, #176]
csel x10, x13, x10, eq
csel x11, x14, x11, eq
ldr x13, [x28, #192]
csel x12, x13, x12, eq
// |digit| == 3: entry 2 (offset 240)
cmp x29, #3
ldp x13, x14, [x28,#240]
csel x3, x13, x3, eq
csel x4, x14, x4, eq
ldp x13, x14, [x28, #256]
csel x5, x13, x5, eq
csel x6, x14, x6, eq
ldr x13, [x28, #272]
csel x7, x13, x7, eq
ldp x13, x14, [x28, #280]
csel x8, x13, x8, eq
csel x9, x14, x9, eq
ldp x13, x14, [x28, #296]
csel x10, x13, x10, eq
csel x11, x14, x11, eq
ldr x13, [x28, #312]
csel x12, x13, x12, eq
// |digit| == 4: entry 3 (offset 360)
cmp x29, #4
ldp x13, x14, [x28,#360]
csel x3, x13, x3, eq
csel x4, x14, x4, eq
ldp x13, x14, [x28, #376]
csel x5, x13, x5, eq
csel x6, x14, x6, eq
ldr x13, [x28, #392]
csel x7, x13, x7, eq
ldp x13, x14, [x28, #400]
csel x8, x13, x8, eq
csel x9, x14, x9, eq
ldp x13, x14, [x28, #416]
csel x10, x13, x10, eq
csel x11, x14, x11, eq
ldr x13, [x28, #432]
csel x12, x13, x12, eq
// entries 4..7 are addressed from table+480 so the ldp offsets stay encodable
add x28, x28, #480
// |digit| == 5: entry 4
cmp x29, #5
ldp x13, x14, [x28,#0]
csel x3, x13, x3, eq
csel x4, x14, x4, eq
ldp x13, x14, [x28, #16]
csel x5, x13, x5, eq
csel x6, x14, x6, eq
ldr x13, [x28, #32]
csel x7, x13, x7, eq
ldp x13, x14, [x28, #40]
csel x8, x13, x8, eq
csel x9, x14, x9, eq
ldp x13, x14, [x28, #56]
csel x10, x13, x10, eq
csel x11, x14, x11, eq
ldr x13, [x28, #72]
csel x12, x13, x12, eq
// |digit| == 6: entry 5
cmp x29, #6
ldp x13, x14, [x28,#120]
csel x3, x13, x3, eq
csel x4, x14, x4, eq
ldp x13, x14, [x28, #136]
csel x5, x13, x5, eq
csel x6, x14, x6, eq
ldr x13, [x28, #152]
csel x7, x13, x7, eq
ldp x13, x14, [x28, #160]
csel x8, x13, x8, eq
csel x9, x14, x9, eq
ldp x13, x14, [x28, #176]
csel x10, x13, x10, eq
csel x11, x14, x11, eq
ldr x13, [x28, #192]
csel x12, x13, x12, eq
// |digit| == 7: entry 6
cmp x29, #7
ldp x13, x14, [x28,#240]
csel x3, x13, x3, eq
csel x4, x14, x4, eq
ldp x13, x14, [x28, #256]
csel x5, x13, x5, eq
csel x6, x14, x6, eq
ldr x13, [x28, #272]
csel x7, x13, x7, eq
ldp x13, x14, [x28, #280]
csel x8, x13, x8, eq
csel x9, x14, x9, eq
ldp x13, x14, [x28, #296]
csel x10, x13, x10, eq
csel x11, x14, x11, eq
ldr x13, [x28, #312]
csel x12, x13, x12, eq
// |digit| == 8: entry 7
cmp x29, #8
ldp x13, x14, [x28,#360]
csel x3, x13, x3, eq
csel x4, x14, x4, eq
ldp x13, x14, [x28, #376]
csel x5, x13, x5, eq
csel x6, x14, x6, eq
ldr x13, [x28, #392]
csel x7, x13, x7, eq
ldp x13, x14, [x28, #400]
csel x8, x13, x8, eq
csel x9, x14, x9, eq
ldp x13, x14, [x28, #416]
csel x10, x13, x10, eq
csel x11, x14, x11, eq
ldr x13, [x28, #432]
csel x12, x13, x12, eq
// negative digit: exchange the first and second selected elements
cmp x2, xzr
mov x13, x3
csel x3, x8, x3, lt
csel x8, x13, x8, lt
mov x13, x4
csel x4, x9, x4, lt
csel x9, x13, x9, lt
mov x13, x5
csel x5, x10, x5, lt
csel x10, x13, x10, lt
mov x13, x6
csel x6, x11, x6, lt
csel x11, x13, x11, lt
mov x13, x7
csel x7, x12, x7, lt
csel x12, x13, x12, lt
// sub
// x (sp+144) = second - first, with the sp+128 constants added first
ldp x21, x22, [sp, #128]
add x13, x8, x21
sub x13, x13, x3
add x14, x9, x22
sub x14, x14, x4
add x15, x10, x22
sub x15, x15, x5
add x16, x11, x22
sub x16, x16, x6
add x17, x12, x22
sub x17, x17, x7
stp x13, x14, [sp, #144]
stp x15, x16, [sp, #160]
str x17, [sp, #176]
// add
// y (sp+184) = second + first
add x3, x8, x3
add x4, x9, x4
add x5, x10, x5
add x6, x11, x6
add x7, x12, x7
stp x3, x4, [sp, #184]
stp x5, x6, [sp, #200]
str x7, [sp, #216]
// select the third element (entry offset 80) the same way; default is 0
mov x28, x25
cmp x29, #1
ldp x13, x14, [x28, #80]
csel x3, x13, xzr, eq
csel x4, x14, xzr, eq
ldp x13, x14, [x28, #96]
csel x5, x13, xzr, eq
csel x6, x14, xzr, eq
ldr x13, [x28, #112]
csel x7, x13, xzr, eq
cmp x29, #2
ldp x13, x14, [x28, #200]
csel x3, x13, x3, eq
csel x4, x14, x4, eq
ldp x13, x14, [x28, #216]
csel x5, x13, x5, eq
csel x6, x14, x6, eq
ldr x13, [x28, #232]
csel x7, x13, x7, eq
cmp x29, #3
ldp x13, x14, [x28, #320]
csel x3, x13, x3, eq
csel x4, x14, x4, eq
ldp x13, x14, [x28, #336]
csel x5, x13, x5, eq
csel x6, x14, x6, eq
ldr x13, [x28, #352]
csel x7, x13, x7, eq
cmp x29, #4
ldp x13, x14, [x28, #440]
csel x3, x13, x3, eq
csel x4, x14, x4, eq
ldp x13, x14, [x28, #456]
csel x5, x13, x5, eq
csel x6, x14, x6, eq
ldr x13, [x28, #472]
csel x7, x13, x7, eq
add x28, x28, #480
cmp x29, #5
ldp x13, x14, [x28, #80]
csel x3, x13, x3, eq
csel x4, x14, x4, eq
ldp x13, x14, [x28, #96]
csel x5, x13, x5, eq
csel x6, x14, x6, eq
ldr x13, [x28, #112]
csel x7, x13, x7, eq
cmp x29, #6
ldp x13, x14, [x28, #200]
csel x3, x13, x3, eq
csel x4, x14, x4, eq
ldp x13, x14, [x28, #216]
csel x5, x13, x5, eq
csel x6, x14, x6, eq
ldr x13, [x28, #232]
csel x7, x13, x7, eq
cmp x29, #7
ldp x13, x14, [x28, #320]
csel x3, x13, x3, eq
csel x4, x14, x4, eq
ldp x13, x14, [x28, #336]
csel x5, x13, x5, eq
csel x6, x14, x6, eq
ldr x13, [x28, #352]
csel x7, x13, x7, eq
cmp x29, #8
ldp x13, x14, [x28, #440]
csel x3, x13, x3, eq
csel x4, x14, x4, eq
ldp x13, x14, [x28, #456]
csel x5, x13, x5, eq
csel x6, x14, x6, eq
ldr x13, [x28, #472]
csel x7, x13, x7, eq
// neg
// negative digit: replace the third element by (constants - element)
ldp x21, x22, [sp, #128]
sub x8, x21, x3
sub x9, x22, x4
sub x10, x22, x5
sub x11, x22, x6
sub x12, x22, x7
cmp x2, xzr
csel x3, x8, x3, lt
csel x4, x9, x4, lt
csel x5, x10, x5, lt
csel x6, x11, x6, lt
csel x7, x12, x7, lt
stp x3, x4, [sp, #264]
stp x5, x6, [sp, #280]
str x7, [sp, #296]
// z (sp+224) = 2
mov x3, #2
stp x3, xzr, [sp, #224]
stp xzr, xzr, [sp, #240]
str xzr, [sp, #256]
/* loop: i=1,i<64,i=i+1 */
mov w27, #1
str w27, [sp, #120]
// v31 = 19 in both 32-bit lanes, v30 = 26-bit limb mask in both 64-bit lanes
dup v31.2s, w30
mov w29, #0x03ffffff
dup v30.2d, x29
/*
 * Loop body, one iteration per digit i (w27 = i on entry):
 *   - x4 = table + 960*i, digit i is read from [digits + i]
 *   - the three elements of entry |digit| are selected with csel, and the
 *     sign of the digit swaps the first two / negates the third, as above
 *   - the accumulator at sp+144, sp+184, sp+224 and sp+264 is combined with
 *     the selected entry and written back (presumably a mixed point
 *     addition -- confirm against the C reference)
 * The instructions were interleaved by slothy; the order is significant and
 * must not be changed by hand.
 */
.L:
/* slothy optimized code starts */
ldr x4, [sp, #112]
mov x25, #960
mul w21, w27, w25
ldr x9, [sp, #104]
add x4, x21, x4
ldp x28, x5, [x4, #136]
ldp x1, x6, [x4, #56]
add x9, x9, x27
ldrb w9, [x9]
ldp x26, x23, [x4, #0]
ldr x27, [x4, #272]
uxtb w9, w9
ldp x10, x16, [x4, #40]
sxtb x22, w9
mov x0, #1
mov x9, x22
ldr x7, [x4, #32]
asr x25, x9, #7
ldr x11, [x4, #72]
mov x9, x22
ldr x19, [x4, #192]
ldr x13, [x4, #152]
add x9, x9, x25
eor x25, x9, x25
ldp x12, x9, [x4, #16]
cmp x25, #1
mov x21, #1
csel x2, x26, x0, eq
ldp x3, x29, [x4, #176]
csel x1, x1, xzr, eq
csel x7, x7, xzr, eq
ldr x14, [x4, #312]
csel x12, x12, xzr, eq
ldp x17, x15, [x4, #160]
csel x16, x16, xzr, eq
csel x10, x10, x21, eq
csel x8, x23, xzr, eq
csel x23, x9, xzr, eq
ldp x24, x26, [x4, #120]
csel x0, x6, xzr, eq
csel x11, x11, xzr, eq
ldp x18, x9, [x4, #280]
cmp x25, #2
csel x6, x28, x12, eq
csel x28, x5, x23, eq
csel x5, x3, x1, eq
ldp x1, x20, [x4, #376]
csel x23, x17, x10, eq
csel x24, x24, x2, eq
csel x10, x19, x11, eq
ldp x11, x21, [x4, #296]
csel x0, x29, x0, eq
csel x12, x26, x8, eq
ldp x2, x17, [x4, #256]
csel x16, x15, x16, eq
csel x29, x13, x7, eq
cmp x25, #3
ldr x13, [x4, #432]
csel x26, x21, x0, eq
ldp x7, x21, [x4, #240]
csel x14, x14, x10, eq
csel x3, x27, x29, eq
csel x9, x9, x16, eq
ldr x29, [x4, #392]
ldp x19, x15, [x4, #360]
csel x23, x18, x23, eq
csel x27, x7, x24, eq
csel x0, x11, x5, eq
csel x16, x2, x6, eq
ldp x6, x10, [x4, #400]
csel x11, x21, x12, eq
csel x24, x17, x28, eq
cmp x25, #4
ldp x7, x21, [x4, #416]
add x2, x4, #480
ldp x5, x28, [x2, #40]
csel x14, x13, x14, eq
ldp x8, x12, [x2, #120]
csel x10, x10, x9, eq
csel x17, x15, x11, eq
ldp x9, x18, [x2, #0]
csel x7, x7, x0, eq
csel x3, x29, x3, eq
ldp x0, x29, [x2, #16]
csel x15, x6, x23, eq
csel x13, x21, x26, eq
csel x23, x20, x24, eq
ldr x11, [x2, #192]
csel x1, x1, x16, eq
csel x26, x19, x27, eq
ldp x21, x20, [x2, #56]
mov x27, x4
cmp x25, #5
ldr x4, [x2, #152]
csel x1, x0, x1, eq
csel x18, x18, x17, eq
ldr x17, [x2, #32]
csel x21, x21, x7, eq
ldr x24, [x2, #72]
ldp x7, x6, [x2, #240]
csel x9, x9, x26, eq
mov x27, x27
csel x0, x20, x13, eq
ldp x16, x13, [x2, #136]
csel x3, x17, x3, eq
csel x24, x24, x14, eq
csel x20, x5, x15, eq
ldp x5, x15, [x2, #176]
csel x19, x28, x10, eq
csel x14, x29, x23, eq
cmp x25, #6
csel x13, x13, x14, eq
ldp x17, x23, [x2, #160]
csel x28, x8, x9, eq
csel x14, x11, x24, eq
csel x24, x15, x0, eq
ldp x15, x9, [x2, #296]
csel x16, x16, x1, eq
csel x18, x12, x18, eq
csel x26, x4, x3, eq
ldr x10, [x2, #312]
csel x19, x23, x19, eq
ldp x4, x1, [x2, #256]
csel x0, x17, x20, eq
csel x20, x5, x21, eq
cmp x25, #7
ldp x11, x29, [x2, #280]
csel x23, x9, x24, eq
csel x3, x10, x14, eq
ldr x9, [x2, #272]
csel x8, x6, x18, eq
ldp x14, x12, [x2, #360]
csel x5, x15, x20, eq
csel x18, x29, x19, eq
csel x16, x4, x16, eq
ldp x17, x4, [x2, #400]
csel x24, x7, x28, eq
csel x10, x1, x13, eq
csel x13, x11, x0, eq
ldp x7, x28, [x2, #376]
csel x26, x9, x26, eq
cmp x25, #8
ldp x0, x29, [x2, #416]
add x9, x27, #480
csel x19, x4, x18, eq
csel x6, x12, x8, eq
csel x8, x14, x24, eq
ldr x24, [x2, #392]
csel x15, x7, x16, eq
ldr x21, [x2, #432]
csel x1, x0, x5, eq
csel x5, x28, x10, eq
csel x23, x29, x23, eq
ldp x28, x20, [x27, #200]
csel x2, x17, x13, eq
csel x29, x24, x26, eq
csel x14, x21, x3, eq
ldp x16, x21, [x9, #216]
mov x7, x8
cmp x22, xzr
ldp x26, x0, [x27, #80]
mov x10, x29
csel x4, x2, x8, lt
mov x17, x15
csel x10, x10, x14, lt
csel x24, x17, x1, lt
csel x11, x7, x2, lt
csel x17, x1, x15, lt
mov x18, x6
ldr x12, [x27, #232]
csel x1, x18, x19, lt
ldr x7, [x27, #112]
stp x11, x1, [sp, #344]
ldr x1, [x27, #472]
csel x15, x14, x29, lt
ldp x14, x18, [x27, #336]
csel x6, x19, x6, lt
mov x3, x5
csel x29, x23, x5, lt
ldp x13, x5, [x27, #96]
csel x3, x3, x23, lt
cmp x25, #1
csel x11, x0, xzr, eq
stp x4, x6, [sp, #304]
csel x4, x7, xzr, eq
ldp x2, x6, [x27, #216]
csel x0, x13, xzr, eq
csel x19, x26, xzr, eq
csel x26, x5, xzr, eq
ldp x8, x23, [x9, #96]
ldr x5, [x9, #352]
cmp x25, #2
csel x7, x28, x19, eq
ldp x13, x28, [x27, #320]
csel x26, x6, x26, eq
csel x19, x2, x0, eq
csel x2, x12, x4, eq
ldr x0, [x27, #352]
ldr x4, [x9, #472]
csel x12, x20, x11, eq
ldr x20, [x9, #112]
ldp x6, x11, [x27, #456]
cmp x25, #3
csel x19, x14, x19, eq
csel x14, x18, x26, eq
ldp x27, x26, [x27, #440]
csel x18, x13, x7, eq
csel x7, x0, x2, eq
csel x2, x28, x12, eq
ldr x28, [x9, #232]
cmp x25, #4
ldp x0, x13, [x9, #80]
csel x6, x6, x19, eq
csel x27, x27, x18, eq
csel x2, x26, x2, eq
csel x1, x1, x7, eq
ldp x12, x7, [x9, #336]
csel x14, x11, x14, eq
cmp x25, #5
csel x26, x13, x2, eq
ldp x13, x18, [x9, #200]
csel x0, x0, x27, eq
csel x2, x20, x1, eq
csel x23, x23, x14, eq
ldp x11, x20, [x9, #320]
csel x27, x8, x6, eq
cmp x25, #6
csel x19, x13, x0, eq
ldp x14, x13, [x9, #456]
csel x0, x16, x27, eq
csel x2, x28, x2, eq
csel x23, x21, x23, eq
ldp x1, x27, [x9, #440]
csel x9, x18, x26, eq
cmp x25, #7
csel x8, x12, x0, eq
ldp x26, x21, [sp, #184]
csel x0, x20, x9, eq
stp x24, x3, [sp, #360]
csel x2, x5, x2, eq
ldp x3, x24, [sp, #144]
csel x9, x11, x19, eq
csel x5, x7, x23, eq
ldp x6, x7, [sp, #200]
cmp x25, #8
csel x25, x1, x9, eq
add x20, x26, x3
lsr x9, x20, #32
mov w12, w20
csel x19, x14, x8, eq
ldp x23, x11, [sp, #160]
add x8, x21, x24
add x28, x9, x12, lsr #26
mov w1, w8
lsr x8, x8, #32
add x16, x1, x28, lsr #25
csel x20, x13, x5, eq
csel x5, x27, x0, eq
csel x18, x4, x2, eq
add x27, x8, x16, lsr #26
add x9, x6, x23
mov w4, w9
lsr x9, x9, #32
cmp x22, xzr
add x22, x4, x27, lsr #25
add x4, x7, x11
ldr x13, [sp, #176]
add x0, x9, x22, lsr #26
ldr x14, [sp, #216]
mov w9, w4
lsr x1, x4, #32
add x9, x9, x0, lsr #25
and x16, x16, #0x3ffffff
bfi x16, x27, #32, #25
ldp x4, x2, [sp, #128]
add x8, x1, x9, lsr #26
add x27, x14, x13
lsr x1, x27, #32
mov w27, w27
str x15, [sp, #336]
add x15, x27, x8, lsr #25
add x4, x26, x4
add x27, sp, #304
sub x4, x4, x3
add x26, x1, x15, lsr #26
ld2 {v7.s, v8.s}[0], [x27], #8
and x12, x12, #0x3ffffff
bic x1, x26, #0x1ffffff
add x21, x21, x2
stp x17, x29, [sp, #320]
add x29, x12, x1, lsr #25
ld2 {v16.s, v17.s}[0], [x27], #8
add x12, sp, #344
add x17, x29, x1, lsr #24
and x3, x28, #0x1ffffff
sub x24, x21, x24
ld2 {v7.s, v8.s}[1], [x12], #8
add x28, sp, #424
add x17, x17, x1, lsr #21
ld2 {v5.s, v6.s}[0], [x27], #8
stp x4, x24, [sp, #424]
ld2 {v3.s, v4.s}[0], [x28], #8
add x24, x3, x17, lsr #26
ld2 {v16.s, v17.s}[1], [x12], #8
and x29, x17, #0x3ffffff
bfi x29, x24, #32, #26
ld2 {v24.s, v25.s}[0], [x27], #8
add x3, sp, #464
ld2 {v21.s, v22.s}[0], [x28], #8
stp x29, x16, [sp, #464]
ld2 {v5.s, v6.s}[1], [x12], #8
ld2 {v3.s, v4.s}[1], [x3], #8
add x16, x7, x2
add x6, x6, x2
ld2 {v28.s, v29.s}[0], [x27], #8
sub x7, x6, x23
ld2 {v24.s, v25.s}[1], [x12], #8
sub x1, x16, x11
ld2 {v21.s, v22.s}[1], [x3], #8
umull v10.2d, v7.2s, v4.2s
stp x7, x1, [sp, #440]
ld2 {v19.s, v20.s}[0], [x28], #8
umull v0.2d, v7.2s, v22.2s
and x22, x22, #0x3ffffff
umull v13.2d, v7.2s, v21.2s
and x9, x9, #0x3ffffff
bfi x9, x8, #32, #25
ld2 {v1.s, v2.s}[0], [x28], #8
umlal v10.2d, v8.2s, v3.2s
bfi x22, x0, #32, #25
umlal v0.2d, v8.2s, v21.2s
umlal v0.2d, v16.2s, v4.2s
stp x22, x9, [sp, #480]
ld2 {v19.s, v20.s}[1], [x3], #8
umull v18.2d, v7.2s, v3.2s
umlal v13.2d, v16.2s, v3.2s
ld2 {v1.s, v2.s}[1], [x3], #8
umull v11.2d, v7.2s, v20.2s
umull v12.2d, v7.2s, v19.2s
umull v27.2d, v7.2s, v1.2s
umull v23.2d, v7.2s, v2.2s
umlal v11.2d, v8.2s, v19.2s
umlal v11.2d, v16.2s, v22.2s
umlal v11.2d, v17.2s, v21.2s
umlal v11.2d, v5.2s, v4.2s
umlal v0.2d, v17.2s, v3.2s
and x17, x15, #0x3ffffff
umlal v12.2d, v16.2s, v21.2s
add x21, x14, x2
umlal v27.2d, v16.2s, v19.2s
sub x11, x21, x13
umlal v27.2d, v5.2s, v21.2s
str x11, [sp, #456]
umlal v23.2d, v8.2s, v1.2s
bfi x17, x26, #32, #25
umlal v23.2d, v16.2s, v20.2s
umlal v23.2d, v17.2s, v19.2s
str x17, [sp, #496]
ld2 {v14.s, v15.s}[0], [x28], #8
umlal v12.2d, v5.2s, v3.2s
umlal v11.2d, v6.2s, v3.2s
ld2 {v14.s, v15.s}[1], [x3], #8
umlal v23.2d, v5.2s, v22.2s
umlal v27.2d, v24.2s, v3.2s
mul v26.2s, v14.2s, v31.2s
umull v9.2d, v7.2s, v14.2s
umull v7.2d, v7.2s, v15.2s
mul v15.2s, v15.2s, v31.2s
umlal v11.2d, v25.2s, v26.2s
umlal v9.2d, v16.2s, v1.2s
umlal v12.2d, v24.2s, v26.2s
umlal v7.2d, v8.2s, v14.2s
umlal v7.2d, v16.2s, v2.2s
umlal v7.2d, v17.2s, v1.2s
umlal v7.2d, v5.2s, v20.2s
mul v14.2s, v1.2s, v31.2s
umlal v9.2d, v5.2s, v19.2s
umlal v18.2d, v16.2s, v26.2s
umlal v7.2d, v6.2s, v19.2s
mul v19.2s, v19.2s, v31.2s
umlal v13.2d, v5.2s, v26.2s
umlal v13.2d, v24.2s, v14.2s
umlal v0.2d, v6.2s, v26.2s
umlal v0.2d, v25.2s, v14.2s
mul v1.2s, v21.2s, v31.2s
umlal v10.2d, v17.2s, v26.2s
umlal v10.2d, v6.2s, v14.2s
str x10, [sp, #376]
umlal v10.2d, v25.2s, v19.2s
shl v8.2s, v8.2s, #1
umlal v18.2d, v5.2s, v14.2s
ld2 {v28.s, v29.s}[1], [x12], #8
umlal v7.2d, v24.2s, v22.2s
umlal v9.2d, v24.2s, v21.2s
umlal v9.2d, v28.2s, v3.2s
umlal v9.2d, v8.2s, v2.2s
umlal v18.2d, v24.2s, v19.2s
umlal v18.2d, v28.2s, v1.2s
umlal v10.2d, v29.2s, v1.2s
umlal v10.2d, v16.2s, v15.2s
umlal v0.2d, v29.2s, v19.2s
umlal v13.2d, v28.2s, v19.2s
umlal v12.2d, v28.2s, v14.2s
mul v19.2s, v2.2s, v31.2s
umlal v27.2d, v28.2s, v26.2s
umlal v0.2d, v5.2s, v15.2s
umlal v23.2d, v6.2s, v21.2s
umlal v18.2d, v8.2s, v15.2s
umlal v11.2d, v29.2s, v14.2s
umlal v12.2d, v8.2s, v22.2s
umlal v27.2d, v8.2s, v20.2s
ldp x26, x24, [sp, #128]
shl v17.2s, v17.2s, #1
shl v16.2s, v6.2s, #1
umlal v10.2d, v5.2s, v19.2s
mul v1.2s, v20.2s, v31.2s
umlal v18.2d, v17.2s, v19.2s
mul v5.2s, v22.2s, v31.2s
umlal v23.2d, v24.2s, v4.2s
umlal v23.2d, v25.2s, v3.2s
sub x11, x24, x19
umlal v11.2d, v24.2s, v15.2s
sub x10, x24, x5
shl v2.2s, v29.2s, #1
shl v6.2s, v25.2s, #1
umlal v18.2d, v16.2s, v1.2s
sub x9, x24, x18
umlal v18.2d, v6.2s, v5.2s
csel x21, x9, x18, lt
csel x15, x10, x5, lt
mul v14.2s, v4.2s, v31.2s
umlal v27.2d, v17.2s, v22.2s
sub x9, x26, x25
umlal v10.2d, v24.2s, v1.2s
csel x22, x9, x25, lt
umlal v10.2d, v28.2s, v5.2s
stp x22, x15, [sp, #384]
umlal v18.2d, v2.2s, v14.2s
ldp x15, x10, [sp, #384]
umlal v13.2d, v8.2s, v4.2s
str x21, [sp, #416]
umlal v13.2d, v17.2s, v15.2s
csel x11, x11, x19, lt
umlal v13.2d, v16.2s, v19.2s
ldp x25, x19, [sp, #264]
umlal v13.2d, v6.2s, v1.2s
lsr x22, x15, #32
umlal v13.2d, v2.2s, v5.2s
ldr x21, [sp, #416]
usra v10.2d, v18.2d, #26
ldp x8, x28, [sp, #128]
umlal v0.2d, v24.2s, v19.2s
umull x9, w25, w15
umlal v12.2d, v17.2s, v4.2s
lsr x14, x19, #32
umlal v12.2d, v16.2s, v15.2s
sub x17, x24, x20
umlal v27.2d, v16.2s, v4.2s
csel x29, x17, x20, lt
umlal v27.2d, v6.2s, v15.2s
stp x11, x29, [sp, #400]
umlal v0.2d, v28.2s, v1.2s
lsr x20, x21, #32
usra v13.2d, v10.2d, #25
add x5, sp, #304
umlal v12.2d, v6.2s, v19.2s
add x17, sp, #424
umlal v9.2d, v17.2s, v20.2s
umlal v12.2d, v2.2s, v1.2s
usra v0.2d, v13.2d, #26
umlal v23.2d, v29.2s, v26.2s
umlal v11.2d, v28.2s, v19.2s
usra v12.2d, v0.2d, #25
umlal v9.2d, v16.2s, v22.2s
umlal v27.2d, v2.2s, v19.2s
usra v11.2d, v12.2d, #26
umlal v23.2d, v28.2s, v15.2s
umlal v7.2d, v25.2s, v21.2s
usra v27.2d, v11.2d, #25
umlal v9.2d, v6.2s, v4.2s
umlal v9.2d, v2.2s, v15.2s
usra v23.2d, v27.2d, #26
umlal v7.2d, v28.2s, v4.2s
umlal v7.2d, v29.2s, v3.2s
usra v9.2d, v23.2d, #25
usra v7.2d, v9.2d, #26
ushr v24.2d, v30.2d, #1
bic v1.16B, v7.16B, v24.16B
and v18.16B, v18.16B, v30.16B
usra v18.2d, v1.2d, #25
and v21.16B, v10.16B, v24.16B
and v16.16B, v13.16B, v30.16B
usra v18.2d, v1.2d, #24
and v26.16B, v12.16B, v30.16B
and v17.16B, v0.16B, v24.16B
usra v18.2d, v1.2d, #21
and v1.16B, v27.16B, v30.16B
lsr x29, x10, #32
and v27.16B, v11.16B, v24.16B
lsr x27, x25, #32
usra v21.2d, v18.2d, #26
umull x12, w25, w29
and v14.16B, v9.16B, v30.16B
ldp x23, x16, [sp, #400]
and v20.16B, v18.16B, v30.16B
umull x3, w25, w10
st2 {v20.s, v21.s}[2], [x5], #8
umaddl x12, w27, w10, x12
umull x24, w25, w20
st2 {v16.s, v17.s}[2], [x5], #8
umull x1, w25, w22
umaddl x7, w19, w22, x12
st2 {v20.s, v21.s}[0], [x17], #8
umaddl x18, w27, w21, x24
umull x24, w25, w21
st2 {v26.s, v27.s}[2], [x5], #8
umaddl x12, w14, w15, x7
umaddl x26, w19, w15, x3
and v2.16B, v23.16B, v24.16B
ldp x2, x11, [sp, #304]
and v15.16B, v7.16B, v24.16B
umaddl x3, w27, w15, x1
lsr x1, x16, #32
st2 {v1.s, v2.s}[2], [x5], #8
mul w21, w21, w30
st2 {v14.s, v15.s}[2], [x5], #8
umull x4, w25, w1
umaddl x6, w19, w1, x18
umaddl x18, w19, w21, x9
st2 {v16.s, v17.s}[0], [x17], #8
umaddl x9, w14, w21, x3
add x3, x11, x28
ldp x0, x5, [sp, #424]
add x13, x2, x8
lsr x8, x23, #32
umaddl x4, w27, w16, x4
st2 {v26.s, v27.s}[0], [x17], #8
umaddl x7, w19, w16, x24
add x24, x2, x0
add x11, x11, x5
umaddl x2, w19, w8, x4
st2 {v1.s, v2.s}[0], [x17], #8
umull x4, w25, w16
stp x24, x11, [sp, #424]
ldp x24, x11, [sp, #320]
sub x3, x3, x5
sub x5, x13, x0
umaddl x13, w19, w23, x4
ldp x4, x0, [sp, #440]
stp x5, x3, [sp, #464]
umull x5, w25, w8
st2 {v14.s, v15.s}[0], [x17], #8
umaddl x17, w14, w16, x6
umull x6, w25, w23
mul w25, w20, w30
add x3, x11, x0
umaddl x20, w14, w23, x2
add x2, x24, x4
stp x2, x3, [sp, #440]
ldp x3, x2, [sp, #280]
umaddl x5, w27, w23, x5
mul w16, w16, w30
umaddl x6, w19, w10, x6
umaddl x7, w3, w23, x7
umaddl x20, w3, w29, x20
umaddl x13, w3, w10, x13
umaddl x7, w2, w10, x7
umaddl x5, w19, w29, x5
umaddl x6, w3, w15, x6
add x11, x11, x28
umaddl x17, w3, w8, x17
sub x11, x11, x0
add x0, x24, x28
umaddl x24, w14, w10, x5
umaddl x26, w3, w21, x26
sub x4, x0, x4
umaddl x18, w3, w16, x18
lsr x0, x3, #32
umaddl x5, w2, w15, x13
umaddl x13, w3, w22, x24
umaddl x24, w2, w21, x6
umaddl x17, w0, w23, x17
mul w23, w23, w30
umaddl x6, w0, w10, x20
umaddl x6, w2, w22, x6
stp x4, x11, [sp, #480]
umaddl x26, w2, w16, x26
ldr x11, [sp, #296]
ldr x4, [sp, #336]
umaddl x20, w2, w23, x18
ldr x18, [sp, #456]
umaddl x9, w0, w16, x9
umaddl x13, w0, w15, x13
add x27, x27, x27
umaddl x5, w11, w21, x5
umaddl x5, w27, w8, x5
add x28, x4, x28
add x4, x4, x18
umaddl x12, w0, w21, x12
str x4, [sp, #456]
umaddl x7, w11, w15, x7
lsr x4, x2, #32
umaddl x26, w11, w23, x26
umaddl x17, w2, w29, x17
umaddl x24, w11, w16, x24
umaddl x12, w4, w16, x12
umaddl x17, w4, w10, x17
umaddl x9, w4, w23, x9
sub x18, x28, x18
mul w28, w10, w30
umaddl x13, w4, w21, x13
lsr x10, x11, #32
umaddl x6, w4, w15, x6
umaddl x17, w11, w22, x17
umaddl x9, w10, w28, x9
umaddl x13, w10, w16, x13
umaddl x16, w11, w28, x20
umaddl x20, w27, w29, x24
umaddl x12, w10, w23, x12
umaddl x28, w10, w21, x6
umaddl x19, w19, w25, x9
mul w24, w29, w30
umaddl x23, w27, w25, x16
add x16, sp, #464
umaddl x21, w27, w1, x7
ld2 {v3.s, v4.s}[0], [x16], #8
umaddl x7, w3, w25, x12
mul w12, w1, w30
add x6, x0, x0
add x14, x14, x14
umaddl x0, w10, w15, x17
ld2 {v9.s, v10.s}[0], [x16], #8
umaddl x9, w27, w22, x26
str x18, [sp, #496]
umaddl x1, w3, w12, x19
umaddl x3, w2, w12, x7
ldp x27, x15, [sp, #224]
ld2 {v28.s, v29.s}[0], [x16], #8
umaddl x26, w14, w29, x5
add x17, sp, #424
umaddl x7, w14, w25, x9
add x18, x10, x10
umaddl x10, w14, w12, x23
ld2 {v7.s, v8.s}[0], [x16], #8
mul w23, w8, w30
umaddl x7, w6, w12, x7
add x9, x4, x4
umaddl x5, w14, w8, x21
ld2 {v3.s, v4.s}[1], [x17], #8
ld2 {v12.s, v13.s}[0], [x16], #8
umaddl x21, w6, w23, x10
umaddl x4, w9, w23, x7
add x16, x27, x27
add x10, x15, x15
umaddl x19, w2, w23, x1
umaddl x2, w2, w25, x13
ld2 {v9.s, v10.s}[1], [x17], #8
umaddl x13, w14, w22, x20
mov w14, w10
lsr x15, x10, #32
umaddl x8, w9, w24, x21
umaddl x21, w6, w29, x5
lsr x5, x16, #32
ldp x10, x20, [sp, #240]
mul w7, w22, w30
umaddl x1, w11, w24, x19
mov w16, w16
umaddl x13, w6, w25, x13
add x29, x5, x16, lsr #26
umaddl x5, w18, w7, x8
ldr x8, [sp, #256]
add x19, x10, x10
umaddl x7, w6, w22, x26
umaddl x27, w18, w24, x4
ld2 {v28.s, v29.s}[1], [x17], #8
add x24, x1, x5, lsr #26
umaddl x10, w9, w12, x13
add x20, x20, x20
umaddl x4, w11, w23, x3
umaddl x7, w9, w25, x7
add x1, x27, x24, lsr #25
umaddl x23, w18, w23, x10
add x10, x14, x29, lsr #25
umaddl x3, w11, w12, x2
add x4, x4, x1, lsr #26
add x26, x15, x10, lsr #26
umaddl x12, w18, w12, x7
add x2, x23, x4, lsr #25
mov w14, w20
mov w23, w19
add x15, x23, x26, lsr #25
lsr x7, x19, #32
umaddl x23, w11, w25, x28
add x19, x3, x2, lsr #26
lsr x27, x20, #32
add x6, x7, x15, lsr #26
add x11, x12, x19, lsr #25
umaddl x9, w9, w22, x21
umaddl x9, w18, w25, x9
add x25, x14, x6, lsr #25
add x21, x8, x8
add x14, x23, x11, lsr #26
add x27, x27, x25, lsr #26
and x18, x29, #0x1ffffff
mov w20, w21
add x7, x9, x14, lsr #25
add x20, x20, x27, lsr #25
lsr x9, x21, #32
and x25, x25, #0x3ffffff
add x12, x0, x7, lsr #26
add x23, x9, x20, lsr #26
and x9, x5, #0x3ffffff
and x21, x16, #0x3ffffff
bic x22, x12, #0x1ffffff
add x9, x9, x22, lsr #25
bic x5, x23, #0x1ffffff
and x8, x1, #0x3ffffff
add x21, x21, x5, lsr #25
add x9, x9, x22, lsr #24
and x1, x10, #0x3ffffff
bfi x1, x26, #32, #25
add x0, x21, x5, lsr #24
and x21, x24, #0x1ffffff
add x29, x9, x22, lsr #21
ldp x22, x16, [sp, #128]
add x5, x0, x5, lsr #21
ld2 {v7.s, v8.s}[1], [x17], #8
add x3, sp, #464
add x21, x21, x29, lsr #26
add x9, x18, x5, lsr #26
and x26, x29, #0x3ffffff
and x0, x5, #0x3ffffff
bfi x0, x9, #32, #26
add x10, sp, #544
bfi x8, x4, #32, #25
bfi x26, x21, #32, #26
add x21, x1, x16
add x24, x0, x22
sub x28, x21, x8
sub x18, x24, x26
add x13, x8, x1
stp x18, x28, [x3, #80]
add x0, x26, x0
ld2 {v17.s, v18.s}[0], [x10], #8
and x24, x15, #0x3ffffff
add x15, sp, #504
stp x0, x13, [x3, #40]
ld2 {v12.s, v13.s}[1], [x17], #8
ld2 {v17.s, v18.s}[1], [x15], #8
bfi x25, x27, #32, #25
ld2 {v14.s, v15.s}[0], [x10], #8
and x21, x11, #0x3ffffff
bfi x24, x6, #32, #25
bfi x21, x14, #32, #25
umull v26.2d, v3.2s, v18.2s
and x9, x2, #0x3ffffff
ld2 {v14.s, v15.s}[1], [x15], #8
add x28, x25, x16
sub x17, x28, x21
bfi x9, x19, #32, #25
umull v11.2d, v3.2s, v17.2s
add x5, x24, x16
umull v22.2d, v3.2s, v15.2s
sub x18, x5, x9
umlal v26.2d, v4.2s, v17.2s
stp x18, x17, [x3, #96]
ld2 {v5.s, v6.s}[0], [x10], #8
add x6, x9, x24
umull v25.2d, v3.2s, v14.2s
add x18, x21, x25
umlal v22.2d, v4.2s, v14.2s
stp x6, x18, [x3, #56]
ld2 {v5.s, v6.s}[1], [x15], #8
ld2 {v23.s, v24.s}[0], [x10], #8
umlal v25.2d, v9.2s, v17.2s
umull v16.2d, v3.2s, v6.2s
ld2 {v23.s, v24.s}[1], [x15], #8
umlal v22.2d, v9.2s, v18.2s
umlal v22.2d, v10.2s, v17.2s
umull v21.2d, v3.2s, v24.2s
umull v20.2d, v3.2s, v23.2s
and x4, x7, #0x3ffffff
umlal v16.2d, v4.2s, v5.2s
and x1, x20, #0x3ffffff
umlal v16.2d, v9.2s, v15.2s
bfi x1, x23, #32, #25
umlal v21.2d, v4.2s, v23.2s
bfi x4, x12, #32, #25
umlal v21.2d, v9.2s, v6.2s
add x9, x1, x16
umlal v21.2d, v10.2s, v5.2s
sub x25, x9, x4
umlal v21.2d, v28.2s, v15.2s
str x25, [x3, #112]
umlal v21.2d, v29.2s, v14.2s
umlal v16.2d, v10.2s, v14.2s
umlal v16.2d, v28.2s, v18.2s
add x13, x4, x1
umlal v16.2d, v29.2s, v17.2s
str x13, [x3, #72]
ld2 {v0.s, v1.s}[0], [x10], #8
umlal v21.2d, v7.2s, v18.2s
umlal v21.2d, v8.2s, v17.2s
ld2 {v0.s, v1.s}[1], [x15], #8
umlal v20.2d, v9.2s, v5.2s
umlal v20.2d, v28.2s, v14.2s
umull v27.2d, v3.2s, v0.2s
umull v19.2d, v3.2s, v1.2s
mul v2.2s, v1.2s, v31.2s
umull v1.2d, v3.2s, v5.2s
umlal v27.2d, v9.2s, v23.2s
mul v3.2s, v0.2s, v31.2s
umlal v19.2d, v4.2s, v0.2s
mul v0.2s, v23.2s, v31.2s
umlal v1.2d, v9.2s, v14.2s
umlal v1.2d, v28.2s, v17.2s
umlal v11.2d, v9.2s, v3.2s
umlal v19.2d, v9.2s, v24.2s
umlal v25.2d, v28.2s, v3.2s
umlal v16.2d, v8.2s, v3.2s
umlal v27.2d, v28.2s, v5.2s
umlal v19.2d, v10.2s, v23.2s
umlal v1.2d, v7.2s, v3.2s
shl v4.2s, v4.2s, #1
umlal v25.2d, v7.2s, v0.2s
umlal v19.2d, v28.2s, v6.2s
umlal v27.2d, v7.2s, v14.2s
umlal v26.2d, v10.2s, v3.2s
umlal v26.2d, v29.2s, v0.2s
umlal v11.2d, v28.2s, v0.2s
umlal v27.2d, v12.2s, v17.2s
mul v23.2s, v5.2s, v31.2s
umlal v1.2d, v12.2s, v0.2s
umlal v1.2d, v4.2s, v15.2s
umlal v27.2d, v4.2s, v24.2s
umlal v19.2d, v29.2s, v5.2s
umlal v22.2d, v29.2s, v3.2s
mul v5.2s, v14.2s, v31.2s
umlal v26.2d, v8.2s, v23.2s
umlal v19.2d, v7.2s, v15.2s
umlal v22.2d, v8.2s, v0.2s
umlal v16.2d, v13.2s, v0.2s
umlal v26.2d, v13.2s, v5.2s
umlal v26.2d, v9.2s, v2.2s
mul v9.2s, v24.2s, v31.2s
umlal v20.2d, v7.2s, v17.2s
umlal v22.2d, v13.2s, v23.2s
umlal v16.2d, v7.2s, v2.2s
umlal v19.2d, v8.2s, v14.2s
umlal v11.2d, v7.2s, v23.2s
umlal v20.2d, v12.2s, v3.2s
umlal v20.2d, v4.2s, v6.2s
umlal v16.2d, v12.2s, v9.2s
umlal v21.2d, v13.2s, v3.2s
umlal v22.2d, v28.2s, v2.2s
mul v14.2s, v6.2s, v31.2s
umlal v11.2d, v12.2s, v5.2s
umlal v26.2d, v28.2s, v9.2s
umlal v22.2d, v7.2s, v9.2s
umlal v22.2d, v12.2s, v14.2s
umlal v19.2d, v12.2s, v18.2s
umlal v26.2d, v7.2s, v14.2s
mul v24.2s, v15.2s, v31.2s
umlal v11.2d, v4.2s, v2.2s
umlal v25.2d, v12.2s, v23.2s
umlal v25.2d, v4.2s, v18.2s
umlal v26.2d, v12.2s, v24.2s
umlal v19.2d, v13.2s, v17.2s
shl v29.2s, v29.2s, #1
shl v10.2s, v10.2s, #1
mul v0.2s, v18.2s, v31.2s
umlal v11.2d, v10.2s, v9.2s
umlal v25.2d, v10.2s, v2.2s
umlal v25.2d, v29.2s, v9.2s
umlal v1.2d, v10.2s, v18.2s
shl v5.2s, v8.2s, #1
shl v3.2s, v13.2s, #1
umlal v11.2d, v29.2s, v14.2s
umlal v11.2d, v5.2s, v24.2s
umlal v11.2d, v3.2s, v0.2s
umlal v1.2d, v29.2s, v2.2s
umlal v25.2d, v5.2s, v14.2s
umlal v25.2d, v3.2s, v24.2s
usra v26.2d, v11.2d, #26
umlal v1.2d, v5.2s, v9.2s
umlal v1.2d, v3.2s, v14.2s
usra v25.2d, v26.2d, #25
umlal v20.2d, v10.2s, v15.2s
umlal v20.2d, v29.2s, v18.2s
usra v22.2d, v25.2d, #26
umlal v27.2d, v10.2s, v6.2s
umlal v27.2d, v29.2s, v15.2s
usra v1.2d, v22.2d, #25
umlal v20.2d, v5.2s, v2.2s
umlal v20.2d, v3.2s, v9.2s
usra v16.2d, v1.2d, #26
umlal v21.2d, v12.2s, v2.2s
and v14.16B, v11.16B, v30.16B
usra v20.2d, v16.2d, #25
add x16, sp, #144
umlal v27.2d, v5.2s, v18.2s
add x22, sp, #424
umlal v27.2d, v3.2s, v2.2s
add x29, sp, #184
usra v21.2d, v20.2d, #26
add x1, sp, #224
ushr v8.2d, v30.2d, #1
add x19, sp, #544
and v10.16B, v1.16B, v30.16B
add x13, sp, #464
usra v27.2d, v21.2d, #25
and v28.16B, v25.16B, v30.16B
and v18.16B, v26.16B, v8.16B
usra v19.2d, v27.2d, #26
and v11.16B, v16.16B, v8.16B
and v20.16B, v20.16B, v30.16B
bic v17.16B, v19.16B, v8.16B
ld2 {v25.s, v26.s}[0], [x19], #8
usra v14.2d, v17.2d, #25
and v21.16B, v21.16B, v8.16B
and v29.16B, v22.16B, v8.16B
usra v14.2d, v17.2d, #24
ld2 {v0.s, v1.s}[0], [x19], #8
and v4.16B, v19.16B, v8.16B
usra v14.2d, v17.2d, #21
ld2 {v25.s, v26.s}[1], [x13], #8
and v3.16B, v27.16B, v30.16B
usra v18.2d, v14.2d, #26
and v17.16B, v14.16B, v30.16B
ld2 {v0.s, v1.s}[1], [x13], #8
st2 {v17.s, v18.s}[2], [x29], #8
st2 {v28.s, v29.s}[2], [x29], #8
st2 {v10.s, v11.s}[2], [x29], #8
st2 {v20.s, v21.s}[2], [x29], #8
st2 {v17.s, v18.s}[0], [x16], #8
st2 {v28.s, v29.s}[0], [x16], #8
st2 {v10.s, v11.s}[0], [x16], #8
st2 {v20.s, v21.s}[0], [x16], #8
ld2 {v21.s, v22.s}[0], [x19], #8
st2 {v3.s, v4.s}[2], [x29], #8
ld2 {v27.s, v28.s}[0], [x19], #8
mul v13.2s, v0.2s, v31.2s
add x29, sp, #504
ld2 {v21.s, v22.s}[1], [x13], #8
ld2 {v11.s, v12.s}[0], [x29], #8
ld2 {v14.s, v15.s}[0], [x19], #8
ld2 {v27.s, v28.s}[1], [x13], #8
ld2 {v11.s, v12.s}[1], [x22], #8
ld2 {v17.s, v18.s}[0], [x29], #8
ld2 {v14.s, v15.s}[1], [x13], #8
umull v24.2d, v11.2s, v26.2s
umull v19.2d, v11.2s, v25.2s
mul v7.2s, v15.2s, v31.2s
mul v29.2s, v14.2s, v31.2s
umull v8.2d, v11.2s, v14.2s
ld2 {v17.s, v18.s}[1], [x22], #8
umull v20.2d, v11.2s, v15.2s
umull v15.2d, v11.2s, v27.2s
umlal v19.2d, v17.2s, v29.2s
umull v6.2d, v11.2s, v28.2s
umlal v20.2d, v12.2s, v14.2s
umlal v8.2d, v17.2s, v27.2s
umull v23.2d, v11.2s, v0.2s
umull v9.2d, v11.2s, v21.2s
umlal v24.2d, v12.2s, v25.2s
st2 {v3.s, v4.s}[0], [x16], #8
umlal v20.2d, v17.2s, v28.2s
umlal v20.2d, v18.2s, v27.2s
umull v16.2d, v11.2s, v22.2s
umlal v24.2d, v18.2s, v29.2s
umlal v9.2d, v17.2s, v0.2s
umull v14.2d, v11.2s, v1.2s
ld2 {v10.s, v11.s}[0], [x29], #8
umlal v6.2d, v12.2s, v27.2s
umlal v6.2d, v17.2s, v22.2s
umlal v14.2d, v12.2s, v0.2s
mul v27.2s, v27.2s, v31.2s
ld2 {v4.s, v5.s}[0], [x29], #8
ld2 {v10.s, v11.s}[1], [x22], #8
umlal v14.2d, v17.2s, v26.2s
ld2 {v2.s, v3.s}[0], [x29], #8
umlal v20.2d, v10.2s, v22.2s
umlal v6.2d, v18.2s, v21.2s
ld2 {v4.s, v5.s}[1], [x22], #8
umlal v16.2d, v12.2s, v21.2s
umlal v16.2d, v17.2s, v1.2s
ld2 {v2.s, v3.s}[1], [x22], #8
umlal v8.2d, v10.2s, v21.2s
umlal v14.2d, v18.2s, v25.2s
umlal v14.2d, v11.2s, v29.2s
umlal v9.2d, v10.2s, v25.2s
umlal v9.2d, v4.2s, v29.2s
umlal v23.2d, v17.2s, v25.2s
umlal v23.2d, v10.2s, v29.2s
umlal v19.2d, v10.2s, v27.2s
umlal v16.2d, v18.2s, v0.2s
umlal v16.2d, v10.2s, v26.2s
umlal v16.2d, v11.2s, v25.2s
umlal v6.2d, v10.2s, v1.2s
umlal v6.2d, v11.2s, v0.2s
umlal v20.2d, v11.2s, v21.2s
umlal v15.2d, v17.2s, v21.2s
mul v21.2s, v21.2s, v31.2s
umlal v24.2d, v11.2s, v27.2s
umlal v20.2d, v4.2s, v1.2s
umlal v6.2d, v4.2s, v26.2s
umlal v19.2d, v4.2s, v21.2s
umlal v15.2d, v10.2s, v0.2s
umlal v15.2d, v4.2s, v25.2s
umlal v20.2d, v5.2s, v0.2s
umlal v8.2d, v4.2s, v0.2s
mul v0.2s, v28.2s, v31.2s
umlal v19.2d, v2.2s, v13.2s
umlal v9.2d, v2.2s, v27.2s
umlal v24.2d, v5.2s, v21.2s
umlal v14.2d, v5.2s, v27.2s
umlal v16.2d, v5.2s, v29.2s
umlal v16.2d, v3.2s, v27.2s
shl v12.2s, v12.2s, #1
umlal v15.2d, v2.2s, v29.2s
umlal v24.2d, v3.2s, v13.2s
umlal v23.2d, v4.2s, v27.2s
umlal v24.2d, v17.2s, v7.2s
umlal v8.2d, v2.2s, v25.2s
umlal v8.2d, v12.2s, v28.2s
umlal v14.2d, v3.2s, v21.2s
umlal v16.2d, v4.2s, v7.2s
umlal v16.2d, v2.2s, v0.2s
umlal v6.2d, v5.2s, v25.2s
umlal v23.2d, v2.2s, v21.2s
umlal v14.2d, v10.2s, v7.2s
mul v21.2s, v26.2s, v31.2s
umlal v24.2d, v10.2s, v0.2s
umlal v9.2d, v12.2s, v1.2s
mul v17.2s, v22.2s, v31.2s
umlal v23.2d, v12.2s, v26.2s
umlal v14.2d, v4.2s, v0.2s
mul v13.2s, v1.2s, v31.2s
umlal v24.2d, v4.2s, v17.2s
umlal v15.2d, v12.2s, v22.2s
umlal v19.2d, v12.2s, v7.2s
umlal v14.2d, v2.2s, v17.2s
shl v18.2s, v18.2s, #1
shl v12.2s, v11.2s, #1
umlal v24.2d, v2.2s, v13.2s
umlal v19.2d, v18.2s, v0.2s
umlal v9.2d, v18.2s, v26.2s
umlal v23.2d, v18.2s, v7.2s
umlal v23.2d, v12.2s, v0.2s
shl v27.2s, v5.2s, #1
shl v28.2s, v3.2s, #1
umlal v19.2d, v12.2s, v17.2s
umlal v19.2d, v27.2s, v13.2s
umlal v19.2d, v28.2s, v21.2s
umlal v23.2d, v27.2s, v17.2s
umlal v23.2d, v28.2s, v13.2s
umlal v9.2d, v12.2s, v7.2s
usra v24.2d, v19.2d, #26
umlal v15.2d, v18.2s, v1.2s
umlal v15.2d, v12.2s, v26.2s
usra v23.2d, v24.2d, #25
umlal v9.2d, v27.2s, v0.2s
umlal v9.2d, v28.2s, v17.2s
usra v14.2d, v23.2d, #26
umlal v15.2d, v27.2s, v7.2s
umlal v15.2d, v28.2s, v0.2s
usra v9.2d, v14.2d, #25
umlal v8.2d, v18.2s, v22.2s
umlal v8.2d, v12.2s, v1.2s
usra v16.2d, v9.2d, #26
umlal v6.2d, v3.2s, v29.2s
umlal v6.2d, v2.2s, v7.2s
usra v15.2d, v16.2d, #25
umlal v8.2d, v27.2s, v26.2s
umlal v8.2d, v28.2s, v7.2s
add x24, sp, #264
usra v6.2d, v15.2d, #26
umlal v20.2d, v2.2s, v26.2s
umlal v20.2d, v3.2s, v25.2s
usra v8.2d, v6.2d, #25
and v28.16B, v9.16B, v30.16B
and v1.16B, v23.16B, v30.16B
usra v20.2d, v8.2d, #26
ushr v2.2d, v30.2d, #1
and v21.16B, v19.16B, v30.16B
bic v27.16B, v20.16B, v2.16B
and v18.16B, v6.16B, v2.16B
usra v21.2d, v27.2d, #25
and v29.16B, v16.16B, v2.16B
and v25.16B, v8.16B, v30.16B
usra v21.2d, v27.2d, #24
and v26.16B, v20.16B, v2.16B
and v11.16B, v24.16B, v2.16B
usra v21.2d, v27.2d, #21
and v2.16B, v14.16B, v2.16B
and v17.16B, v15.16B, v30.16B
usra v11.2d, v21.2d, #26
and v10.16B, v21.16B, v30.16B
st2 {v10.s, v11.s}[0], [x1], #8
st2 {v10.s, v11.s}[2], [x24], #8
st2 {v1.s, v2.s}[2], [x24], #8
st2 {v1.s, v2.s}[0], [x1], #8
st2 {v28.s, v29.s}[0], [x1], #8
st2 {v28.s, v29.s}[2], [x24], #8
st2 {v17.s, v18.s}[2], [x24], #8
st2 {v25.s, v26.s}[2], [x24], #8
st2 {v17.s, v18.s}[0], [x1], #8
st2 {v25.s, v26.s}[0], [x1], #8
/* slothy optimized code ends */
// advance the digit index kept at sp+120; iterate while it is <= 63
ldr w27, [sp, #120]
add w27, w27, #1
str w27, [sp, #120]
cmp w27, #63
ble .L
// reload the output pointer saved in the prologue
ldr x0, [sp, #96]
// x
ldp x3, x4, [sp, #144]
ldp x5, x6, [sp, #160]
ldr x7, [sp, #176]
stp x3, x4, [x0, #0]
stp x5, x6, [x0, #16]
str x7, [x0, #32]
// y
ldp x3, x4, [sp, #184]
ldp x5, x6, [sp, #200]
ldr x7, [sp, #216]
// z
ldp x8, x9, [sp, #224]
ldp x10, x11, [sp, #240]
ldr x12, [sp, #256]
// z+y
add x13, x8, x3
add x14, x9, x4
add x15, x10, x5
add x16, x11, x6
add x17, x12, x7
// z-y
// the sp+128 constants are added first so the limb-wise difference
// does not go negative
ldp x21, x23, [sp, #128]
add x20, x8, x21
add x22, x9, x23
add x24, x10, x23
add x26, x11, x23
add x28, x12, x23
sub x20, x20, x3
sub x22, x22, x4
sub x24, x24, x5
sub x26, x26, x6
sub x28, x28, x7
// only the low 32 bits of the saved x3 are tested
ldr w2, [sp, #592]
cmp w2, #1
// cselect(y,z+y,wantmont)
csel x3, x13, x3, eq
csel x4, x14, x4, eq
csel x5, x15, x5, eq
csel x6, x16, x6, eq
csel x7, x17, x7, eq
stp x3, x4, [x0, #40]
stp x5, x6, [x0, #56]
str x7, [x0, #72]
// cselect(z,z-y,wantmont)
csel x8, x20, x8, eq
csel x9, x22, x9, eq
csel x10, x24, x10, eq
csel x11, x26, x11, eq
csel x12, x28, x12, eq
stp x8, x9, [x0, #80]
stp x10, x11, [x0, #96]
str x12, [x0, #112]
// restore callee-saved general registers and release the working frame
ldp x29, x30, [sp, #80]
ldp x27, x28, [sp, #64]
ldp x25, x26, [sp, #48]
ldp x23, x24, [sp, #32]
ldp x21, x22, [sp, #16]
ldp x19, x20, [sp, #0]
add sp, sp, #608
// restore d8-d15 saved at entry and pop their 64-byte save area
ldp d14, d15, [sp, #48]
ldp d12, d13, [sp, #32]
ldp d10, d11, [sp, #16]
ldp d8, d9, [sp], #64
ret
.section .note.GNU-stack,"",@progbits