-rw-r--r-- 28673 lib25519-20260614/crypto_pow/inv25519/arm64-safegcdneon/asm.S raw
#include "crypto_asm_hidden.h" /* This implementation has been done after studying the implementation provided in amd64-safegcd. */ .p2align 4 ASM_HIDDEN _CRYPTO_SHARED_NAMESPACE(asm) .globl _CRYPTO_SHARED_NAMESPACE(asm) ASM_HIDDEN CRYPTO_SHARED_NAMESPACE(asm) .globl CRYPTO_SHARED_NAMESPACE(asm) _CRYPTO_SHARED_NAMESPACE(asm): CRYPTO_SHARED_NAMESPACE(asm): sub sp, sp, #928 stp x19, x20, [sp, #0] stp x21, x22, [sp, #16] stp x23, x24, [sp, #32] stp x25, x26, [sp, #48] stp x27, x28, [sp, #64] stp x29, x30, [sp, #80] str x1,[sp, #96] mov x28, #19 ldp x4, x5, [x0, #0] ldp x6, x7, [x0, #16] mov x3, x7 asr x3, x3, #63 bic x7, x7, 0x8000000000000000 and x3, x3, x28 add x3, x3, x28 adds x4, x4, x3 adcs x5, x5, xzr adcs x6, x6, xzr adc x7, x7, xzr mov x3, x7 asr x3, x3, #63 bic x7, x7, 0x8000000000000000 and x3, x3, x28 adds x4, x4, x3 adcs x5, x5, xzr adcs x6, x6, xzr adc x7, x7, xzr subs x4, x4, x28 sbcs x5, x5, xzr sbcs x6, x6, xzr sbc x7, x7, xzr bic x9, x4, 0xf000000000000000 ldr q10, [x2, #288] ldr q11, [x2, #304] str q10, [sp, #144] str q11, [sp, #160] mov x3, x4 and x3, x3, 0x3fffffff str x3, [sp, #160] ldr q10, [x2, #320] ldr q11, [x2, #336] str q10, [sp, #176] str q11, [sp, #192] mov x3, x4 lsr x3, x3, #30 and x3, x3, 0x3fffffff str x3, [sp, #192] ldr q10, [x2, #352] ldr q11, [x2, #368] str q10, [sp, #208] str q11, [sp, #224] mov x3, x5 lsl x3, x3, #4 lsr x4, x4, #60 orr x4, x3, x4 and x4, x4, 0x3fffffff str x4, [sp, #224] ldr q10, [x2, #384] ldr q11, [x2, #400] str q10, [sp, #240] str q11, [sp, #256] mov x4, x5 lsr x4, x4, #26 and x4, x4, 0x3fffffff str x4, [sp, #256] ldr q10, [x2, #416] ldr q11, [x2, #432] str q10, [sp, #272] str q11, [sp, #288] mov x3, x6 lsl x3, x3, #8 lsr x5, x5, #56 orr x5, x3, x5 and x5, x5, 0x3fffffff str x5, [sp, #288] ldr q10, [x2, #448] ldr q11, [x2, #464] str q10, [sp, #304] str q11, [sp, #320] mov x4, x6 lsr x4, x4, #22 and x4, x4, 0x3fffffff str x4, [sp, #320] ldr q10, [x2, #480] ldr q11, [x2, #496] str q10, [sp, #336] str q11, [sp, #352] mov x3, x7 lsl x3, x3, #12 lsr x6, x6, #52 orr x6, x3, x6 and x6, x6, 0x3fffffff str x6, [sp, #352] ldr q10, [x2, #512] ldr q11, [x2, #528] str q10, [sp, #368] str q11, [sp, #384] mov x4, x7 lsr x4, x4, #18 and x4, x4, 0x3fffffff str x4, [sp, #384] ldr q10, [x2, #544] ldr q11, [x2, #560] str q10, [sp, #400] str q11, [sp, #416] lsr x7, x7, #48 str x7, [sp, #416] mov x5, #-1 eor x4, x4, x4 stp x4, x5, [sp, #112] add x29, sp, #816 mov x6, #-0x4000000000000000 mov x5, #0x100000 stp x6, x5, [x29, #0] mov x6, #-0x20000000000 mov x5, #-0x100000 stp x6, x5, [x29, #16] movz x6, #0 movk x6, #0x0010, lsl 16 movk x6, #0x0200, lsl 32 str x6, [x29, #32] ldr q0, [x2, #0] str q0, [sp, #432] ldr q1, [x2, #16] str q1, [sp, #448] ldr q0, [x2, #32] str q0, [sp, #464] ldr q1, [x2, #48] str q1, [sp, #480] ldr q0, [x2, #64] str q0, [sp, #496] ldr q1, [x2, #80] str q1, [sp, #512] ldr q0, [x2, #96] str q0, [sp, #528] ldr q1, [x2, #112] str q1, [sp, #544] ldr q0, [x2, #128] str q0, [sp, #560] ldr q1, [x2, #144] str q1, [sp, #576] ldr q0, [x2, #160] str q0, [sp, #592] ldr q1, [x2, #176] str q1, [sp, #608] ldr q0, [x2, #192] str q0, [sp, #624] ldr q1, [x2, #208] str q1, [sp, #640] ldr q0, [x2, #224] str q0, [sp, #656] ldr q1, [x2, #240] str q1, [sp, #672] ldr q0, [x2, #256] ldr q1, [x2, #272] mov x8, #10 mov x10, #0x1000000000000000 eor x11, x11, x11 mov x12, x10 eor x13, x13, x13 mov x14, #-1 mov x15, #-1 mov x7, #-19 ._bigloop: mul x16, x9, x12 smulh x3, x9, x12 mul x29, x7, x13 smulh x30, x7, x13 adds x16, x16, x29 adc x3, x3, x30 lsl x3, x3, #4 lsr x16, x16, #60 orr x16, x16, x3 mul x1, x7, x10 smulh x6, x7, x10 mul x29, x9, x11 smulh x30, x9, x11 adds x7, x1, x29 adc x3, x6, x30 lsl x3, x3, #4 lsr x7, x7, #60 orr x7, x7, x3 mov v2.d[0], x11 mov v2.d[1], x13 mul x11, x15, x11 mul x15, x12, x15 mul x13, x14, x13 mul x14, x10, x14 add x14, x11, x14 add x15, x13, x15 add x7, x14, x7 add x2, x16, x15 ldr q4, [sp, #144] ldr q5, [sp, #160] bic x9, x7, x5 ._loop20_init: bic x3, x2, x5 mov v6.d[0], x10 ldr x29, [sp, #832] ldr x30, [sp, #816] add x9, x9, x29 add x3, x3, x30 mov v6.d[1], x12 ldr x10, [sp, #120] mov x11, x3 mov v8.d[0], v5.d[0] mov v8.d[1], v5.d[1] mov v9.d[0], v4.d[0] mov v9.d[1], v4.d[1] add x12, x3, x9 tst x3, #1 csel x10, x4, x10, ne mov v7.d[0], v6.d[1] mov v7.d[1], v6.d[1] mov v6.d[1], v6.d[0] csel x12, x3, x12, eq add x13, x4, #1 mov v3.d[0], v2.d[1] mov v3.d[1], v2.d[1] mov v2.d[1], v2.d[0] sub x3, x3, x9 asr x3, x3, #1 ldr q10, [sp, #560] ldr q11, [sp, #576] asr x12, x12, #1 neg x4, x4 and v12.16b, v6.16b, v10.16b and v13.16b, v7.16b, v11.16b cmp x10, xzr csel x9, x11, x9, ge and v14.16b, v2.16b, v10.16b and v15.16b, v3.16b, v11.16b csel x3, x12, x3, lt csel x4, x13, x4, lt xtn v31.2s, v4.2d xtn v16.2s, v12.2d smull v16.2d, v16.2s, v31.2s xtn v31.2s, v5.2d xtn v17.2s, v13.2d smull v17.2d, v17.2s, v31.2s mov x10, #-1 mov x11, x3 xtn v8.2s, v8.2d xtn v18.2s, v14.2d smull v18.2d, v18.2s, v8.2s xtn v9.2s, v9.2d xtn v19.2s, v15.2d smull v19.2d, v19.2s, v9.2s add x12, x3, x9 tst x3, #1 add v16.2d, v16.2d, v18.2d add v17.2d, v17.2d, v19.2d csel x10, x4, x10, ne csel x12, x3, x12, eq ldr q18, [sp, #528] ldr q19, [sp, #544] add x13, x4, #1 ldr q20, [sp, #432] ldr q21, [sp, #448] sub x3, x3, x9 asr x3, x3, #1 asr x12, x12, #1 xtn v31.2s, v16.2d xtn v18.2s, v18.2d smull v22.2d, v18.2s, v31.2s xtn v31.2s, v17.2d xtn v19.2s, v19.2d smull v23.2d, v19.2s, v31.2s neg x4, x4 cmp x10, xzr and v22.16b, v10.16b, v22.16b and v23.16b, v11.16b, v23.16b csel x9, x11, x9, ge csel x3, x12, x3, lt xtn v20.2s, v20.2d xtn v24.2s, v22.2d smull v24.2d, v24.2s, v20.2s xtn v21.2s, v21.2d xtn v25.2s, v23.2d smull v25.2d, v25.2s, v21.2s csel x4, x13, x4, lt ldr x10, [sp, 120] sub v16.2d, v16.2d, v24.2d sub v17.2d, v17.2d, v25.2d mov x11, x3 ldr q30, [sp, 656] ldr q31, [sp, 672] add v16.2d, v16.2d, v30.2d add v17.2d, v17.2d, v31.2d add x12, x3, x9 tst x3, #1 csel x10, x4, x10, ne csel x12, x3, x12, eq ushr v16.2d, v16.2d, #30 ushr v17.2d, v17.2d, #30 add x13, x4, #1 shl v22.2d, v22.2d, #15 shl v23.2d, v23.2d, #15 sub x3, x3, x9 asr x3, x3, #1 asr x12, x12, #1 ldr q24, [sp, 176] ldr q25, [sp, 192] neg x4, x4 mov v26.d[0], v25.d[0] mov v26.d[1], v25.d[1] mov v27.d[0], v24.d[0] mov v27.d[1], v24.d[1] cmp x10, xzr csel x9, x11, x9, ge csel x3, x12, x3, lt xtn v12.2s, v12.2d xtn v24.2s, v24.2d smull v28.2d, v24.2s, v12.2s xtn v13.2s, v13.2d xtn v25.2s, v25.2d smull v29.2d, v25.2s, v13.2s csel x4, x13, x4, lt xtn v14.2s, v14.2d xtn v26.2s, v26.2d smull v30.2d, v14.2s, v26.2s xtn v15.2s, v15.2d xtn v27.2s, v27.2d smull v31.2d, v15.2s, v27.2s mov x10, #-1 mov x11, x3 add v28.2d, v28.2d, v30.2d add v29.2d, v29.2d, v31.2d add x12, x3, x9 tst x3, #1 ushr v6.2d, v6.2d, #30 ushr v7.2d, v7.2d, #30 csel x10, x4, x10, ne ushr v2.2d, v2.2d, #30 ushr v3.2d, v3.2d, #30 csel x12, x3, x12, eq add x13, x4, #1 sub x3, x3, x9 xtn v6.2s, v6.2d xtn v4.2s, v4.2d smull v4.2d, v6.2s, v4.2s xtn v7.2s, v7.2d xtn v5.2s, v5.2d smull v5.2d, v7.2s, v5.2s asr x3, x3, #1 asr x12, x12, #1 xtn v2.2s, v2.2d smull v8.2d, v2.2s, v8.2s xtn v3.2s, v3.2d smull v9.2d, v3.2s, v9.2s neg x4, x4 cmp x10, xzr add v4.2d, v8.2d, v4.2d add v5.2d, v9.2d, v5.2d csel x9, x11, x9, ge csel x3, x12, x3, lt add v4.2d, v28.2d, v4.2d add v5.2d, v29.2d, v5.2d csel x4, x13, x4, lt mov x10, #-1 add v4.2d, v16.2d, v4.2d add v5.2d, v17.2d, v5.2d mov x11, x3 add x12, x3, x9 xtn v31.2s, v4.2d smull v8.2d, v31.2s, v18.2s xtn v31.2s, v5.2d smull v9.2d, v31.2s, v19.2s tst x3, #1 csel x10, x4, x10, ne csel x12, x3, x12, eq and v8.16b, v8.16b, v10.16b and v9.16b, v9.16b, v11.16b add x13, x4, #1 xtn v31.2s, v8.2d smull v16.2d, v31.2s, v20.2s xtn v31.2s, v9.2d smull v17.2d, v31.2s, v21.2s sub x3, x3, x9 asr x3, x3, #1 sub v4.2d, v4.2d, v16.2d sub v5.2d, v5.2d, v17.2d asr x12, x12, #1 neg x4, x4 add v4.2d, v0.2d, v4.2d add v5.2d, v1.2d, v5.2d cmp x10, xzr csel x9, x11, x9, ge ushr v4.2d, v4.2d, #30 ushr v5.2d, v5.2d, #30 csel x3, x12, x3, lt csel x4, x13, x4, lt ldr x10, [sp, #120] shl v8.2d, v8.2d, #15 shl v9.2d, v9.2d, #15 mov x11, x3 ldr q16, [sp, #208] ldr q17, [sp, #224] add x12, x3, x9 tst x3, #1 mov v18.d[0], v17.d[0] mov v18.d[1], v17.d[1] mov v19.d[0], v16.d[0] mov v19.d[1], v16.d[1] csel x10, x4, x10, ne csel x12, x3, x12, eq smull v20.2d, v6.2s, v24.2s smull v21.2d, v7.2s, v25.2s add x13, x4, #1 sub x3, x3, x9 smull v24.2d, v2.2s, v26.2s smull v25.2d, v3.2s, v27.2s asr x3, x3, #1 add v20.2d, v20.2d, v24.2d add v21.2d, v21.2d, v25.2d asr x12, x12, #1 neg x4, x4 cmp x10, xzr xtn v16.2s, v16.2d smull v24.2d, v12.2s, v16.2s xtn v17.2s, v17.2d smull v25.2d, v13.2s, v17.2s csel x9, x11, x9, ge csel x3, x12, x3, lt xtn v18.2s, v18.2d smull v26.2d, v14.2s, v18.2s xtn v19.2s, v19.2d smull v27.2d, v15.2s, v19.2s csel x4, x13, x4, lt ldr x10, [sp, #120] add v24.2d, v26.2d, v24.2d add v25.2d, v27.2d, v25.2d mov x11, x3 add x12, x3, x9 add v24.2d, v20.2d, v24.2d add v25.2d, v21.2d, v25.2d tst x3, #1 csel x10, x4, x10, ne add v24.2d, v4.2d, v24.2d add v25.2d, v5.2d, v25.2d csel x12, x3, x12, eq add x13, x4, #1 add v4.2d, v0.2d, v24.2d add v5.2d, v1.2d, v25.2d sub x3, x3, x9 asr x3, x3, #1 ushr v4.2d, v4.2d, #30 ushr v5.2d, v5.2d, #30 asr x12, x12, #1 ldr q20, [sp, #240] ldr q21, [sp, #256] neg x4, x4 cmp x10, xzr csel x9, x11, x9, ge mov v26.d[0], v21.d[0] mov v26.d[1], v21.d[1] mov v27.d[1], v20.d[1] mov v27.d[0], v20.d[0] csel x3, x12, x3, lt csel x4, x13, x4, lt smull v16.2d, v6.2s, v16.2s smull v17.2d, v7.2s, v17.2s smull v18.2d, v2.2s, v18.2s smull v19.2d, v3.2s, v19.2s ldr x10, [sp, #120] mov x11, x3 add x12, x3, x9 tst x3, #1 add v16.2d, v18.2d, v16.2d add v17.2d, v19.2d, v17.2d csel x10, x4, x10, ne csel x12, x3, x12, eq xtn v20.2s, v20.2d smull v18.2d, v12.2s, v20.2s xtn v21.2s, v21.2d smull v19.2d, v13.2s, v21.2s add x13, x4, #1 xtn v26.2s, v26.2d smull v28.2d, v14.2s, v26.2s xtn v27.2s, v27.2d smull v29.2d, v15.2s, v27.2s sub x3, x3, x9 asr x3, x3, #1 asr x12, x12, #1 add v18.2d, v28.2d, v18.2d add v19.2d, v29.2d, v19.2d neg x4, x4 cmp x10, xzr add v18.2d, v16.2d, v18.2d add v19.2d, v17.2d, v19.2d csel x9, x11, x9, ge csel x3, x12, x3, lt csel x4, x13, x4, lt add v18.2d, v4.2d, v18.2d add v19.2d, v5.2d, v19.2d mov x10, #-1 and v24.16b, v10.16b, v24.16b and v25.16b, v11.16b, v25.16b mov x11, x3 add x12, x3, x9 add v4.2d, v0.2d, v18.2d add v5.2d, v1.2d, v19.2d tst x3, #1 ushr v4.2d, v4.2d, #30 ushr v5.2d, v5.2d, #30 csel x10, x4, x10, ne csel x12, x3, x12, eq add x13, x4, #1 and v18.16b, v10.16b, v18.16b and v19.16b, v11.16b, v19.16b sub x3, x3, x9 asr x3, x3, #1 str q18, [sp, #176] str q19, [sp, #192] asr x12, x12, #1 shl v16.2d, v18.2d, #30 shl v17.2d, v19.2d, #30 neg x4, x4 cmp x10, xzr csel x9, x11, x9, ge add v16.2d, v16.2d, v24.2d add v17.2d, v17.2d, v25.2d csel x3, x12, x3, lt csel x4, x13, x4, lt str q16, [sp, #752] str q17, [sp, #768] ldr x10, [sp, #120] mov x11, x3 add x12, x3, x9 ldr q16, [sp, #272] ldr q17, [sp, #288] mov v18.d[0], v17.d[0] mov v18.d[1], v17.d[1] mov v19.d[1], v16.d[1] mov v19.d[0], v16.d[0] tst x3, #1 csel x10, x4, x10, ne csel x12, x3, x12, eq smull v20.2d, v6.2s, v20.2s smull v21.2d, v7.2s, v21.2s add x13, x4, #1 sub x3, x3, x9 smull v26.2d, v2.2s, v26.2s smull v27.2d, v3.2s, v27.2s asr x3, x3, #1 asr x12, x12, #1 add v20.2d, v20.2d, v26.2d add v21.2d, v21.2d, v27.2d neg x4, x4 xtn v16.2s, v16.2d smull v26.2d, v12.2s, v16.2s xtn v17.2s, v17.2d smull v27.2d, v13.2s, v17.2s cmp x10, xzr csel x9, x11, x9, ge csel x3, x12, x3, lt xtn v18.2s, v18.2d smull v28.2d, v14.2s, v18.2s xtn v19.2s, v19.2d smull v29.2d, v15.2s, v19.2s csel x4, x13, x4, lt mov x10, #-1 add v26.2d, v28.2d, v26.2d add v27.2d, v29.2d, v27.2d mov x11, x3 add x12, x3, x9 add v26.2d, v26.2d, v20.2d add v27.2d, v27.2d, v21.2d tst x3, #1 csel x10, x4, x10, ne csel x12, x3, x12, eq add v26.2d, v4.2d, v26.2d add v27.2d, v5.2d, v27.2d add x13, x4, #1 sub x3, x3, x9 add v4.2d, v26.2d, v0.2d add v5.2d, v27.2d, v1.2d ushr v4.2d, v4.2d, #30 ushr v5.2d, v5.2d, #30 asr x3, x3, #1 asr x12, x12, #1 neg x4, x4 ldr q20, [sp, #304] ldr q21, [sp, #320] cmp x10, xzr csel x9, x11, x9, ge mov v28.d[0], v21.d[0] mov v28.d[1], v21.d[1] mov v29.d[1], v20.d[1] mov v29.d[0], v20.d[0] csel x3, x12, x3, lt csel x4, x13, x4, lt mov x10, #-1 smull v16.2d, v6.2s, v16.2s smull v17.2d, v7.2s, v17.2s mov x11, x3 smull v18.2d, v2.2s, v18.2s smull v19.2d, v3.2s, v19.2s add x12, x3, x9 tst x3, #1 add v16.2d, v16.2d, v18.2d add v17.2d, v17.2d, v19.2d csel x10, x4, x10, ne csel x12, x3, x12, eq add x13, x4, #1 xtn v20.2s, v20.2d smull v18.2d, v12.2s, v20.2s xtn v21.2s, v21.2d smull v19.2d, v13.2s, v21.2s xtn v28.2s, v28.2d smull v30.2d, v14.2s, v28.2s xtn v29.2s, v29.2d smull v31.2d, v15.2s, v29.2s sub x3, x3, x9 asr x3, x3, #1 asr x12, x12, #1 add v18.2d, v18.2d, v30.2d add v19.2d, v19.2d, v31.2d neg x4, x4 cmp x10, xzr add v18.2d, v16.2d, v18.2d add v19.2d, v17.2d, v19.2d csel x9, x11, x9, ge csel x3, x12, x3, lt add v18.2d, v4.2d, v18.2d add v19.2d, v5.2d, v19.2d csel x4, x13, x4, lt ldr x10, [sp, #120] add v4.2d, v0.2d, v18.2d add v5.2d, v1.2d, v19.2d mov x11, x3 add x12, x3, x9 ushr v4.2d, v4.2d, #30 ushr v5.2d, v5.2d, #30 tst x3, #1 csel x10, x4, x10, ne and v18.16b, v10.16b, v18.16b and v19.16b, v11.16b, v19.16b csel x12, x3, x12, eq add x13, x4, #1 and v26.16b, v10.16b, v26.16b and v27.16b, v11.16b, v27.16b sub x3, x3, x9 asr x3, x3, #1 str q26, [sp, #208] str q27, [sp, #224] asr x12, x12, #1 neg x4, x4 cmp x10, xzr shl v16.2d, v18.2d, #30 shl v17.2d, v19.2d, #30 csel x9, x11, x9, ge add v16.2d, v16.2d, v26.2d add v17.2d, v17.2d, v27.2d csel x3, x12, x3, lt csel x4, x13, x4, lt str q16, [sp, #784] str q17, [sp, #800] ldr x10, [sp, #120] mov x11, x3 ldr q16, [sp, #336] ldr q17, [sp, #352] add x12, x3, x9 tst x3, #1 mov v26.d[0], v17.d[0] mov v26.d[1], v17.d[1] mov v27.d[1], v16.d[1] mov v27.d[0], v16.d[0] csel x10, x4, x10, ne csel x12, x3, x12, eq smull v20.2d, v6.2s, v20.2s smull v21.2d, v7.2s, v21.2s add x13, x4, #1 sub x3, x3, x9 smull v28.2d, v2.2s, v28.2s smull v29.2d, v3.2s, v29.2s asr x3, x3, #1 add v20.2d, v20.2d, v28.2d add v21.2d, v21.2d, v29.2d asr x12, x12, #1 neg x4, x4 cmp x10, xzr xtn v16.2s, v16.2d smull v28.2d, v12.2s, v16.2s xtn v17.2s, v17.2d smull v29.2d, v13.2s, v17.2s csel x9, x11, x9, ge csel x3, x12, x3, lt xtn v26.2s, v26.2d smull v30.2d, v14.2s, v26.2s xtn v27.2s, v27.2d smull v31.2d, v15.2s, v27.2s csel x4, x13, x4, lt ldr x10, [sp, #120] add v28.2d, v28.2d, v30.2d add v29.2d, v29.2d, v31.2d mov x11, x3 add x12, x3, x9 tst x3, #1 add v28.2d, v28.2d, v20.2d add v29.2d, v29.2d, v21.2d csel x10, x4, x10, ne csel x12, x3, x12, eq add x13, x4, #1 add v28.2d, v28.2d, v4.2d add v29.2d, v29.2d, v5.2d sub x3, x3, x9 asr x3, x3, #1 add v4.2d, v28.2d, v0.2d add v5.2d, v29.2d, v1.2d ushr v4.2d, v4.2d, #30 ushr v5.2d, v5.2d, #30 asr x12, x12, #1 neg x4, x4 cmp x10, xzr str q18, [sp, #240] str q19, [sp, #256] ldr q18, [sp, #368] ldr q19, [sp, #384] csel x9, x11, x9, ge csel x3, x12, x3, lt csel x4, x13, x4, lt mov v20.d[0], v19.d[0] mov v20.d[1], v19.d[1] mov v21.d[1], v18.d[1] mov v21.d[0], v18.d[0] ldr x10, [sp, #120] smull v16.2d, v6.2s, v16.2s smull v17.2d, v7.2s, v17.2s mov x11, x3 add x12, x3, x9 tst x3, #1 smull v26.2d, v2.2s, v26.2s smull v27.2d, v3.2s, v27.2s csel x10, x4, x10, ne csel x12, x3, x12, eq add v16.2d, v26.2d, v16.2d add v17.2d, v27.2d, v17.2d add x13, x4, #1 sub x3, x3, x9 xtn v18.2s, v18.2d smull v26.2d, v12.2s, v18.2s xtn v19.2s, v19.2d smull v27.2d, v13.2s, v19.2s asr x3, x3, #1 asr x12, x12, #1 xtn v20.2s, v20.2d smull v30.2d, v14.2s, v20.2s xtn v21.2s, v21.2d smull v31.2d, v15.2s, v21.2s neg x4, x4 cmp x10, xzr add v26.2d, v26.2d, v30.2d add v27.2d, v27.2d, v31.2d csel x9, x11, x9, ge csel x3, x12, x3, lt add v26.2d, v16.2d, v26.2d add v27.2d, v17.2d, v27.2d csel x4, x13, x4, lt ldr x10, [sp, #120] add v26.2d, v26.2d, v4.2d add v27.2d, v27.2d, v5.2d mov x11, x3 add x12, x3, x9 add v4.2d, v26.2d, v0.2d add v5.2d, v27.2d, v1.2d tst x3, #1 csel x10, x4, x10, ne ushr v4.2d, v4.2d, #30 ushr v5.2d, v5.2d, #30 csel x12, x3, x12, eq and v28.16b, v28.16b, v10.16b and v29.16b, v29.16b, v11.16b add x13, x4, #1 sub x3, x3, x9 asr x3, x3, #1 str q28, [sp, #272] str q29, [sp, #288] asr x12, x12, #1 ldr q16, [sp, #400] ldr q17, [sp, #416] neg x4, x4 cmp x10, xzr csel x9, x11, x9, ge mov v28.d[0], v17.d[0] mov v28.d[1], v17.d[1] mov v29.d[1], v16.d[1] mov v29.d[0], v16.d[0] csel x3, x12, x3, lt csel x4, x13, x4, lt smull v18.2d, v6.2s, v18.2s smull v19.2d, v7.2s, v19.2s mov x10, #-1 mov x11, x3 smull v20.2d, v2.2s, v20.2s smull v21.2d, v3.2s, v21.2s add x12, x3, x9 tst x3, #1 csel x10, x4, x10, ne add v18.2d, v20.2d, v18.2d add v19.2d, v21.2d, v19.2d csel x12, x3, x12, eq xtn v16.2s, v16.2d smull v12.2d, v12.2s, v16.2s xtn v17.2s, v17.2d smull v13.2d, v13.2s, v17.2s add x13, x4, #1 sub x3, x3, x9 xtn v28.2s, v28.2d smull v14.2d, v28.2s, v14.2s xtn v29.2s, v29.2d smull v15.2d, v29.2s, v15.2s asr x3, x3, #1 add v12.2d, v12.2d, v14.2d add v13.2d, v13.2d, v15.2d asr x12, x12, #1 neg x4, x4 cmp x10, xzr csel x9, x11, x9, ge add v12.2d, v12.2d, v18.2d add v13.2d, v13.2d, v19.2d add v12.2d, v12.2d, v4.2d add v13.2d, v13.2d, v5.2d csel x3, x12, x3, lt csel x4, x13, x4, lt ldr x10, [sp, #120] add v12.2d, v22.2d, v12.2d add v13.2d, v23.2d, v13.2d mov x11, x3 add x12, x3, x9 add v4.2d, v0.2d, v12.2d add v5.2d, v1.2d, v13.2d tst x3, #1 ushr v4.2d, v4.2d, #30 ushr v5.2d, v5.2d, #30 csel x10, x4, x10, ne csel x12, x3, x12, eq add x13, x4, #1 and v26.16b, v10.16b, v26.16b and v27.16b, v11.16b, v27.16b sub x3, x3, x9 asr x3, x3, #1 asr x12, x12, #1 str q26, [sp, #304] str q27, [sp, #320] neg x4, x4 smull v6.2d, v16.2s, v6.2s smull v7.2d, v17.2s, v7.2s cmp x10, #1 csel x9, x11, x9, ge smull v2.2d, v28.2s, v2.2s smull v3.2d, v29.2s, v3.2s csel x3, x12, x3, lt csel x4, x13, x4, lt add v2.2d, v6.2d, v2.2d add v3.2d, v7.2d, v3.2d ldr x10, [sp, #120] mov x11, x3 add v8.2d, v2.2d, v8.2d add v9.2d, v3.2d, v9.2d add x12, x3, x9 tst x3, #1 add v8.2d, v4.2d, v8.2d add v9.2d, v5.2d, v9.2d csel x10, x4, x10, ne csel x12, x3, x12, eq add v2.2d, v8.2d, v0.2d add v3.2d, v9.2d, v1.2d add x13, x4, #1 sub x3, x3, x9 ushr v2.2d, v2.2d, #30 ushr v3.2d, v3.2d, #30 asr x3, x3, #1 and v12.16b, v10.16b, v12.16b and v13.16b, v11.16b, v13.16b asr x12, x12, #1 neg x4, x4 cmp x10, xzr str q12, [sp, #336] str q13, [sp, #352] csel x9, x11, x9, ge ldr q4, [sp, #496] ldr q5, [sp, #512] csel x3, x12, x3, lt csel x4, x13, x4, lt ._extract_init: ldr x10, [sp, #848] add x11, x3, x10 ldr q6, [sp, #624] ldr q7, [sp, #640] add v12.2d, v2.2d, v0.2d add v13.2d, v3.2d, v1.2d asr x11, x11, #42 mov x12, x2 ushr v12.2d, v12.2d, #15 ushr v13.2d, v13.2d, #15 mul x2, x2, x11 add x10, x9, x10 sub v12.2d, v12.2d, v6.2d sub v13.2d, v13.2d, v7.2d asr x10, x10, #42 mul x12, x12, x10 ldr q30, [sp, #592] ldr q31, [sp, #608] sub v2.2d, v2.2d, v30.2d sub v3.2d, v3.2d, v31.2d ldr x13, [sp, #824] add x3, x3, x13 and v8.16b, v8.16b, v10.16b and v9.16b, v9.16b, v11.16b lsl x3, x3, #22 asr x3, x3, #43 and v2.16b, v2.16b, v4.16b and v3.16b, v3.16b, v5.16b mov x14, x7 mul x14, x3, x14 ldr q28, [sp, #464] ldr q29, [sp, #480] xtn v30.2s, v12.2d xtn v31.2s, v28.2d smull v4.2d, v30.2s, v31.2s xtn v30.2s, v13.2d xtn v31.2s, v29.2d smull v5.2d, v30.2s, v31.2s add x9, x9, x13 add v24.2d, v24.2d, v4.2d add v25.2d, v25.2d, v5.2d lsl x9, x9, #22 asr x9, x9, #43 mul x7, x9, x7 str q8, [sp, #368] str q9, [sp, #384] add x7, x12, x7 add x2, x14, x2 str q2, [sp, #400] str q3, [sp, #416] asr x7, x7, #20 str q24, [sp, #144] str q25, [sp, #160] asr x2, x2, #20 str x10, [sp, #720] str x9, [sp, #688] str x11, [sp, #704] str x3, [sp, #736] mov x9, #2 ._loop20: bic x3, x7, x5 bic x10, x2, x5 ldr x29, [sp, #832] ldr x30, [sp, #816] add x3, x3, x29 add x10, x10, x30 mov x11, #2 ._loop2: mov x12, #-1 mov x13, x10 add x14, x10, x3 tst x10, #1 csel x12, x4, x12, ne csel x14, x10, x14, eq add x15, x4, #1 sub x10, x10, x3 asr x10, x10, #1 asr x14, x14, #1 neg x4, x4 cmp x12, xzr csel x3, x13, x3, ge csel x10, x14, x10, lt csel x4, x15, x4, lt ldr x12, [sp, #120] mov x13, x10 add x14, x10, x3 tst x10, #1 csel x12, x4, x12, ne csel x14, x10, x14, eq add x15, x4, #1 sub x10, x10, x3 asr x10, x10, #1 asr x14, x14, #1 neg x4, x4 cmp x12, xzr csel x3, x13, x3, ge csel x10, x14, x10, lt csel x4, x15, x4, lt mov x12, #-1 mov x13, x10 add x14, x10, x3 tst x10, #1 csel x12, x4, x12, ne csel x14, x10, x14, eq add x15, x4, #1 sub x10, x10, x3 asr x10, x10, #1 asr x14, x14, #1 neg x4, x4 cmp x12, xzr csel x3, x13, x3, ge csel x10, x14, x10, lt csel x4, x15, x4, lt ldr x12, [sp, #120] mov x13, x10 add x14, x10, x3 tst x10, #1 csel x12, x4, x12, ne csel x14, x10, x14, eq add x15, x4, #1 sub x10, x10, x3 asr x10, x10, #1 asr x14, x14, #1 neg x4, x4 cmp x12, xzr csel x3, x13, x3, ge csel x10, x14, x10, lt csel x4, x15, x4, lt ldr x12, [sp, #120] mov x13, x10 add x14, x10, x3 tst x10, #1 csel x12, x4, x12, ne csel x14, x10, x14, eq add x15, x4, #1 sub x10, x10, x3 asr x10, x10, #1 asr x14, x14, #1 neg x4, x4 cmp x12, xzr csel x3, x13, x3, ge csel x10, x14, x10, lt csel x4, x15, x4, lt ldr x12, [sp, #120] mov x13, x10 add x14, x10, x3 tst x10, #1 csel x12, x4, x12, ne csel x14, x10, x14, eq add x15, x4, #1 sub x10, x10, x3 asr x10, x10, #1 asr x14, x14, #1 neg x4, x4 cmp x12, xzr csel x3, x13, x3, ge csel x10, x14, x10, lt csel x4, x15, x4, lt ldr x12, [sp, #120] mov x13, x10 add x14, x10, x3 tst x10, #1 csel x12, x4, x12, ne csel x14, x10, x14, eq add x15, x4, #1 sub x10, x10, x3 asr x10, x10, #1 asr x14, x14, #1 neg x4, x4 cmp x12, xzr csel x3, x13, x3, ge csel x10, x14, x10, lt csel x4, x15, x4, lt ldr x12, [sp, #120] mov x13, x10 add x14, x10, x3 tst x10, #1 csel x12, x4, x12, ne csel x14, x10, x14, eq add x15, x4, #1 sub x10, x10, x3 asr x10, x10, #1 asr x14, x14, #1 neg x4, x4 cmp x12, xzr csel x3, x13, x3, ge csel x10, x14, x10, lt csel x4, x15, x4, lt ldr x12, [sp, #120] mov x13, x10 add x14, x10, x3 tst x10, #1 csel x12, x4, x12, ne csel x14, x10, x14, eq add x15, x4, #1 sub x10, x10, x3 asr x10, x10, #1 asr x14, x14, #1 neg x4, x4 cmp x12, xzr csel x3, x13, x3, ge csel x10, x14, x10, lt csel x4, x15, x4, lt mov x12, #-1 mov x13, x10 add x14, x10, x3 tst x10, #1 csel x12, x4, x12, ne csel x14, x10, x14, eq add x15, x4, #1 sub x10, x10, x3 asr x10, x10, #1 asr x14, x14, #1 neg x4, x4 cmp x12, xzr csel x3, x13, x3, ge csel x10, x14, x10, lt csel x4, x15, x4, lt subs x11, x11, #1 bgt ._loop2 subs x9, x9, #1 beq ._lastloop ._extract: ldr x11, [sp, #848] add x12, x10, x11 asr x12, x12, #42 mov x13, x2 mul x2, x2, x12 add x11, x3, x11 asr x11, x11, #42 mul x13, x11, x13 ldr x14, [sp, #824] add x10, x10, x14 lsl x10, x10, #22 asr x10, x10, #43 mov x15, x7 mul x15, x10, x15 add x3, x3, x14 lsl x3, x3, #22 asr x3, x3, #43 mul x7, x3, x7 add x7, x13, x7 add x2, x15, x2 asr x7, x7, #20 asr x2, x2, #20 ldr x13, [sp, #688] mul x13, x3, x13 ldr x14, [sp, #736] mul x14, x11, x14 ldr x15, [sp, #720] mul x3, x15, x3 ldr x16, [sp, #704] mul x11, x16, x11 mul x15, x10, x15 mul x16, x12, x16 ldr x30, [sp, #688] mul x10, x30, x10 ldr x30, [sp, #736] mul x12, x30, x12 add x11, x3, x11 add x3, x13, x14 add x10, x12, x10 add x12, x15, x16 ._first_loop: str x11, [sp, #720] str x3, [sp, #688] str x12, [sp, #704] str x10, [sp, #736] b ._loop20 ._lastloop: ldr x7, [sp, #848] add x2, x10, x7 asr x2, x2, #42 add x11, x3, x7 asr x11, x11, #42 ldr x7, [sp, #736] mul x7, x11, x7 ldr x9, [sp, #704] mul x11, x9, x11 mul x9, x2, x9 ldr x12, [sp, #824] add x13, x10, x12 lsl x13, x13, #22 asr x13, x13, #43 add x3, x3, x12 lsl x3, x3, #22 asr x3, x3, #43 ldr x10, [sp, #688] mul x10, x3, x10 ldr x12, [sp, #720] mul x3, x12, x3 mul x12, x13, x12 ldr x30, [sp, #736] mul x2, x30, x2 ldr x30, [sp, #688] mul x13, x30, x13 add x11, x3, x11 add x10, x10, x7 add x13, x2, x13 add x12, x12, x9 ldr x7, [sp, #752] ldr x9, [sp, #768] ldr x14, [sp, #784] ldr x15, [sp, #800] subs x8, x8, #1 bne ._bigloop lsl x14, x14, #60 lsl x15, x15, #60 add x7, x14, x7 add x9, x15, x9 mul x7, x10, x7 mul x9, x11, x9 add x7, x7, x9 asr x7, x7, #60 mul x10, x7, x10 mul x11, x7, x11 ._cneg: ldr x3, [sp, #408] mul x4, x3, x10 smulh x7, x3, x10 ldr x3, [sp, #424] mul x29, x3, x11 smulh x30, x3, x11 adds x4, x4, x29 adc x7, x7, x30 mov x3, x4 lsl x7, x7, #48 lsr x3, x3, #16 orr x7, x7, x3 lsl x4, x4, #48 ldr x3, [sp, #344] ldr x2, [sp, #376] lsl x2, x2, #30 add x3, x2, x3 mul x8, x3, x10 smulh x5, x3, x10 ldr x3, [sp, #360] ldr x2, [sp, #392] lsl x2, x2, #30 add x3, x2, x3 mul x29, x3, x11 smulh x30, x3, x11 adds x8, x8, x29 adc x5, x5, x30 mov x9, x8 lsr x8, x8, #12 lsl x30, x5, #52 orr x8, x8, x30 lsl x9, x9, #52 asr x5, x5, #12 adds x4, x8, x4 adc x7, x5, x7 ldr x3, [sp, #280] ldr x2, [sp, #312] lsl x2, x2, #30 add x3, x2, x3 mul x8, x3, x10 smulh x5, x3, x10 ldr x3, [sp, #296] ldr x2, [sp, #328] lsl x2, x2, #30 add x3, x2, x3 mul x29, x3, x11 smulh x30, x3, x11 adds x8, x8, x29 adc x5, x5, x30 mov x12, x8 lsr x8, x8, #8 lsl x30, x5, #56 orr x8, x8, x30 lsl x12, x12, #50 mov x2, x5 asr x5, x5, 8 asr x2, x2, 63 adds x9, x9, x8 adcs x4, x4, x5 adc x7, x2, x7 ldr x3, [sp, #216] ldr x2, [sp, #248] lsl x2, x2, #30 add x3, x2, x3 mul x8, x3, x10 smulh x5, x3, x10 ldr x3, [sp, #232] ldr x2, [sp, #264] lsl x2, x2, #30 add x3, x2, x3 mul x29, x3, x11 smulh x30, x3, x11 adds x8, x8, x29 adc x5, x5, x30 mov x13, x8 mov x30, x5 lsr x8, x8, #4 lsl x30, x30, #60 orr x8, x8, x30 lsl x13, x13, #60 mov x2, x5 asr x5, x5, #4 asr x2, x2, #63 adds x12, x8, x12 adcs x9, x5, x9 adcs x4, x2, x4 adc x7, x2, x7 ldr x3, [sp, #152] ldr x2, [sp, #184] lsl x2, x2, #30 add x3, x3, x2 mul x8, x3, x10 smulh x5, x3, x10 ldr x3, [sp, #168] ldr x2, [sp, #200] lsl x2, x2, #30 add x3, x3, x2 mul x29, x3, x11 smulh x30, x3, x11 adds x8, x8, x29 adc x5, x5, x30 mov x2, x5 asr x2, x2, #63 adds x13, x8, x13 adcs x12, x5, x12 adcs x9, x2, x9 adcs x4, x2, x4 adcs x7, x2, x7 adds x29, x4, x4 bic x4, x4, 0x8000000000000000 adc x7, x7, x7 mov x5, x7 mul x29, x28, x7 smulh x30, x28, x7 asr x5, x5, #63 eor x7, x7, x7 adds x13, x29, x13 adcs x12, x30, x12 adcs x9, x5, x9 adcs x4, x5, x4 adcs x7, x5, x7 adds x29, x4, x4 bic x4, x4, 0x8000000000000000 adc x7, x7, x7 mov x5, x7 mul x29, x28, x7 smulh x30, x28, x7 asr x5, x5, #63 adds x13, x29, x13 adcs x12, x30, x12 adcs x9, x5, x9 adcs x4, x5, x4 adc x7, x5, x7 eor x7, x7, x7 mov x2, #-19 mov x5, #-1 mov x8, #0x7fffffffffffffff cmp x4, xzr csel x2, x7, x2, ge csel x5, x7, x5, ge csel x8, x7, x8, ge adds x13, x2, x13 adcs x12, x5, x12 adcs x9, x5, x9 adc x4, x8, x4 adds x13, x28, x13 adcs x12, x12, xzr adcs x9, x9, xzr adcs x4, x4, xzr mov x7, x4 asr x7, x7, #63 bic x4, x4, 0x8000000000000000 and x7, x7, x28 adds x13, x7, x13 adcs x12, x12, xzr adcs x9, x9, xzr adc x4, x4, xzr subs x1, x13, x28 sbcs x2, x12, xzr sbcs x3, x9, xzr sbc x4, x4, xzr ldr x0, [sp, #96] stp x1, x2, [x0, #0] stp x3, x4, [x0, #16] ldp x29, x30, [sp, #80] ldp x27, x28, [sp, #64] ldp x25, x26, [sp, #48] ldp x23, x24, [sp, #32] ldp x21, x22, [sp, #16] ldp x19, x20, [sp, #0] add sp, sp, #928 ret .section .note.GNU-stack,"",@progbits