// -rw-r--r-- 125604 lib25519-20260614/crypto_nP/montgomery25519/arm64-uma10l/mladder.S raw
#include "crypto_asm_hidden.h"
// linker define mladder
/* Assembly for Montgomery ladder */
.p2align 4
ASM_HIDDEN _CRYPTO_SHARED_NAMESPACE(mladder)
.globl _CRYPTO_SHARED_NAMESPACE(mladder)
ASM_HIDDEN CRYPTO_SHARED_NAMESPACE(mladder)
.globl CRYPTO_SHARED_NAMESPACE(mladder)
_CRYPTO_SHARED_NAMESPACE(mladder):
CRYPTO_SHARED_NAMESPACE(mladder):
sub sp, sp, #624
stp x19, x20, [sp, #0]
stp x21, x22, [sp, #16]
stp x23, x24, [sp, #32]
stp x25, x26, [sp, #48]
stp x27, x28, [sp, #64]
stp x29, x30, [sp, #80]
stp x0, x2, [sp, #96]
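// clamp scalar in place (through the pointer in x2): clear bits 0-2 and set bit 254;
// bit 255 is left untouched by these instructions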
ldr x3, [x2, #0]
and x3, x3, #0xfffffffffffffff8
str x3, [x2, #0]
ldr x4, [x2, #24]
orr x4, x4, #0x4000000000000000
str x4, [x2, #24]
// XP
ldp x4, x5, [x1, #0]
ldp x6, x7, [x1, #16]
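// X1 ← XP: unpack the 4x64-bit value in x4..x7 into ten limbs of alternating
// 26/25 bits (w8..w17, least significant first); bit 255 of XP is dropped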
and w8, w4, #0x3ffffff
ubfx x9, x4, #26, #25
lsr x10, x4, #51
orr w10, w10, w5, lsl #13
and w10, w10, #0x3ffffff
ubfx x11, x5, #13, #25
lsr x12, x5, #38
and w13, w6, #0x1ffffff
ubfx x14, x6, #25, #26
lsr x15, x6, #51
orr w15, w15, w7, lsl #13
and w15, w15, #0x1ffffff
ubfx x16, x7, #12, #26
ubfx x17, x7, #38, #25
// store X1
add x0, sp, #144
stp w8, w9, [x0, #0]
stp w10, w11, [x0, #8]
stp w12, w13, [x0, #16]
stp w14, w15, [x0, #24]
stp w16, w17, [x0, #32]
// X3 ← X1
add x0, sp, #264
stp w8, w9, [x0, #0]
stp w10, w11, [x0, #8]
stp w12, w13, [x0, #16]
stp w14, w15, [x0, #24]
stp w16, w17, [x0, #32]
// X2 ← 1
mov x10, #1
stp x10, xzr, [sp, #184]
stp xzr, xzr, [sp, #200]
str xzr, [sp, 216]
// Z2 ← 0
stp xzr, xzr, [sp, #224]
stp xzr, xzr, [sp, #240]
str xzr, [sp, #256]
// Z3 ← 1
stp x10, xzr, [sp, #304]
stp xzr, xzr, [sp, #320]
str xzr, [sp, #336]
mov w30, #19
movz x1, #0xffda
movk x1, #0x07ff, lsl 16
movk x1, #0xfffe, lsl 32
movk x1, #0x03ff, lsl 48
movz x2, #0xfffe
movk x2, #0x07ff, lsl 16
movk x2, #0xfffe, lsl 32
movk x2, #0x03ff, lsl 48
stp x1, x2, [sp, #128]
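// pre-process for the bit n[254] = 1 (set by the clamp above): at this point
// (X2:Z2) = (1:0) and (X3:Z3) = (X1:1), so this first step is computed from X3 alone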
// T2 = 2X3
ldp x10, x12, [sp, #264]
ldp x14, x16, [sp, #280]
ldr x18, [sp, #296]
add x20, x10, x10
add x22, x12, x12
add x24, x14, x14
add x26, x16, x16
add x28, x18, x18
stp x20, x22, [sp, #384]
stp x24, x26, [sp, #400]
str x28, [sp, #416]
// T1 = 4X3 = 2T2
add x20, x20, x20
add x22, x22, x22
add x24, x24, x24
add x26, x26, x26
add x28, x28, x28
stp x20, x22, [sp, #344]
stp x24, x26, [sp, #360]
str x28, [sp, #376]
// T = X3^2 + 1
lsr x11, x10, #32
lsr x13, x12, #32
lsr x15, x14, #32
lsr x17, x16, #32
lsr x19, x18, #32
mul w20, w16, w30
mul w21, w18, w30
add w25, w30, w30
mul w22, w15, w25
mul w23, w17, w25
mul w24, w19, w25
umull x0, w10, w10
add w25, w10, w10
umull x1, w25, w11
umull x2, w25, w12
umull x3, w25, w13
umull x4, w25, w14
umull x5, w25, w15
umull x6, w25, w16
umull x7, w25, w17
umull x8, w25, w18
umaddl x4, w12, w12, x4
umaddl x8, w14, w14, x8
add w10, w15, w15
umaddl x1, w20, w10, x1
umaddl x2, w20, w16, x2
add w9, w17, w17
umaddl x3, w21, w10, x3
umaddl x5, w21, w9, x5
umaddl x6, w21, w18, x6
umaddl x0, w22, w15, x0
umaddl x1, w23, w14, x1
umaddl x2, w23, w10, x2
umaddl x3, w23, w16, x3
umaddl x4, w23, w17, x4
umaddl x1, w24, w12, x1
umaddl x3, w24, w14, x3
umaddl x4, w24, w10, x4
umaddl x5, w24, w16, x5
umaddl x6, w24, w9, x6
umaddl x7, w24, w18, x7
umaddl x8, w24, w19, x8
add w26, w11, w11
umaddl x0, w26, w24, x0
umaddl x2, w26, w11, x2
umaddl x3, w26, w12, x3
umaddl x5, w26, w14, x5
umaddl x6, w26, w10, x6
umaddl x7, w26, w16, x7
umaddl x8, w26, w9, x8
umull x9, w25, w19
umaddl x9, w26, w18, x9
add w27, w12, w12
umaddl x0, w27, w21, x0
umaddl x5, w27, w13, x5
umaddl x8, w27, w16, x8
umaddl x6, w27, w14, x6
umaddl x7, w27, w15, x7
umaddl x9, w27, w17, x9
add w28, w13, w13
umaddl x4, w26, w28, x4
umaddl x0, w28, w23, x0
umaddl x1, w28, w21, x1
umaddl x2, w28, w24, x2
umaddl x8, w28, w10, x8
umaddl x6, w28, w13, x6
umaddl x7, w28, w14, x7
umaddl x9, w28, w16, x9
add w29, w14, w14
umaddl x0, w29, w20, x0
umaddl x2, w29, w21, x2
umaddl x9, w29, w15, x9
add w18, w16, w16
umaddl x4, w18, w21, x4
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x0, x0, #1
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
// T3 ← (X3 + 1)^2 = X3^2 + 1 + 2X3
ldp x10, x12, [sp, #384]
ldp x14, x16, [sp, #400]
ldr x18, [sp, #416]
add x20, x0, x10
add x22, x2, x12
add x24, x4, x14
add x26, x6, x16
add x28, x8, x18
stp x20, x22, [sp, #424]
stp x24, x26, [sp, #440]
str x28, [sp, #456]
// T4 ← (X3 - 1)^2 = X3^2 + 1 - 2X3
movz x3, #0xffb4
movk x3, #0x0fff, lsl 16
movk x3, #0xfffc, lsl 32
movk x3, #0x07ff, lsl 48
movz x5, #0xfffc
movk x5, #0x0fff, lsl 16
movk x5, #0xfffc, lsl 32
movk x5, #0x07ff, lsl 48
add x20, x0, x3
sub x20, x20, x10
lsr x21, x20, #32
mov w20, w20
add x22, x2, x5
sub x22, x22, x12
lsr x23, x22, #32
mov w22, w22
add x24, x4, x5
sub x24, x24, x14
lsr x25, x24, #32
mov w24, w24
add x26, x6, x5
sub x26, x26, x16
lsr x27, x26, #32
mov w26, w26
add x28, x8, x5
sub x28, x28, x18
lsr x29, x28, #32
mov w28, w28
add x26, x26, x25, lsr #25
and x25, x25, #0x1ffffff
add x21, x21, x20, lsr #26
and x20, x20, #0x3ffffff
add x27, x27, x26, lsr #26
and x26, x26, #0x3ffffff
add x22, x22, x21, lsr #25
and x21, x21, #0x1ffffff
add x28, x28, x27, lsr #25
and x27, x27, #0x1ffffff
add x23, x23, x22, lsr #26
and x22, x22, #0x3ffffff
add x29, x29, x28, lsr #26
and x28, x28, #0x3ffffff
add x24, x24, x23, lsr #25
and x23, x23, #0x1ffffff
bic x7, x29, #0x1ffffff
add x20, x20, x7, lsr #25
add x20, x20, x7, lsr #24
add x20, x20, x7, lsr #21
and x29, x29, #0x1ffffff
add x25, x25, x24, lsr #26
and x24, x24, #0x3ffffff
add x21, x21, x20, lsr #26
and x20, x20, #0x3ffffff
add x26, x26, x25, lsr #25
and x25, x25, #0x1ffffff
add x22, x22, x21, lsr #25
and x21, x21, #0x1ffffff
add x0, sp, #464
stp w20, w21, [x0, #0]
stp w22, w23, [x0, #8]
stp w24, w25, [x0, #16]
stp w26, w27, [x0, #24]
stp w28, w29, [x0, #32]
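// T2 ← ((A + 2)/4) · T1 + T4, with (A + 2)/4 = 121666 = 0x1db42;
// x20..x29 still hold the limbs of T4 and serve as the accumulators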
add x0, sp, #344
ldp w10, w11, [x0, #0]
ldp w12, w13, [x0, #8]
ldp w14, w15, [x0, #16]
ldp w16, w17, [x0, #24]
ldp w18, w19, [x0, #32]
movz x0, #0xdb42
movk x0, #0x0001, lsl 16
umaddl x20, w10, w0, x20
umaddl x21, w11, w0, x21
umaddl x22, w12, w0, x22
umaddl x23, w13, w0, x23
umaddl x24, w14, w0, x24
umaddl x25, w15, w0, x25
umaddl x26, w16, w0, x26
umaddl x27, w17, w0, x27
umaddl x28, w18, w0, x28
umaddl x29, w19, w0, x29
add x26, x26, x25, lsr #25
and x25, x25, #0x1ffffff
add x21, x21, x20, lsr #26
and x20, x20, #0x3ffffff
add x27, x27, x26, lsr #26
and x26, x26, #0x3ffffff
add x22, x22, x21, lsr #25
and x21, x21, #0x1ffffff
add x28, x28, x27, lsr #25
and x27, x27, #0x1ffffff
add x23, x23, x22, lsr #26
and x22, x22, #0x3ffffff
add x29, x29, x28, lsr #26
and x28, x28, #0x3ffffff
add x24, x24, x23, lsr #25
and x23, x23, #0x1ffffff
bic x7, x29, #0x1ffffff
add x20, x20, x7, lsr #25
add x20, x20, x7, lsr #24
add x20, x20, x7, lsr #21
and x29, x29, #0x1ffffff
add x25, x25, x24, lsr #26
and x24, x24, #0x3ffffff
add x21, x21, x20, lsr #26
and x20, x20, #0x3ffffff
add x26, x26, x25, lsr #25
and x25, x25, #0x1ffffff
add x22, x22, x21, lsr #25
and x21, x21, #0x1ffffff
bfi x20, x21, #32, #25
bfi x22, x23, #32, #25
bfi x24, x25, #32, #25
bfi x26, x27, #32, #25
bfi x28, x29, #32, #25
stp x20, x22, [sp, #384]
stp x24, x26, [sp, #400]
str x28, [sp, #416]
// X2 ← T3 · T4
add x0, sp, #424
ldp w10, w11, [x0, #0]
ldp w12, w13, [x0, #8]
ldp w14, w15, [x0, #16]
ldp w16, w17, [x0, #24]
ldp w18, w19, [x0, #32]
ldp w20, w21, [x0, #40]
ldp w22, w23, [x0, #48]
ldp w24, w25, [x0, #56]
ldp w26, w27, [x0, #64]
ldp w28, w29, [x0, #72]
umull x0, w10, w20
umull x1, w10, w21
umull x2, w10, w22
umull x3, w10, w23
umull x4, w10, w24
umull x5, w10, w25
umull x6, w10, w26
umull x7, w10, w27
umull x8, w10, w28
umull x9, w10, w29
umaddl x1, w11, w20, x1
umaddl x3, w11, w22, x3
umaddl x5, w11, w24, x5
umaddl x7, w11, w26, x7
umaddl x9, w11, w28, x9
umaddl x2, w12, w20, x2
umaddl x3, w12, w21, x3
umaddl x4, w12, w22, x4
umaddl x5, w12, w23, x5
umaddl x6, w12, w24, x6
umaddl x7, w12, w25, x7
umaddl x8, w12, w26, x8
umaddl x9, w12, w27, x9
umaddl x3, w13, w20, x3
umaddl x5, w13, w22, x5
umaddl x7, w13, w24, x7
umaddl x9, w13, w26, x9
umaddl x4, w14, w20, x4
umaddl x5, w14, w21, x5
umaddl x6, w14, w22, x6
umaddl x7, w14, w23, x7
umaddl x8, w14, w24, x8
umaddl x9, w14, w25, x9
umaddl x5, w15, w20, x5
umaddl x7, w15, w22, x7
umaddl x9, w15, w24, x9
umaddl x6, w16, w20, x6
umaddl x7, w16, w21, x7
umaddl x8, w16, w22, x8
umaddl x9, w16, w23, x9
umaddl x7, w17, w20, x7
umaddl x9, w17, w22, x9
umaddl x8, w18, w20, x8
umaddl x9, w18, w21, x9
umaddl x9, w19, w20, x9
mul w22, w22, w30
mul w24, w24, w30
mul w26, w26, w30
mul w28, w28, w30
umaddl x0, w12, w28, x0
umaddl x1, w13, w28, x1
umaddl x0, w14, w26, x0
umaddl x2, w14, w28, x2
umaddl x1, w15, w26, x1
umaddl x3, w15, w28, x3
umaddl x0, w16, w24, x0
umaddl x2, w16, w26, x2
umaddl x4, w16, w28, x4
umaddl x1, w17, w24, x1
umaddl x3, w17, w26, x3
umaddl x5, w17, w28, x5
umaddl x0, w18, w22, x0
umaddl x2, w18, w24, x2
umaddl x6, w18, w28, x6
umaddl x4, w18, w26, x4
umaddl x1, w19, w22, x1
umaddl x3, w19, w24, x3
umaddl x5, w19, w26, x5
umaddl x7, w19, w28, x7
add w11, w11, w11
umaddl x2, w11, w21, x2
umaddl x4, w11, w23, x4
umaddl x6, w11, w25, x6
umaddl x8, w11, w27, x8
add w13, w13, w13
umaddl x4, w13, w21, x4
umaddl x6, w13, w23, x6
umaddl x8, w13, w25, x8
add w15, w15, w15
umaddl x6, w15, w21, x6
umaddl x8, w15, w23, x8
add w17, w17, w17
umaddl x8, w17, w21, x8
mul w21, w21, w30
mul w23, w23, w30
mul w25, w25, w30
mul w27, w27, w30
mul w29, w29, w30
umaddl x0, w11, w29, x0
umaddl x1, w12, w29, x1
umaddl x0, w13, w27, x0
umaddl x2, w13, w29, x2
umaddl x1, w14, w27, x1
umaddl x3, w14, w29, x3
umaddl x0, w15, w25, x0
umaddl x2, w15, w27, x2
umaddl x4, w15, w29, x4
umaddl x1, w16, w25, x1
umaddl x3, w16, w27, x3
umaddl x5, w16, w29, x5
umaddl x0, w17, w23, x0
umaddl x2, w17, w25, x2
umaddl x4, w17, w27, x4
umaddl x6, w17, w29, x6
umaddl x1, w18, w23, x1
umaddl x3, w18, w25, x3
umaddl x5, w18, w27, x5
umaddl x7, w18, w29, x7
add w19, w19, w19
umaddl x0, w19, w21, x0
umaddl x2, w19, w23, x2
umaddl x4, w19, w25, x4
umaddl x6, w19, w27, x6
umaddl x8, w19, w29, x8
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
stp x0, x2, [sp, #184]
stp x4, x6, [sp, #200]
str x8, [sp, #216]
// Z2 ← T1 · T2
add x0, sp, #344
ldp w10, w11, [x0, #0]
ldp w12, w13, [x0, #8]
ldp w14, w15, [x0, #16]
ldp w16, w17, [x0, #24]
ldp w18, w19, [x0, #32]
ldp w20, w21, [x0, #40]
ldp w22, w23, [x0, #48]
ldp w24, w25, [x0, #56]
ldp w26, w27, [x0, #64]
ldp w28, w29, [x0, #72]
umull x0, w10, w20
umull x1, w10, w21
umull x2, w10, w22
umull x3, w10, w23
umull x4, w10, w24
umull x5, w10, w25
umull x6, w10, w26
umull x7, w10, w27
umull x8, w10, w28
umull x9, w10, w29
umaddl x1, w11, w20, x1
umaddl x3, w11, w22, x3
umaddl x5, w11, w24, x5
umaddl x7, w11, w26, x7
umaddl x9, w11, w28, x9
umaddl x2, w12, w20, x2
umaddl x3, w12, w21, x3
umaddl x4, w12, w22, x4
umaddl x5, w12, w23, x5
umaddl x6, w12, w24, x6
umaddl x7, w12, w25, x7
umaddl x8, w12, w26, x8
umaddl x9, w12, w27, x9
umaddl x3, w13, w20, x3
umaddl x5, w13, w22, x5
umaddl x7, w13, w24, x7
umaddl x9, w13, w26, x9
umaddl x4, w14, w20, x4
umaddl x5, w14, w21, x5
umaddl x6, w14, w22, x6
umaddl x7, w14, w23, x7
umaddl x8, w14, w24, x8
umaddl x9, w14, w25, x9
umaddl x5, w15, w20, x5
umaddl x7, w15, w22, x7
umaddl x9, w15, w24, x9
umaddl x6, w16, w20, x6
umaddl x7, w16, w21, x7
umaddl x8, w16, w22, x8
umaddl x9, w16, w23, x9
umaddl x7, w17, w20, x7
umaddl x9, w17, w22, x9
umaddl x8, w18, w20, x8
umaddl x9, w18, w21, x9
umaddl x9, w19, w20, x9
mul w22, w22, w30
mul w24, w24, w30
mul w26, w26, w30
mul w28, w28, w30
umaddl x0, w12, w28, x0
umaddl x1, w13, w28, x1
umaddl x0, w14, w26, x0
umaddl x2, w14, w28, x2
umaddl x1, w15, w26, x1
umaddl x3, w15, w28, x3
umaddl x0, w16, w24, x0
umaddl x2, w16, w26, x2
umaddl x4, w16, w28, x4
umaddl x1, w17, w24, x1
umaddl x3, w17, w26, x3
umaddl x5, w17, w28, x5
umaddl x0, w18, w22, x0
umaddl x2, w18, w24, x2
umaddl x6, w18, w28, x6
umaddl x4, w18, w26, x4
umaddl x1, w19, w22, x1
umaddl x3, w19, w24, x3
umaddl x5, w19, w26, x5
umaddl x7, w19, w28, x7
add w11, w11, w11
umaddl x2, w11, w21, x2
umaddl x4, w11, w23, x4
umaddl x6, w11, w25, x6
umaddl x8, w11, w27, x8
add w13, w13, w13
umaddl x4, w13, w21, x4
umaddl x6, w13, w23, x6
umaddl x8, w13, w25, x8
add w15, w15, w15
umaddl x6, w15, w21, x6
umaddl x8, w15, w23, x8
add w17, w17, w17
umaddl x8, w17, w21, x8
mul w21, w21, w30
mul w23, w23, w30
mul w25, w25, w30
mul w27, w27, w30
mul w29, w29, w30
umaddl x0, w11, w29, x0
umaddl x1, w12, w29, x1
umaddl x0, w13, w27, x0
umaddl x2, w13, w29, x2
umaddl x1, w14, w27, x1
umaddl x3, w14, w29, x3
umaddl x0, w15, w25, x0
umaddl x2, w15, w27, x2
umaddl x4, w15, w29, x4
umaddl x1, w16, w25, x1
umaddl x3, w16, w27, x3
umaddl x5, w16, w29, x5
umaddl x0, w17, w23, x0
umaddl x2, w17, w25, x2
umaddl x4, w17, w27, x4
umaddl x6, w17, w29, x6
umaddl x1, w18, w23, x1
umaddl x3, w18, w25, x3
umaddl x5, w18, w27, x5
umaddl x7, w18, w29, x7
add w19, w19, w19
umaddl x0, w19, w21, x0
umaddl x2, w19, w23, x2
umaddl x4, w19, w25, x4
umaddl x6, w19, w27, x6
umaddl x8, w19, w29, x8
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
stp x0, x2, [sp, #224]
stp x4, x6, [sp, #240]
str x8, [sp, #256]
mov x4, #253
mov x5, #1
stp x4, x5, [sp, #112]
.L0:
/*
* Montgomery ladder step
*
* T1 ← X2 + Z2
* T2 ← X2 - Z2
* T3 ← X3 + Z3
* T4 ← X3 - Z3
*
* bit ← n[i]
* T6 = CSelect(T2,T4,bit,prevbit): if (bit <> prevbit) {T6 = T4} else {T6 = T2}
* T5 = CSelect(T1,T3,bit,prevbit): if (bit <> prevbit) {T5 = T3} else {T5 = T1}
* prevbit ← bit
*
* X3 ← T1 · T4
* Z3 ← T2 · T3
* T6 ← T6^2
* T5 ← T5^2
* T8 ← X3 + Z3
* T7 ← X3 - Z3
* T1 ← T7^2
* X3 ← T8^2
* T7 ← T5 - T6
* T8 ← ((A + 2)/4) · T7
* T8 ← T8 + T6
* X2 ← T5 · T6
* Z3 ← T1 · X1
* Z2 ← T7 · T8
*
*/
// T1 = X2 + Z2, T2 = X2 - Z2
ldp x8, x9, [sp, #128]
ldp x11, x13, [sp, #184]
ldp x15, x17, [sp, #200]
ldr x19, [sp, #216]
ldp x10, x12, [sp, #224]
ldp x14, x16, [sp, #240]
ldr x18, [sp, #256]
add x0, x11, x10
add x1, x13, x12
add x2, x15, x14
add x3, x17, x16
add x4, x19, x18
stp x0, x1, [sp, #344]
stp x2, x3, [sp, #360]
str x4, [sp, #376]
add x11, x8, x11
sub x11, x11, x10
add x13, x9, x13
sub x13, x13, x12
add x15, x9, x15
sub x15, x15, x14
add x17, x9, x17
sub x17, x17, x16
add x19, x9, x19
sub x19, x19, x18
stp x11, x13, [sp, #384]
stp x15, x17, [sp, #400]
str x19, [sp, #416]
// T4 = X3 - Z3, T3 = X3 + Z3
ldp x21, x23, [sp, #264]
ldp x25, x27, [sp, #280]
ldr x29, [sp, #296]
ldp x20, x22, [sp, #304]
ldp x24, x26, [sp, 320]
ldr x28, [sp, #336]
add x10, x8, x21
add x12, x9, x23
add x14, x9, x25
add x16, x9, x27
add x18, x9, x29
add x5, x21, x20
sub x20, x10, x20
add x6, x23, x22
sub x22, x12, x22
add x7, x25, x24
sub x24, x14, x24
add x8, x27, x26
sub x26, x16, x26
add x9, x29, x28
sub x28, x18, x28
stp x5, x6, [sp, #424]
stp x7, x8, [sp, #440]
str x9, [sp, #456]
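// get current scalar bit n[i]: i is the counter kept at [sp, #112]; load the
// 64-bit scalar word holding bit i, shift it down (register shift uses i mod 64),
// then store i - 1 and the bit (as the new prevbit) back to [sp, #112]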
ldr x0, [sp, #104]
ldp x3, x4, [sp, #112]
bic x1, x3, #0x3f
lsr x1, x1, #3
ldr x2, [x0, x1]
lsr x2, x2, x3
and w2, w2, #1
subs w3, w3, #1
stp x3, x2, [sp, #112]
// compare current with previous scalar bit
cmp w2, w4
ldp x0, x1, [sp, #344]
ldp x2, x3, [sp, #360]
ldr x4, [sp, #376]
// T5 = CSelect(T1,T3,bit,prevbit): T3 if bit <> prevbit, else T1
csel x10, x5, x0, ne
csel x12, x6, x1, ne
csel x14, x7, x2, ne
csel x16, x8, x3, ne
csel x18, x9, x4, ne
stp x10, x12, [sp, #464]
stp x14, x16, [sp, 480]
str x18, [sp, #496]
// T6 = CSelect(T2,T4,bit,prevbit): T4 if bit <> prevbit, else T2
csel x11, x20, x11, ne
csel x13, x22, x13, ne
csel x15, x24, x15, ne
csel x17, x26, x17, ne
csel x19, x28, x19, ne
add x10, sp, #504
stp x11, x13, [x10, #0]
stp x15, x17, [x10, #16]
str x19, [x10, #32]
// X3 ← T1 · T4
add x0, sp, #344
ldp w10, w11, [x0, #0]
ldp w12, w13, [x0, #8]
ldp w14, w15, [x0, #16]
ldp w16, w17, [x0, #24]
ldp w18, w19, [x0, #32]
lsr x21, x20, #32
lsr x23, x22, #32
lsr x25, x24, #32
lsr x27, x26, #32
lsr x29, x28, #32
umull x0, w10, w20
umull x1, w10, w21
umull x2, w10, w22
umull x3, w10, w23
umull x4, w10, w24
umull x5, w10, w25
umull x6, w10, w26
umull x7, w10, w27
umull x8, w10, w28
umull x9, w10, w29
umaddl x1, w11, w20, x1
umaddl x3, w11, w22, x3
umaddl x5, w11, w24, x5
umaddl x7, w11, w26, x7
umaddl x9, w11, w28, x9
umaddl x2, w12, w20, x2
umaddl x3, w12, w21, x3
umaddl x4, w12, w22, x4
umaddl x5, w12, w23, x5
umaddl x6, w12, w24, x6
umaddl x7, w12, w25, x7
umaddl x8, w12, w26, x8
umaddl x9, w12, w27, x9
umaddl x3, w13, w20, x3
umaddl x5, w13, w22, x5
umaddl x7, w13, w24, x7
umaddl x9, w13, w26, x9
umaddl x4, w14, w20, x4
umaddl x5, w14, w21, x5
umaddl x6, w14, w22, x6
umaddl x7, w14, w23, x7
umaddl x8, w14, w24, x8
umaddl x9, w14, w25, x9
umaddl x5, w15, w20, x5
umaddl x7, w15, w22, x7
umaddl x9, w15, w24, x9
umaddl x6, w16, w20, x6
umaddl x7, w16, w21, x7
umaddl x8, w16, w22, x8
umaddl x9, w16, w23, x9
umaddl x7, w17, w20, x7
umaddl x9, w17, w22, x9
umaddl x8, w18, w20, x8
umaddl x9, w18, w21, x9
umaddl x9, w19, w20, x9
mul w22, w22, w30
mul w24, w24, w30
mul w26, w26, w30
mul w28, w28, w30
umaddl x0, w12, w28, x0
umaddl x1, w13, w28, x1
umaddl x0, w14, w26, x0
umaddl x2, w14, w28, x2
umaddl x1, w15, w26, x1
umaddl x3, w15, w28, x3
umaddl x0, w16, w24, x0
umaddl x2, w16, w26, x2
umaddl x4, w16, w28, x4
umaddl x1, w17, w24, x1
umaddl x3, w17, w26, x3
umaddl x5, w17, w28, x5
umaddl x0, w18, w22, x0
umaddl x2, w18, w24, x2
umaddl x6, w18, w28, x6
umaddl x4, w18, w26, x4
umaddl x1, w19, w22, x1
umaddl x3, w19, w24, x3
umaddl x5, w19, w26, x5
umaddl x7, w19, w28, x7
add w11, w11, w11
umaddl x2, w11, w21, x2
umaddl x4, w11, w23, x4
umaddl x6, w11, w25, x6
umaddl x8, w11, w27, x8
add w13, w13, w13
umaddl x4, w13, w21, x4
umaddl x6, w13, w23, x6
umaddl x8, w13, w25, x8
add w15, w15, w15
umaddl x6, w15, w21, x6
umaddl x8, w15, w23, x8
add w17, w17, w17
umaddl x8, w17, w21, x8
mul w21, w21, w30
mul w23, w23, w30
mul w25, w25, w30
mul w27, w27, w30
mul w29, w29, w30
umaddl x0, w11, w29, x0
umaddl x1, w12, w29, x1
umaddl x0, w13, w27, x0
umaddl x2, w13, w29, x2
umaddl x1, w14, w27, x1
umaddl x3, w14, w29, x3
umaddl x0, w15, w25, x0
umaddl x2, w15, w27, x2
umaddl x4, w15, w29, x4
umaddl x1, w16, w25, x1
umaddl x3, w16, w27, x3
umaddl x5, w16, w29, x5
umaddl x0, w17, w23, x0
umaddl x2, w17, w25, x2
umaddl x4, w17, w27, x4
umaddl x6, w17, w29, x6
umaddl x1, w18, w23, x1
umaddl x3, w18, w25, x3
umaddl x5, w18, w27, x5
umaddl x7, w18, w29, x7
add w19, w19, w19
umaddl x0, w19, w21, x0
umaddl x2, w19, w23, x2
umaddl x4, w19, w25, x4
umaddl x6, w19, w27, x6
umaddl x8, w19, w29, x8
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
stp x0, x2, [sp, #264]
stp x4, x6, [sp, #280]
str x8, [sp, #296]
// Z3 ← T2 · T3
add x0, sp, #384
ldp w10, w11, [x0, #0]
ldp w12, w13, [x0, #8]
ldp w14, w15, [x0, #16]
ldp w16, w17, [x0, #24]
ldp w18, w19, [x0, #32]
ldp w20, w21, [x0, #40]
ldp w22, w23, [x0, #48]
ldp w24, w25, [x0, #56]
ldp w26, w27, [x0, #64]
ldp w28, w29, [x0, #72]
umull x0, w10, w20
umull x1, w10, w21
umull x2, w10, w22
umull x3, w10, w23
umull x4, w10, w24
umull x5, w10, w25
umull x6, w10, w26
umull x7, w10, w27
umull x8, w10, w28
umull x9, w10, w29
umaddl x1, w11, w20, x1
umaddl x3, w11, w22, x3
umaddl x5, w11, w24, x5
umaddl x7, w11, w26, x7
umaddl x9, w11, w28, x9
umaddl x2, w12, w20, x2
umaddl x3, w12, w21, x3
umaddl x4, w12, w22, x4
umaddl x5, w12, w23, x5
umaddl x6, w12, w24, x6
umaddl x7, w12, w25, x7
umaddl x8, w12, w26, x8
umaddl x9, w12, w27, x9
umaddl x3, w13, w20, x3
umaddl x5, w13, w22, x5
umaddl x7, w13, w24, x7
umaddl x9, w13, w26, x9
umaddl x4, w14, w20, x4
umaddl x5, w14, w21, x5
umaddl x6, w14, w22, x6
umaddl x7, w14, w23, x7
umaddl x8, w14, w24, x8
umaddl x9, w14, w25, x9
umaddl x5, w15, w20, x5
umaddl x7, w15, w22, x7
umaddl x9, w15, w24, x9
umaddl x6, w16, w20, x6
umaddl x7, w16, w21, x7
umaddl x8, w16, w22, x8
umaddl x9, w16, w23, x9
umaddl x7, w17, w20, x7
umaddl x9, w17, w22, x9
umaddl x8, w18, w20, x8
umaddl x9, w18, w21, x9
umaddl x9, w19, w20, x9
mul w22, w22, w30
mul w24, w24, w30
mul w26, w26, w30
mul w28, w28, w30
umaddl x0, w12, w28, x0
umaddl x1, w13, w28, x1
umaddl x0, w14, w26, x0
umaddl x2, w14, w28, x2
umaddl x1, w15, w26, x1
umaddl x3, w15, w28, x3
umaddl x0, w16, w24, x0
umaddl x2, w16, w26, x2
umaddl x4, w16, w28, x4
umaddl x1, w17, w24, x1
umaddl x3, w17, w26, x3
umaddl x5, w17, w28, x5
umaddl x0, w18, w22, x0
umaddl x2, w18, w24, x2
umaddl x6, w18, w28, x6
umaddl x4, w18, w26, x4
umaddl x1, w19, w22, x1
umaddl x3, w19, w24, x3
umaddl x5, w19, w26, x5
umaddl x7, w19, w28, x7
add w11, w11, w11
umaddl x2, w11, w21, x2
umaddl x4, w11, w23, x4
umaddl x6, w11, w25, x6
umaddl x8, w11, w27, x8
add w13, w13, w13
umaddl x4, w13, w21, x4
umaddl x6, w13, w23, x6
umaddl x8, w13, w25, x8
add w15, w15, w15
umaddl x6, w15, w21, x6
umaddl x8, w15, w23, x8
add w17, w17, w17
umaddl x8, w17, w21, x8
mul w21, w21, w30
mul w23, w23, w30
mul w25, w25, w30
mul w27, w27, w30
mul w29, w29, w30
umaddl x0, w11, w29, x0
umaddl x1, w12, w29, x1
umaddl x0, w13, w27, x0
umaddl x2, w13, w29, x2
umaddl x1, w14, w27, x1
umaddl x3, w14, w29, x3
umaddl x0, w15, w25, x0
umaddl x2, w15, w27, x2
umaddl x4, w15, w29, x4
umaddl x1, w16, w25, x1
umaddl x3, w16, w27, x3
umaddl x5, w16, w29, x5
umaddl x0, w17, w23, x0
umaddl x2, w17, w25, x2
umaddl x4, w17, w27, x4
umaddl x6, w17, w29, x6
umaddl x1, w18, w23, x1
umaddl x3, w18, w25, x3
umaddl x5, w18, w27, x5
umaddl x7, w18, w29, x7
add w19, w19, w19
umaddl x0, w19, w21, x0
umaddl x2, w19, w23, x2
umaddl x4, w19, w25, x4
umaddl x6, w19, w27, x6
umaddl x8, w19, w29, x8
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
stp x0, x2, [sp, #304]
stp x4, x6, [sp, #320]
str x8, [sp, #336]
// T6 ← T6^2
add x0, sp, #504
ldp w10, w11, [x0, #0]
ldp w12, w13, [x0, #8]
ldp w14, w15, [x0, #16]
ldp w16, w17, [x0, #24]
ldp w18, w19, [x0, #32]
mul w20, w16, w30
mul w21, w18, w30
add w25, w30, w30
mul w22, w15, w25
mul w23, w17, w25
mul w24, w19, w25
umull x0, w10, w10
add w25, w10, w10
umull x1, w25, w11
umull x2, w25, w12
umull x3, w25, w13
umull x4, w25, w14
umull x5, w25, w15
umull x6, w25, w16
umull x7, w25, w17
umull x8, w25, w18
umaddl x4, w12, w12, x4
umaddl x8, w14, w14, x8
add w10, w15, w15
umaddl x1, w20, w10, x1
umaddl x2, w20, w16, x2
add w9, w17, w17
umaddl x3, w21, w10, x3
umaddl x5, w21, w9, x5
umaddl x6, w21, w18, x6
umaddl x0, w22, w15, x0
umaddl x1, w23, w14, x1
umaddl x2, w23, w10, x2
umaddl x3, w23, w16, x3
umaddl x4, w23, w17, x4
umaddl x1, w24, w12, x1
umaddl x3, w24, w14, x3
umaddl x4, w24, w10, x4
umaddl x5, w24, w16, x5
umaddl x6, w24, w9, x6
umaddl x7, w24, w18, x7
umaddl x8, w24, w19, x8
add w26, w11, w11
umaddl x0, w26, w24, x0
umaddl x2, w26, w11, x2
umaddl x3, w26, w12, x3
umaddl x5, w26, w14, x5
umaddl x6, w26, w10, x6
umaddl x7, w26, w16, x7
umaddl x8, w26, w9, x8
umull x9, w25, w19
umaddl x9, w26, w18, x9
add w27, w12, w12
umaddl x0, w27, w21, x0
umaddl x5, w27, w13, x5
umaddl x8, w27, w16, x8
umaddl x6, w27, w14, x6
umaddl x7, w27, w15, x7
umaddl x9, w27, w17, x9
add w28, w13, w13
umaddl x4, w26, w28, x4
umaddl x0, w28, w23, x0
umaddl x1, w28, w21, x1
umaddl x2, w28, w24, x2
umaddl x8, w28, w10, x8
umaddl x6, w28, w13, x6
umaddl x7, w28, w14, x7
umaddl x9, w28, w16, x9
add w29, w14, w14
umaddl x0, w29, w20, x0
umaddl x2, w29, w21, x2
umaddl x9, w29, w15, x9
add w18, w16, w16
umaddl x4, w18, w21, x4
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
add x10, sp, #504
stp x0, x2, [x10, #0]
stp x4, x6, [x10, #16]
str x8, [x10, #32]
// T5 ← T5^2
add x0, sp, #464
ldp w10, w11, [x0, #0]
ldp w12, w13, [x0, #8]
ldp w14, w15, [x0, #16]
ldp w16, w17, [x0, #24]
ldp w18, w19, [x0, #32]
mul w20, w16, w30
mul w21, w18, w30
add w25, w30, w30
mul w22, w15, w25
mul w23, w17, w25
mul w24, w19, w25
umull x0, w10, w10
add w25, w10, w10
umull x1, w25, w11
umull x2, w25, w12
umull x3, w25, w13
umull x4, w25, w14
umull x5, w25, w15
umull x6, w25, w16
umull x7, w25, w17
umull x8, w25, w18
umaddl x4, w12, w12, x4
umaddl x8, w14, w14, x8
add w10, w15, w15
umaddl x1, w20, w10, x1
umaddl x2, w20, w16, x2
add w9, w17, w17
umaddl x3, w21, w10, x3
umaddl x5, w21, w9, x5
umaddl x6, w21, w18, x6
umaddl x0, w22, w15, x0
umaddl x1, w23, w14, x1
umaddl x2, w23, w10, x2
umaddl x3, w23, w16, x3
umaddl x4, w23, w17, x4
umaddl x1, w24, w12, x1
umaddl x3, w24, w14, x3
umaddl x4, w24, w10, x4
umaddl x5, w24, w16, x5
umaddl x6, w24, w9, x6
umaddl x7, w24, w18, x7
umaddl x8, w24, w19, x8
add w26, w11, w11
umaddl x0, w26, w24, x0
umaddl x2, w26, w11, x2
umaddl x3, w26, w12, x3
umaddl x5, w26, w14, x5
umaddl x6, w26, w10, x6
umaddl x7, w26, w16, x7
umaddl x8, w26, w9, x8
umull x9, w25, w19
umaddl x9, w26, w18, x9
add w27, w12, w12
umaddl x0, w27, w21, x0
umaddl x5, w27, w13, x5
umaddl x8, w27, w16, x8
umaddl x6, w27, w14, x6
umaddl x7, w27, w15, x7
umaddl x9, w27, w17, x9
add w28, w13, w13
umaddl x4, w26, w28, x4
umaddl x0, w28, w23, x0
umaddl x1, w28, w21, x1
umaddl x2, w28, w24, x2
umaddl x8, w28, w10, x8
umaddl x6, w28, w13, x6
umaddl x7, w28, w14, x7
umaddl x9, w28, w16, x9
add w29, w14, w14
umaddl x0, w29, w20, x0
umaddl x2, w29, w21, x2
umaddl x9, w29, w15, x9
add w18, w16, w16
umaddl x4, w18, w21, x4
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
stp x0, x2, [sp, #464]
stp x4, x6, [sp, #480]
str x8, [sp, #496]
// X3
ldp x10, x12, [sp, #264]
ldp x14, x16, [sp, #280]
ldr x18, [sp, #296]
// Z3
ldp x20, x22, [sp, #304]
ldp x24, x26, [sp, #320]
ldr x28, [sp, #336]
// T8 ← X3 + Z3
add x21, x10, x20
add x23, x12, x22
add x25, x14, x24
add x27, x16, x26
add x29, x18, x28
add x11, sp, #544
stp x21, x23, [x11, #0]
stp x25, x27, [x11, #16]
str x29, [x11, #32]
// T7 ← X3 - Z3
ldp x1, x2, [sp, #128]
add x10, x10, x1
sub x10, x10, x20
lsr x11, x10, #32
add x12, x12, x2
sub x12, x12, x22
lsr x13, x12, #32
add x14, x14, x2
sub x14, x14, x24
lsr x15, x14, #32
add x16, x16, x2
sub x16, x16, x26
lsr x17, x16, #32
add x18, x18, x2
sub x18, x18, x28
lsr x19, x18, #32
// T1 ← T7^2
mul w20, w16, w30
mul w21, w18, w30
add w25, w30, w30
mul w22, w15, w25
mul w23, w17, w25
mul w24, w19, w25
umull x0, w10, w10
add w25, w10, w10
umull x1, w25, w11
umull x2, w25, w12
umull x3, w25, w13
umull x4, w25, w14
umull x5, w25, w15
umull x6, w25, w16
umull x7, w25, w17
umull x8, w25, w18
umaddl x4, w12, w12, x4
umaddl x8, w14, w14, x8
add w10, w15, w15
umaddl x1, w20, w10, x1
umaddl x2, w20, w16, x2
add w9, w17, w17
umaddl x3, w21, w10, x3
umaddl x5, w21, w9, x5
umaddl x6, w21, w18, x6
umaddl x0, w22, w15, x0
umaddl x1, w23, w14, x1
umaddl x2, w23, w10, x2
umaddl x3, w23, w16, x3
umaddl x4, w23, w17, x4
umaddl x1, w24, w12, x1
umaddl x3, w24, w14, x3
umaddl x4, w24, w10, x4
umaddl x5, w24, w16, x5
umaddl x6, w24, w9, x6
umaddl x7, w24, w18, x7
umaddl x8, w24, w19, x8
add w26, w11, w11
umaddl x0, w26, w24, x0
umaddl x2, w26, w11, x2
umaddl x3, w26, w12, x3
umaddl x5, w26, w14, x5
umaddl x6, w26, w10, x6
umaddl x7, w26, w16, x7
umaddl x8, w26, w9, x8
umull x9, w25, w19
umaddl x9, w26, w18, x9
add w27, w12, w12
umaddl x0, w27, w21, x0
umaddl x5, w27, w13, x5
umaddl x8, w27, w16, x8
umaddl x6, w27, w14, x6
umaddl x7, w27, w15, x7
umaddl x9, w27, w17, x9
add w28, w13, w13
umaddl x4, w26, w28, x4
umaddl x0, w28, w23, x0
umaddl x1, w28, w21, x1
umaddl x2, w28, w24, x2
umaddl x8, w28, w10, x8
umaddl x6, w28, w13, x6
umaddl x7, w28, w14, x7
umaddl x9, w28, w16, x9
add w29, w14, w14
umaddl x0, w29, w20, x0
umaddl x2, w29, w21, x2
umaddl x9, w29, w15, x9
add w18, w16, w16
umaddl x4, w18, w21, x4
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
stp x0, x2, [sp, #344]
stp x4, x6, [sp, #360]
str x8, [sp, #376]
// X3 ← T8^2
// Squares the field element whose ten 32-bit limbs f0..f9 are read from
// sp+544 and writes the result, packed two limbs per 64-bit word, to
// sp+264..sp+303.
add x0, sp, #544
ldp w10, w11, [x0, #0]
ldp w12, w13, [x0, #8]
ldp w14, w15, [x0, #16]
ldp w16, w17, [x0, #24]
ldp w18, w19, [x0, #32]
// Pre-scaled limbs for the product terms that wrap past limb 9:
// w20 = f6*w30, w21 = f8*w30 and, with w25 = 2*w30,
// w22 = f5*w25, w23 = f7*w25, w24 = f9*w25.
// NOTE(review): w30 is loaded with 19 at function entry and is not written
// in this section; presumably it still holds 19 here - confirm.
mul w20, w16, w30
mul w21, w18, w30
add w25, w30, w30
mul w22, w15, w25
mul w23, w17, w25
mul w24, w19, w25
// Column sums h0..h9 accumulate in x0..x9. After f0*f0, w25 is reused as 2*f0.
umull x0, w10, w10
add w25, w10, w10
umull x1, w25, w11
umull x2, w25, w12
umull x3, w25, w13
umull x4, w25, w14
umull x5, w25, w15
umull x6, w25, w16
umull x7, w25, w17
umull x8, w25, w18
umaddl x4, w12, w12, x4
umaddl x8, w14, w14, x8
// w10 is reused as 2*f5 (f0 is not read again); w9 below becomes 2*f7.
add w10, w15, w15
umaddl x1, w20, w10, x1
umaddl x2, w20, w16, x2
add w9, w17, w17
umaddl x3, w21, w10, x3
umaddl x5, w21, w9, x5
umaddl x6, w21, w18, x6
umaddl x0, w22, w15, x0
umaddl x1, w23, w14, x1
umaddl x2, w23, w10, x2
umaddl x3, w23, w16, x3
umaddl x4, w23, w17, x4
umaddl x1, w24, w12, x1
umaddl x3, w24, w14, x3
umaddl x4, w24, w10, x4
umaddl x5, w24, w16, x5
umaddl x6, w24, w9, x6
umaddl x7, w24, w18, x7
umaddl x8, w24, w19, x8
// Doubled factors for the remaining cross terms:
// w26 = 2*f1, w27 = 2*f2, w28 = 2*f3, w29 = 2*f4.
add w26, w11, w11
umaddl x0, w26, w24, x0
umaddl x2, w26, w11, x2
umaddl x3, w26, w12, x3
umaddl x5, w26, w14, x5
umaddl x6, w26, w10, x6
umaddl x7, w26, w16, x7
umaddl x8, w26, w9, x8
umull x9, w25, w19
umaddl x9, w26, w18, x9
add w27, w12, w12
umaddl x0, w27, w21, x0
umaddl x5, w27, w13, x5
umaddl x8, w27, w16, x8
umaddl x6, w27, w14, x6
umaddl x7, w27, w15, x7
umaddl x9, w27, w17, x9
add w28, w13, w13
umaddl x4, w26, w28, x4
umaddl x0, w28, w23, x0
umaddl x1, w28, w21, x1
umaddl x2, w28, w24, x2
umaddl x8, w28, w10, x8
umaddl x6, w28, w13, x6
umaddl x7, w28, w14, x7
umaddl x9, w28, w16, x9
add w29, w14, w14
umaddl x0, w29, w20, x0
umaddl x2, w29, w21, x2
umaddl x9, w29, w15, x9
// w18 is reused as 2*f6; the unscaled f8 is not read again.
add w18, w16, w16
umaddl x4, w18, w21, x4
// Carry propagation over two interleaved chains (h5->h6->h7->h8->h9 and
// h0->h1->h2->h3->h4). Even limbs keep 26 bits, odd limbs keep 25 bits.
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
// Fold the part of h9 above bit 24 back into h0 multiplied by 19:
// x10 = carry << 25, so the three shifted adds contribute carry*(1 + 2 + 16).
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
// Short second pass (h4->h5->h6 and h0->h1). Each bfi packs an odd limb
// into bits 32 and up of its even neighbour.
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
// Write the five packed words to the X3 slot.
stp x0, x2, [sp, #264]
stp x4, x6, [sp, #280]
str x8, [sp, #296]
// T7 ← T5 - T6
// Limb-wise subtraction on packed operands (two 32-bit limbs per 64-bit
// register). T5 is read from sp+464 into x10,x12,x14,x16,x18 and T6 from
// sp+504 into x0,x2,x4,x6,x8.
// NOTE(review): x18 is used as an ordinary scratch register here; AAPCS64
// treats x18 as platform-reserved on some operating systems - confirm the
// intended targets.
ldp x10, x12, [sp, #464]
ldp x14, x16, [sp, #480]
ldr x18, [sp, #496]
add x11, sp, #504
ldp x0, x2, [x11, #0]
ldp x4, x6, [x11, #16]
ldr x8, [x11, #32]
// x21 is the bias for limbs 0-1 and x22 the bias for every other limb pair.
// The bias is added before each subtraction.
// NOTE(review): the function prologue stores 0x03fffffe07ffffda and
// 0x03fffffe07fffffe at sp+128, i.e. the packed limbs of 2*(2^255-19);
// presumably unchanged since then, and presumably added so that no limb of
// the difference goes negative - confirm.
ldp x21, x22, [sp, #128]
add x10, x10, x21
sub x10, x10, x0
add x12, x12, x22
sub x12, x12, x2
add x14, x14, x22
sub x14, x14, x4
add x16, x16, x22
sub x16, x16, x6
add x18, x18, x22
sub x18, x18, x8
// Store packed T7 at sp+584..sp+623.
add x11, sp, #584
stp x10, x12, [x11, #0]
stp x14, x16, [x11, #16]
str x18, [x11, #32]
// Split out the odd limbs of T7 into x11,x13,x15,x17,x19; the even limbs
// remain readable as w10,w12,w14,w16,w18. x0,x2,x4,x6,x8 still hold packed
// T6. Both are consumed by the code that follows.
lsr x11, x10, #32
lsr x13, x12, #32
lsr x15, x14, #32
lsr x17, x16, #32
lsr x19, x18, #32
// T8 ← ((A + 2)/4) · T7 + T6
// On entry x0,x2,x4,x6,x8 hold T6 packed (two limbs per register) and
// w10..w19 hold the ten limbs of T7, as left by the preceding code.
// Unpack T6: odd limbs go to x1,x3,x5,x7,x9; "mov wN, wN" clears the upper
// 32 bits so x0,x2,x4,x6,x8 hold only the even limbs.
lsr x1, x0, #32
lsr x3, x2, #32
lsr x5, x4, #32
lsr x7, x6, #32
lsr x9, x8, #32
mov w0, w0
mov w2, w2
mov w4, w4
mov w6, w6
mov w8, w8
// x20 = 0x1db42 = 121666, the ((A + 2)/4) factor named in the heading.
movz x20, #0xdb42
movk x20, #0x0001, lsl 16
// h_k = T7_k * 121666 + T6_k for k = 0..9. Limb 0 is accumulated in x0
// because w20 still holds the multiplier; limbs 1..9 land in x21..x29.
umaddl x0, w10, w20, x0
umaddl x21, w11, w20, x1
umaddl x22, w12, w20, x2
umaddl x23, w13, w20, x3
umaddl x24, w14, w20, x4
umaddl x25, w15, w20, x5
umaddl x26, w16, w20, x6
umaddl x27, w17, w20, x7
umaddl x28, w18, w20, x8
umaddl x29, w19, w20, x9
// Carry propagation over two interleaved chains (h5..h9 and h0..h4). Even
// limbs keep 26 bits, odd limbs keep 25 bits. The "and x20, x0, ..." below
// also moves limb 0 from x0 into x20, overwriting the constant.
add x26, x26, x25, lsr #25
and x25, x25, #0x1ffffff
add x21, x21, x0, lsr #26
and x20, x0, #0x3ffffff
add x27, x27, x26, lsr #26
and x26, x26, #0x3ffffff
add x22, x22, x21, lsr #25
and x21, x21, #0x1ffffff
add x28, x28, x27, lsr #25
and x27, x27, #0x1ffffff
add x23, x23, x22, lsr #26
and x22, x22, #0x3ffffff
add x29, x29, x28, lsr #26
and x28, x28, #0x3ffffff
add x24, x24, x23, lsr #25
and x23, x23, #0x1ffffff
// Fold the part of h9 above bit 24 back into h0 multiplied by 19:
// x7 = carry << 25, so the three shifted adds contribute carry*(1 + 2 + 16).
bic x7, x29, #0x1ffffff
add x20, x20, x7, lsr #25
add x20, x20, x7, lsr #24
add x20, x20, x7, lsr #21
and x29, x29, #0x1ffffff
// Short second pass: h4->h5->h6 and h0->h1->h2.
add x25, x25, x24, lsr #26
and x24, x24, #0x3ffffff
add x21, x21, x20, lsr #26
and x20, x20, #0x3ffffff
add x26, x26, x25, lsr #25
and x25, x25, #0x1ffffff
add x22, x22, x21, lsr #25
and x21, x21, #0x1ffffff
// Pack each odd limb into bits 32 and up of its even neighbour.
bfi x20, x21, #32, #25
bfi x22, x23, #32, #25
bfi x24, x25, #32, #25
bfi x26, x27, #32, #25
bfi x28, x29, #32, #25
// Store packed T8 at sp+544..sp+583.
add x10, sp, #544
stp x20, x22, [x10, #0]
stp x24, x26, [x10, #16]
str x28, [x10, #32]
// X2 ← T5 · T6
// Field multiplication in the 10-limb representation (limbs alternate 26
// and 25 bits, see the masks in the carry chain below).
// f = limbs of T5, read as ten 32-bit words from sp+464 into w10..w19.
// g = limbs of T6, read packed from sp+504 and split so that w20..w29 hold
// g0..g9. The packed result is written to sp+184..sp+223.
add x0, sp, #464
ldp w10, w11, [x0, #0]
ldp w12, w13, [x0, #8]
ldp w14, w15, [x0, #16]
ldp w16, w17, [x0, #24]
ldp w18, w19, [x0, #32]
add x0, sp, #504
ldp x20, x22, [x0, #0]
ldp x24, x26, [x0, #16]
ldr x28, [x0, #32]
// Odd limbs of g moved down into x21,x23,x25,x27,x29; even limbs are read
// through w20,w22,w24,w26,w28.
lsr x21, x20, #32
lsr x23, x22, #32
lsr x25, x24, #32
lsr x27, x26, #32
lsr x29, x28, #32
// Column sums h0..h9 accumulate in x0..x9.
// Pass 1: all f_i*g_j with i+j <= 9 except odd-index times odd-index.
umull x0, w10, w20
umull x1, w10, w21
umull x2, w10, w22
umull x3, w10, w23
umull x4, w10, w24
umull x5, w10, w25
umull x6, w10, w26
umull x7, w10, w27
umull x8, w10, w28
umull x9, w10, w29
umaddl x1, w11, w20, x1
umaddl x3, w11, w22, x3
umaddl x5, w11, w24, x5
umaddl x7, w11, w26, x7
umaddl x9, w11, w28, x9
umaddl x2, w12, w20, x2
umaddl x3, w12, w21, x3
umaddl x4, w12, w22, x4
umaddl x5, w12, w23, x5
umaddl x6, w12, w24, x6
umaddl x7, w12, w25, x7
umaddl x8, w12, w26, x8
umaddl x9, w12, w27, x9
umaddl x3, w13, w20, x3
umaddl x5, w13, w22, x5
umaddl x7, w13, w24, x7
umaddl x9, w13, w26, x9
umaddl x4, w14, w20, x4
umaddl x5, w14, w21, x5
umaddl x6, w14, w22, x6
umaddl x7, w14, w23, x7
umaddl x8, w14, w24, x8
umaddl x9, w14, w25, x9
umaddl x5, w15, w20, x5
umaddl x7, w15, w22, x7
umaddl x9, w15, w24, x9
umaddl x6, w16, w20, x6
umaddl x7, w16, w21, x7
umaddl x8, w16, w22, x8
umaddl x9, w16, w23, x9
umaddl x7, w17, w20, x7
umaddl x9, w17, w22, x9
umaddl x8, w18, w20, x8
umaddl x9, w18, w21, x9
umaddl x9, w19, w20, x9
// Pass 2: g2,g4,g6,g8 are scaled by w30 in place, then the wrapped terms
// (i+j >= 10) that use an even g limb are added.
// NOTE(review): w30 is loaded with 19 at function entry and is not written
// in this section; presumably it still holds 19 here - confirm.
mul w22, w22, w30
mul w24, w24, w30
mul w26, w26, w30
mul w28, w28, w30
umaddl x0, w12, w28, x0
umaddl x1, w13, w28, x1
umaddl x0, w14, w26, x0
umaddl x2, w14, w28, x2
umaddl x1, w15, w26, x1
umaddl x3, w15, w28, x3
umaddl x0, w16, w24, x0
umaddl x2, w16, w26, x2
umaddl x4, w16, w28, x4
umaddl x1, w17, w24, x1
umaddl x3, w17, w26, x3
umaddl x5, w17, w28, x5
umaddl x0, w18, w22, x0
umaddl x2, w18, w24, x2
umaddl x6, w18, w28, x6
umaddl x4, w18, w26, x4
umaddl x1, w19, w22, x1
umaddl x3, w19, w24, x3
umaddl x5, w19, w26, x5
umaddl x7, w19, w28, x7
// Pass 3: odd-index times odd-index products with i+j <= 9. f1,f3,f5,f7 are
// doubled in place first, so each of these products is counted twice.
add w11, w11, w11
umaddl x2, w11, w21, x2
umaddl x4, w11, w23, x4
umaddl x6, w11, w25, x6
umaddl x8, w11, w27, x8
add w13, w13, w13
umaddl x4, w13, w21, x4
umaddl x6, w13, w23, x6
umaddl x8, w13, w25, x8
add w15, w15, w15
umaddl x6, w15, w21, x6
umaddl x8, w15, w23, x8
add w17, w17, w17
umaddl x8, w17, w21, x8
// Pass 4: g1,g3,g5,g7,g9 are scaled by w30 in place, then the wrapped terms
// that use an odd g limb are added (odd f limbs are already doubled).
mul w21, w21, w30
mul w23, w23, w30
mul w25, w25, w30
mul w27, w27, w30
mul w29, w29, w30
umaddl x0, w11, w29, x0
umaddl x1, w12, w29, x1
umaddl x0, w13, w27, x0
umaddl x2, w13, w29, x2
umaddl x1, w14, w27, x1
umaddl x3, w14, w29, x3
umaddl x0, w15, w25, x0
umaddl x2, w15, w27, x2
umaddl x4, w15, w29, x4
umaddl x1, w16, w25, x1
umaddl x3, w16, w27, x3
umaddl x5, w16, w29, x5
umaddl x0, w17, w23, x0
umaddl x2, w17, w25, x2
umaddl x4, w17, w27, x4
umaddl x6, w17, w29, x6
umaddl x1, w18, w23, x1
umaddl x3, w18, w25, x3
umaddl x5, w18, w27, x5
umaddl x7, w18, w29, x7
// f9 doubled in place for its products with the scaled odd g limbs.
add w19, w19, w19
umaddl x0, w19, w21, x0
umaddl x2, w19, w23, x2
umaddl x4, w19, w25, x4
umaddl x6, w19, w27, x6
umaddl x8, w19, w29, x8
// Carry propagation over two interleaved chains (h5->h6->h7->h8->h9 and
// h0->h1->h2->h3->h4). Even limbs keep 26 bits, odd limbs keep 25 bits.
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
// Fold the part of h9 above bit 24 back into h0 multiplied by 19:
// x10 = carry << 25, so the three shifted adds contribute carry*(1 + 2 + 16).
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
// Short second pass (h4->h5->h6 and h0->h1). Each bfi packs an odd limb
// into bits 32 and up of its even neighbour.
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
// Write the five packed words to the X2 slot.
stp x0, x2, [sp, #184]
stp x4, x6, [sp, #200]
str x8, [sp, #216]
// Z3 ← T1 · X1
// Field multiplication in the 10-limb representation (limbs alternate 26
// and 25 bits, see the masks in the carry chain below).
// With x0 = sp+144: f = ten 32-bit limbs at x0+200 (sp+344, the T1 slot) in
// w10..w19, and g = ten 32-bit limbs at x0+0 (the X1 slot) in w20..w29.
// The packed result is written to sp+304..sp+343.
add x0, sp, #144
ldp w10, w11, [x0, #200]
ldp w12, w13, [x0, #208]
ldp w14, w15, [x0, #216]
ldp w16, w17, [x0, #224]
ldp w18, w19, [x0, #232]
ldp w20, w21, [x0, #0]
ldp w22, w23, [x0, #8]
ldp w24, w25, [x0, #16]
ldp w26, w27, [x0, #24]
ldp w28, w29, [x0, #32]
// Column sums h0..h9 accumulate in x0..x9 (x0 is no longer needed as a base).
// Pass 1: all f_i*g_j with i+j <= 9 except odd-index times odd-index.
umull x0, w10, w20
umull x1, w10, w21
umull x2, w10, w22
umull x3, w10, w23
umull x4, w10, w24
umull x5, w10, w25
umull x6, w10, w26
umull x7, w10, w27
umull x8, w10, w28
umull x9, w10, w29
umaddl x1, w11, w20, x1
umaddl x3, w11, w22, x3
umaddl x5, w11, w24, x5
umaddl x7, w11, w26, x7
umaddl x9, w11, w28, x9
umaddl x2, w12, w20, x2
umaddl x3, w12, w21, x3
umaddl x4, w12, w22, x4
umaddl x5, w12, w23, x5
umaddl x6, w12, w24, x6
umaddl x7, w12, w25, x7
umaddl x8, w12, w26, x8
umaddl x9, w12, w27, x9
umaddl x3, w13, w20, x3
umaddl x5, w13, w22, x5
umaddl x7, w13, w24, x7
umaddl x9, w13, w26, x9
umaddl x4, w14, w20, x4
umaddl x5, w14, w21, x5
umaddl x6, w14, w22, x6
umaddl x7, w14, w23, x7
umaddl x8, w14, w24, x8
umaddl x9, w14, w25, x9
umaddl x5, w15, w20, x5
umaddl x7, w15, w22, x7
umaddl x9, w15, w24, x9
umaddl x6, w16, w20, x6
umaddl x7, w16, w21, x7
umaddl x8, w16, w22, x8
umaddl x9, w16, w23, x9
umaddl x7, w17, w20, x7
umaddl x9, w17, w22, x9
umaddl x8, w18, w20, x8
umaddl x9, w18, w21, x9
umaddl x9, w19, w20, x9
// Pass 2: g2,g4,g6,g8 are scaled by w30 in place, then the wrapped terms
// (i+j >= 10) that use an even g limb are added.
// NOTE(review): w30 is loaded with 19 at function entry and is not written
// in this section; presumably it still holds 19 here - confirm.
mul w22, w22, w30
mul w24, w24, w30
mul w26, w26, w30
mul w28, w28, w30
umaddl x0, w12, w28, x0
umaddl x1, w13, w28, x1
umaddl x0, w14, w26, x0
umaddl x2, w14, w28, x2
umaddl x1, w15, w26, x1
umaddl x3, w15, w28, x3
umaddl x0, w16, w24, x0
umaddl x2, w16, w26, x2
umaddl x4, w16, w28, x4
umaddl x1, w17, w24, x1
umaddl x3, w17, w26, x3
umaddl x5, w17, w28, x5
umaddl x0, w18, w22, x0
umaddl x2, w18, w24, x2
umaddl x6, w18, w28, x6
umaddl x4, w18, w26, x4
umaddl x1, w19, w22, x1
umaddl x3, w19, w24, x3
umaddl x5, w19, w26, x5
umaddl x7, w19, w28, x7
// Pass 3: odd-index times odd-index products with i+j <= 9. f1,f3,f5,f7 are
// doubled in place first, so each of these products is counted twice.
add w11, w11, w11
umaddl x2, w11, w21, x2
umaddl x4, w11, w23, x4
umaddl x6, w11, w25, x6
umaddl x8, w11, w27, x8
add w13, w13, w13
umaddl x4, w13, w21, x4
umaddl x6, w13, w23, x6
umaddl x8, w13, w25, x8
add w15, w15, w15
umaddl x6, w15, w21, x6
umaddl x8, w15, w23, x8
add w17, w17, w17
umaddl x8, w17, w21, x8
// Pass 4: g1,g3,g5,g7,g9 are scaled by w30 in place, then the wrapped terms
// that use an odd g limb are added (odd f limbs are already doubled).
mul w21, w21, w30
mul w23, w23, w30
mul w25, w25, w30
mul w27, w27, w30
mul w29, w29, w30
umaddl x0, w11, w29, x0
umaddl x1, w12, w29, x1
umaddl x0, w13, w27, x0
umaddl x2, w13, w29, x2
umaddl x1, w14, w27, x1
umaddl x3, w14, w29, x3
umaddl x0, w15, w25, x0
umaddl x2, w15, w27, x2
umaddl x4, w15, w29, x4
umaddl x1, w16, w25, x1
umaddl x3, w16, w27, x3
umaddl x5, w16, w29, x5
umaddl x0, w17, w23, x0
umaddl x2, w17, w25, x2
umaddl x4, w17, w27, x4
umaddl x6, w17, w29, x6
umaddl x1, w18, w23, x1
umaddl x3, w18, w25, x3
umaddl x5, w18, w27, x5
umaddl x7, w18, w29, x7
// f9 doubled in place for its products with the scaled odd g limbs.
add w19, w19, w19
umaddl x0, w19, w21, x0
umaddl x2, w19, w23, x2
umaddl x4, w19, w25, x4
umaddl x6, w19, w27, x6
umaddl x8, w19, w29, x8
// Carry propagation over two interleaved chains (h5->h6->h7->h8->h9 and
// h0->h1->h2->h3->h4). Even limbs keep 26 bits, odd limbs keep 25 bits.
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
// Fold the part of h9 above bit 24 back into h0 multiplied by 19:
// x10 = carry << 25, so the three shifted adds contribute carry*(1 + 2 + 16).
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
// Short second pass (h4->h5->h6 and h0->h1). Each bfi packs an odd limb
// into bits 32 and up of its even neighbour.
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
// Write the five packed words to the Z3 slot.
stp x0, x2, [sp, #304]
stp x4, x6, [sp, #320]
str x8, [sp, #336]
// Z2 ← T7 · T8
// Field multiplication in the 10-limb representation (limbs alternate 26
// and 25 bits, see the masks in the carry chain below).
// With x0 = sp+544: f = ten 32-bit limbs at x0+0 (the T8 slot) in w10..w19,
// and g = ten 32-bit limbs at x0+40 (sp+584, the T7 slot) in w20..w29.
// The packed result is written to sp+224..sp+263 and also stays in
// x0,x2,x4,x6,x8.
add x0, sp, #544
ldp w10, w11, [x0, #0]
ldp w12, w13, [x0, #8]
ldp w14, w15, [x0, #16]
ldp w16, w17, [x0, #24]
ldp w18, w19, [x0, #32]
ldp w20, w21, [x0, #40]
ldp w22, w23, [x0, #48]
ldp w24, w25, [x0, #56]
ldp w26, w27, [x0, #64]
ldp w28, w29, [x0, #72]
// Column sums h0..h9 accumulate in x0..x9 (x0 is no longer needed as a base).
// Pass 1: all f_i*g_j with i+j <= 9 except odd-index times odd-index.
umull x0, w10, w20
umull x1, w10, w21
umull x2, w10, w22
umull x3, w10, w23
umull x4, w10, w24
umull x5, w10, w25
umull x6, w10, w26
umull x7, w10, w27
umull x8, w10, w28
umull x9, w10, w29
umaddl x1, w11, w20, x1
umaddl x3, w11, w22, x3
umaddl x5, w11, w24, x5
umaddl x7, w11, w26, x7
umaddl x9, w11, w28, x9
umaddl x2, w12, w20, x2
umaddl x3, w12, w21, x3
umaddl x4, w12, w22, x4
umaddl x5, w12, w23, x5
umaddl x6, w12, w24, x6
umaddl x7, w12, w25, x7
umaddl x8, w12, w26, x8
umaddl x9, w12, w27, x9
umaddl x3, w13, w20, x3
umaddl x5, w13, w22, x5
umaddl x7, w13, w24, x7
umaddl x9, w13, w26, x9
umaddl x4, w14, w20, x4
umaddl x5, w14, w21, x5
umaddl x6, w14, w22, x6
umaddl x7, w14, w23, x7
umaddl x8, w14, w24, x8
umaddl x9, w14, w25, x9
umaddl x5, w15, w20, x5
umaddl x7, w15, w22, x7
umaddl x9, w15, w24, x9
umaddl x6, w16, w20, x6
umaddl x7, w16, w21, x7
umaddl x8, w16, w22, x8
umaddl x9, w16, w23, x9
umaddl x7, w17, w20, x7
umaddl x9, w17, w22, x9
umaddl x8, w18, w20, x8
umaddl x9, w18, w21, x9
umaddl x9, w19, w20, x9
// Pass 2: g2,g4,g6,g8 are scaled by w30 in place, then the wrapped terms
// (i+j >= 10) that use an even g limb are added.
// NOTE(review): w30 is loaded with 19 at function entry and is not written
// in this section; presumably it still holds 19 here - confirm.
mul w22, w22, w30
mul w24, w24, w30
mul w26, w26, w30
mul w28, w28, w30
umaddl x0, w12, w28, x0
umaddl x1, w13, w28, x1
umaddl x0, w14, w26, x0
umaddl x2, w14, w28, x2
umaddl x1, w15, w26, x1
umaddl x3, w15, w28, x3
umaddl x0, w16, w24, x0
umaddl x2, w16, w26, x2
umaddl x4, w16, w28, x4
umaddl x1, w17, w24, x1
umaddl x3, w17, w26, x3
umaddl x5, w17, w28, x5
umaddl x0, w18, w22, x0
umaddl x2, w18, w24, x2
umaddl x6, w18, w28, x6
umaddl x4, w18, w26, x4
umaddl x1, w19, w22, x1
umaddl x3, w19, w24, x3
umaddl x5, w19, w26, x5
umaddl x7, w19, w28, x7
// Pass 3: odd-index times odd-index products with i+j <= 9. f1,f3,f5,f7 are
// doubled in place first, so each of these products is counted twice.
add w11, w11, w11
umaddl x2, w11, w21, x2
umaddl x4, w11, w23, x4
umaddl x6, w11, w25, x6
umaddl x8, w11, w27, x8
add w13, w13, w13
umaddl x4, w13, w21, x4
umaddl x6, w13, w23, x6
umaddl x8, w13, w25, x8
add w15, w15, w15
umaddl x6, w15, w21, x6
umaddl x8, w15, w23, x8
add w17, w17, w17
umaddl x8, w17, w21, x8
// Pass 4: g1,g3,g5,g7,g9 are scaled by w30 in place, then the wrapped terms
// that use an odd g limb are added (odd f limbs are already doubled).
mul w21, w21, w30
mul w23, w23, w30
mul w25, w25, w30
mul w27, w27, w30
mul w29, w29, w30
umaddl x0, w11, w29, x0
umaddl x1, w12, w29, x1
umaddl x0, w13, w27, x0
umaddl x2, w13, w29, x2
umaddl x1, w14, w27, x1
umaddl x3, w14, w29, x3
umaddl x0, w15, w25, x0
umaddl x2, w15, w27, x2
umaddl x4, w15, w29, x4
umaddl x1, w16, w25, x1
umaddl x3, w16, w27, x3
umaddl x5, w16, w29, x5
umaddl x0, w17, w23, x0
umaddl x2, w17, w25, x2
umaddl x4, w17, w27, x4
umaddl x6, w17, w29, x6
umaddl x1, w18, w23, x1
umaddl x3, w18, w25, x3
umaddl x5, w18, w27, x5
umaddl x7, w18, w29, x7
// f9 doubled in place for its products with the scaled odd g limbs.
add w19, w19, w19
umaddl x0, w19, w21, x0
umaddl x2, w19, w23, x2
umaddl x4, w19, w25, x4
umaddl x6, w19, w27, x6
umaddl x8, w19, w29, x8
// Carry propagation over two interleaved chains (h5->h6->h7->h8->h9 and
// h0->h1->h2->h3->h4). Even limbs keep 26 bits, odd limbs keep 25 bits.
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
// Fold the part of h9 above bit 24 back into h0 multiplied by 19:
// x10 = carry << 25, so the three shifted adds contribute carry*(1 + 2 + 16).
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
// Short second pass (h4->h5->h6 and h0->h1). Each bfi packs an odd limb
// into bits 32 and up of its even neighbour.
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
// Write the five packed words to the Z2 slot.
stp x0, x2, [sp, #224]
stp x4, x6, [sp, #240]
str x8, [sp, #256]
// Loop test: branch to .L0 while the 32-bit word at sp+112 is >= 3 (signed
// compare).
// NOTE(review): .L0 and the code that updates sp+112 are outside this view;
// presumably sp+112 is the current scalar bit index and .L0 is the top of
// the ladder loop - confirm.
ldr w3, [sp, #112]
cmp w3, #3
bge .L0
// Flags for the selects below: "ne" holds when the word at sp+120 (prevbit
// in the comments below) is non-zero. The loads between here and the csel
// instructions do not modify the flags.
ldr w3, [sp, #120]
cmp w3, wzr
// Z2 = CSelect(Z2,Z3,0,prevbit)
// x10.. = Z3 loaded from sp+304. x0,x2,x4,x6,x8 are not reloaded: they still
// hold the packed product that the preceding code stored to the Z2 slot at
// sp+224. Selection is branch-free: Z3 when prevbit != 0, otherwise Z2.
ldp x10, x12, [sp, #304]
ldp x14, x16, [sp, #320]
ldr x18, [sp, #336]
csel x10, x10, x0, ne
csel x12, x12, x2, ne
csel x14, x14, x4, ne
csel x16, x16, x6, ne
csel x18, x18, x8, ne
// X2 = CSelect(X2,X3,0,prevbit)
// x11.. = X2 from sp+184, x20.. = X3 from sp+264; X3 is taken when
// prevbit != 0, otherwise X2.
ldp x11, x13, [sp, #184]
ldp x15, x17, [sp, #200]
ldr x19, [sp, #216]
ldp x20, x22, [sp, #264]
ldp x24, x26, [sp, #280]
ldr x28, [sp, #296]
csel x11, x20, x11, ne
csel x13, x22, x13, ne
csel x15, x24, x15, ne
csel x17, x26, x17, ne
csel x19, x28, x19, ne
// post-process for the bit n[2] = 0
// T1 ← X2 + Z2, T2 ← X2 - Z2
// All values are packed two 32-bit limbs per 64-bit register, so each
// add/sub handles a limb pair. T1 is written to sp+344..sp+383.
add x0, x11, x10
add x2, x13, x12
add x4, x15, x14
add x6, x17, x16
add x8, x19, x18
stp x0, x2, [sp, #344]
stp x4, x6, [sp, #360]
str x8, [sp, #376]
// T2 = X2 + bias - Z2, left packed in x10,x12,x14,x16,x18. x8 is the bias
// for limbs 0-1 and x9 the bias for every other limb pair.
// NOTE(review): the function prologue stores the packed limbs of
// 2*(2^255-19) at sp+128; presumably unchanged since then and added so that
// no limb of the difference goes negative - confirm.
ldp x8, x9, [sp, #128]
add x11, x8, x11
sub x10, x11, x10
add x13, x9, x13
sub x12, x13, x12
add x15, x9, x15
sub x14, x15, x14
add x17, x9, x17
sub x16, x17, x16
add x19, x9, x19
sub x18, x19, x18
// T2 ← T2^2
// Squares the field element held packed in x10,x12,x14,x16,x18 (two 32-bit
// limbs per register, as left by the preceding code) and writes the packed
// result to sp+384..sp+423.
// Split out the odd limbs so that w10..w19 hold f0..f9.
lsr x11, x10, #32
lsr x13, x12, #32
lsr x15, x14, #32
lsr x17, x16, #32
lsr x19, x18, #32
// Pre-scaled limbs for the product terms that wrap past limb 9:
// w20 = f6*w30, w21 = f8*w30 and, with w25 = 2*w30,
// w22 = f5*w25, w23 = f7*w25, w24 = f9*w25.
// NOTE(review): w30 is loaded with 19 at function entry and is not written
// in this section; presumably it still holds 19 here - confirm.
mul w20, w16, w30
mul w21, w18, w30
add w25, w30, w30
mul w22, w15, w25
mul w23, w17, w25
mul w24, w19, w25
// Column sums h0..h9 accumulate in x0..x9. After f0*f0, w25 is reused as 2*f0.
umull x0, w10, w10
add w25, w10, w10
umull x1, w25, w11
umull x2, w25, w12
umull x3, w25, w13
umull x4, w25, w14
umull x5, w25, w15
umull x6, w25, w16
umull x7, w25, w17
umull x8, w25, w18
umaddl x4, w12, w12, x4
umaddl x8, w14, w14, x8
// w10 is reused as 2*f5 (f0 is not read again); w9 below becomes 2*f7.
add w10, w15, w15
umaddl x1, w20, w10, x1
umaddl x2, w20, w16, x2
add w9, w17, w17
umaddl x3, w21, w10, x3
umaddl x5, w21, w9, x5
umaddl x6, w21, w18, x6
umaddl x0, w22, w15, x0
umaddl x1, w23, w14, x1
umaddl x2, w23, w10, x2
umaddl x3, w23, w16, x3
umaddl x4, w23, w17, x4
umaddl x1, w24, w12, x1
umaddl x3, w24, w14, x3
umaddl x4, w24, w10, x4
umaddl x5, w24, w16, x5
umaddl x6, w24, w9, x6
umaddl x7, w24, w18, x7
umaddl x8, w24, w19, x8
// Doubled factors for the remaining cross terms:
// w26 = 2*f1, w27 = 2*f2, w28 = 2*f3, w29 = 2*f4.
add w26, w11, w11
umaddl x0, w26, w24, x0
umaddl x2, w26, w11, x2
umaddl x3, w26, w12, x3
umaddl x5, w26, w14, x5
umaddl x6, w26, w10, x6
umaddl x7, w26, w16, x7
umaddl x8, w26, w9, x8
umull x9, w25, w19
umaddl x9, w26, w18, x9
add w27, w12, w12
umaddl x0, w27, w21, x0
umaddl x5, w27, w13, x5
umaddl x8, w27, w16, x8
umaddl x6, w27, w14, x6
umaddl x7, w27, w15, x7
umaddl x9, w27, w17, x9
add w28, w13, w13
umaddl x4, w26, w28, x4
umaddl x0, w28, w23, x0
umaddl x1, w28, w21, x1
umaddl x2, w28, w24, x2
umaddl x8, w28, w10, x8
umaddl x6, w28, w13, x6
umaddl x7, w28, w14, x7
umaddl x9, w28, w16, x9
add w29, w14, w14
umaddl x0, w29, w20, x0
umaddl x2, w29, w21, x2
umaddl x9, w29, w15, x9
// w18 is reused as 2*f6; the unscaled f8 is not read again.
add w18, w16, w16
umaddl x4, w18, w21, x4
// Carry propagation over two interleaved chains (h5->h6->h7->h8->h9 and
// h0->h1->h2->h3->h4). Even limbs keep 26 bits, odd limbs keep 25 bits.
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
// Fold the part of h9 above bit 24 back into h0 multiplied by 19:
// x10 = carry << 25, so the three shifted adds contribute carry*(1 + 2 + 16).
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
// Short second pass (h4->h5->h6 and h0->h1). Each bfi packs an odd limb
// into bits 32 and up of its even neighbour.
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
// Write the five packed words to the T2 slot.
stp x0, x2, [sp, #384]
stp x4, x6, [sp, #400]
str x8, [sp, #416]
// T1 ← T1^2
// Squares in place the field element whose ten 32-bit limbs f0..f9 are read
// from sp+344. The packed result is written back to sp+344..sp+383 and also
// stays in x0,x2,x4,x6,x8, where the code that follows reads it.
add x0, sp, #344
ldp w10, w11, [x0, #0]
ldp w12, w13, [x0, #8]
ldp w14, w15, [x0, #16]
ldp w16, w17, [x0, #24]
ldp w18, w19, [x0, #32]
// Pre-scaled limbs for the product terms that wrap past limb 9:
// w20 = f6*w30, w21 = f8*w30 and, with w25 = 2*w30,
// w22 = f5*w25, w23 = f7*w25, w24 = f9*w25.
// NOTE(review): w30 is loaded with 19 at function entry and is not written
// in this section; presumably it still holds 19 here - confirm.
mul w20, w16, w30
mul w21, w18, w30
add w25, w30, w30
mul w22, w15, w25
mul w23, w17, w25
mul w24, w19, w25
// Column sums h0..h9 accumulate in x0..x9. After f0*f0, w25 is reused as 2*f0.
umull x0, w10, w10
add w25, w10, w10
umull x1, w25, w11
umull x2, w25, w12
umull x3, w25, w13
umull x4, w25, w14
umull x5, w25, w15
umull x6, w25, w16
umull x7, w25, w17
umull x8, w25, w18
umaddl x4, w12, w12, x4
umaddl x8, w14, w14, x8
// w10 is reused as 2*f5 (f0 is not read again); w9 below becomes 2*f7.
add w10, w15, w15
umaddl x1, w20, w10, x1
umaddl x2, w20, w16, x2
add w9, w17, w17
umaddl x3, w21, w10, x3
umaddl x5, w21, w9, x5
umaddl x6, w21, w18, x6
umaddl x0, w22, w15, x0
umaddl x1, w23, w14, x1
umaddl x2, w23, w10, x2
umaddl x3, w23, w16, x3
umaddl x4, w23, w17, x4
umaddl x1, w24, w12, x1
umaddl x3, w24, w14, x3
umaddl x4, w24, w10, x4
umaddl x5, w24, w16, x5
umaddl x6, w24, w9, x6
umaddl x7, w24, w18, x7
umaddl x8, w24, w19, x8
// Doubled factors for the remaining cross terms:
// w26 = 2*f1, w27 = 2*f2, w28 = 2*f3, w29 = 2*f4.
add w26, w11, w11
umaddl x0, w26, w24, x0
umaddl x2, w26, w11, x2
umaddl x3, w26, w12, x3
umaddl x5, w26, w14, x5
umaddl x6, w26, w10, x6
umaddl x7, w26, w16, x7
umaddl x8, w26, w9, x8
umull x9, w25, w19
umaddl x9, w26, w18, x9
add w27, w12, w12
umaddl x0, w27, w21, x0
umaddl x5, w27, w13, x5
umaddl x8, w27, w16, x8
umaddl x6, w27, w14, x6
umaddl x7, w27, w15, x7
umaddl x9, w27, w17, x9
add w28, w13, w13
umaddl x4, w26, w28, x4
umaddl x0, w28, w23, x0
umaddl x1, w28, w21, x1
umaddl x2, w28, w24, x2
umaddl x8, w28, w10, x8
umaddl x6, w28, w13, x6
umaddl x7, w28, w14, x7
umaddl x9, w28, w16, x9
add w29, w14, w14
umaddl x0, w29, w20, x0
umaddl x2, w29, w21, x2
umaddl x9, w29, w15, x9
// w18 is reused as 2*f6; the unscaled f8 is not read again.
add w18, w16, w16
umaddl x4, w18, w21, x4
// Carry propagation over two interleaved chains (h5->h6->h7->h8->h9 and
// h0->h1->h2->h3->h4). Even limbs keep 26 bits, odd limbs keep 25 bits.
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
// Fold the part of h9 above bit 24 back into h0 multiplied by 19:
// x10 = carry << 25, so the three shifted adds contribute carry*(1 + 2 + 16).
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
// Short second pass (h4->h5->h6 and h0->h1). Each bfi packs an odd limb
// into bits 32 and up of its even neighbour.
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
// Write the five packed words back to the T1 slot.
stp x0, x2, [sp, #344]
stp x4, x6, [sp, #360]
str x8, [sp, #376]
// T3 ← T1 - T2
// T1 is taken from x0,x2,x4,x6,x8, which still hold the packed value the
// preceding code stored to sp+344. T2 is loaded packed from sp+384 into
// x20,x22,x24,x26,x28 and stays there for the code that follows.
ldp x20, x22, [sp, #384]
ldp x24, x26, [sp, #400]
ldr x28, [sp, #416]
// x1 is the bias for limbs 0-1 and x3 the bias for every other limb pair.
// NOTE(review): the function prologue stores the packed limbs of
// 2*(2^255-19) at sp+128; presumably unchanged since then and added so that
// no limb of the difference goes negative - confirm.
ldp x1, x3, [sp, #128]
// For each limb pair: T3 = T1 + bias - T2, then the odd limb is copied to the
// next register so that w10..w19 hold the ten limbs of T3.
add x10, x0, x1
sub x10, x10, x20
lsr x11, x10, #32
add x12, x2, x3
sub x12, x12, x22
lsr x13, x12, #32
add x14, x4, x3
sub x14, x14, x24
lsr x15, x14, #32
add x16, x6, x3
sub x16, x16, x26
lsr x17, x16, #32
add x18, x8, x3
sub x18, x18, x28
lsr x19, x18, #32
// T4 ← ((A + 2)/4) · T3 + T2
// On entry x20,x22,x24,x26,x28 hold T2 packed (two limbs per register) and
// w10..w19 hold the ten limbs of T3, as left by the preceding code. The
// result is left unpacked in x20..x29 (one limb per register); nothing is
// stored to memory in this section.
// Unpack T2: odd limbs go to x21,x23,x25,x27,x29; "mov wN, wN" clears the
// upper 32 bits so x20,x22,x24,x26,x28 hold only the even limbs.
lsr x21, x20, #32
lsr x23, x22, #32
lsr x25, x24, #32
lsr x27, x26, #32
lsr x29, x28, #32
mov w20, w20
mov w22, w22
mov w24, w24
mov w26, w26
mov w28, w28
// x0 = 0x1db42 = 121666, the ((A + 2)/4) factor named in the heading.
movz x0, #0xdb42
movk x0, #0x0001, lsl 16
// h_k = T3_k * 121666 + T2_k for k = 0..9, accumulated in x20..x29.
umaddl x20, w10, w0, x20
umaddl x21, w11, w0, x21
umaddl x22, w12, w0, x22
umaddl x23, w13, w0, x23
umaddl x24, w14, w0, x24
umaddl x25, w15, w0, x25
umaddl x26, w16, w0, x26
umaddl x27, w17, w0, x27
umaddl x28, w18, w0, x28
umaddl x29, w19, w0, x29
// Carry propagation over two interleaved chains (h5..h9 and h0..h4). Even
// limbs keep 26 bits, odd limbs keep 25 bits.
add x26, x26, x25, lsr #25
and x25, x25, #0x1ffffff
add x21, x21, x20, lsr #26
and x20, x20, #0x3ffffff
add x27, x27, x26, lsr #26
and x26, x26, #0x3ffffff
add x22, x22, x21, lsr #25
and x21, x21, #0x1ffffff
add x28, x28, x27, lsr #25
and x27, x27, #0x1ffffff
add x23, x23, x22, lsr #26
and x22, x22, #0x3ffffff
add x29, x29, x28, lsr #26
and x28, x28, #0x3ffffff
add x24, x24, x23, lsr #25
and x23, x23, #0x1ffffff
// Fold the part of h9 above bit 24 back into h0 multiplied by 19:
// x7 = carry << 25, so the three shifted adds contribute carry*(1 + 2 + 16).
bic x7, x29, #0x1ffffff
add x20, x20, x7, lsr #25
add x20, x20, x7, lsr #24
add x20, x20, x7, lsr #21
and x29, x29, #0x1ffffff
// Short second pass: h4->h5->h6 and h0->h1->h2.
add x25, x25, x24, lsr #26
and x24, x24, #0x3ffffff
add x21, x21, x20, lsr #26
and x20, x20, #0x3ffffff
add x26, x26, x25, lsr #25
and x25, x25, #0x1ffffff
add x22, x22, x21, lsr #25
and x21, x21, #0x1ffffff
// Z2 ← T3 · T4
// Field multiplication in the 10-limb representation (limbs alternate 26
// and 25 bits, see the masks in the carry chain below).
// Both operands are already in registers, as left by the preceding code:
// f = T3 limbs in w10..w19 and g = T4 limbs in w20..w29.
// The packed result is written to sp+224..sp+263.
// Column sums h0..h9 accumulate in x0..x9.
// Pass 1: all f_i*g_j with i+j <= 9 except odd-index times odd-index.
umull x0, w10, w20
umull x1, w10, w21
umull x2, w10, w22
umull x3, w10, w23
umull x4, w10, w24
umull x5, w10, w25
umull x6, w10, w26
umull x7, w10, w27
umull x8, w10, w28
umull x9, w10, w29
umaddl x1, w11, w20, x1
umaddl x3, w11, w22, x3
umaddl x5, w11, w24, x5
umaddl x7, w11, w26, x7
umaddl x9, w11, w28, x9
umaddl x2, w12, w20, x2
umaddl x3, w12, w21, x3
umaddl x4, w12, w22, x4
umaddl x5, w12, w23, x5
umaddl x6, w12, w24, x6
umaddl x7, w12, w25, x7
umaddl x8, w12, w26, x8
umaddl x9, w12, w27, x9
umaddl x3, w13, w20, x3
umaddl x5, w13, w22, x5
umaddl x7, w13, w24, x7
umaddl x9, w13, w26, x9
umaddl x4, w14, w20, x4
umaddl x5, w14, w21, x5
umaddl x6, w14, w22, x6
umaddl x7, w14, w23, x7
umaddl x8, w14, w24, x8
umaddl x9, w14, w25, x9
umaddl x5, w15, w20, x5
umaddl x7, w15, w22, x7
umaddl x9, w15, w24, x9
umaddl x6, w16, w20, x6
umaddl x7, w16, w21, x7
umaddl x8, w16, w22, x8
umaddl x9, w16, w23, x9
umaddl x7, w17, w20, x7
umaddl x9, w17, w22, x9
umaddl x8, w18, w20, x8
umaddl x9, w18, w21, x9
umaddl x9, w19, w20, x9
// Pass 2: g2,g4,g6,g8 are scaled by w30 in place, then the wrapped terms
// (i+j >= 10) that use an even g limb are added.
// NOTE(review): w30 is loaded with 19 at function entry and is not written
// in this section; presumably it still holds 19 here - confirm.
mul w22, w22, w30
mul w24, w24, w30
mul w26, w26, w30
mul w28, w28, w30
umaddl x0, w12, w28, x0
umaddl x1, w13, w28, x1
umaddl x0, w14, w26, x0
umaddl x2, w14, w28, x2
umaddl x1, w15, w26, x1
umaddl x3, w15, w28, x3
umaddl x0, w16, w24, x0
umaddl x2, w16, w26, x2
umaddl x4, w16, w28, x4
umaddl x1, w17, w24, x1
umaddl x3, w17, w26, x3
umaddl x5, w17, w28, x5
umaddl x0, w18, w22, x0
umaddl x2, w18, w24, x2
umaddl x6, w18, w28, x6
umaddl x4, w18, w26, x4
umaddl x1, w19, w22, x1
umaddl x3, w19, w24, x3
umaddl x5, w19, w26, x5
umaddl x7, w19, w28, x7
// Pass 3: odd-index times odd-index products with i+j <= 9. f1,f3,f5,f7 are
// doubled in place first, so each of these products is counted twice.
add w11, w11, w11
umaddl x2, w11, w21, x2
umaddl x4, w11, w23, x4
umaddl x6, w11, w25, x6
umaddl x8, w11, w27, x8
add w13, w13, w13
umaddl x4, w13, w21, x4
umaddl x6, w13, w23, x6
umaddl x8, w13, w25, x8
add w15, w15, w15
umaddl x6, w15, w21, x6
umaddl x8, w15, w23, x8
add w17, w17, w17
umaddl x8, w17, w21, x8
// Pass 4: g1,g3,g5,g7,g9 are scaled by w30 in place, then the wrapped terms
// that use an odd g limb are added (odd f limbs are already doubled).
mul w21, w21, w30
mul w23, w23, w30
mul w25, w25, w30
mul w27, w27, w30
mul w29, w29, w30
umaddl x0, w11, w29, x0
umaddl x1, w12, w29, x1
umaddl x0, w13, w27, x0
umaddl x2, w13, w29, x2
umaddl x1, w14, w27, x1
umaddl x3, w14, w29, x3
umaddl x0, w15, w25, x0
umaddl x2, w15, w27, x2
umaddl x4, w15, w29, x4
umaddl x1, w16, w25, x1
umaddl x3, w16, w27, x3
umaddl x5, w16, w29, x5
umaddl x0, w17, w23, x0
umaddl x2, w17, w25, x2
umaddl x4, w17, w27, x4
umaddl x6, w17, w29, x6
umaddl x1, w18, w23, x1
umaddl x3, w18, w25, x3
umaddl x5, w18, w27, x5
umaddl x7, w18, w29, x7
// f9 doubled in place for its products with the scaled odd g limbs.
add w19, w19, w19
umaddl x0, w19, w21, x0
umaddl x2, w19, w23, x2
umaddl x4, w19, w25, x4
umaddl x6, w19, w27, x6
umaddl x8, w19, w29, x8
// Carry propagation over two interleaved chains (h5->h6->h7->h8->h9 and
// h0->h1->h2->h3->h4). Even limbs keep 26 bits, odd limbs keep 25 bits.
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
// Fold the part of h9 above bit 24 back into h0 multiplied by 19:
// x10 = carry << 25, so the three shifted adds contribute carry*(1 + 2 + 16).
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
// Short second pass (h4->h5->h6 and h0->h1). Each bfi packs an odd limb
// into bits 32 and up of its even neighbour.
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
// Write the five packed words to the Z2 slot.
stp x0, x2, [sp, #224]
stp x4, x6, [sp, #240]
str x8, [sp, #256]
// X2 ← T1 · T2
// Field multiplication in the 10-limb representation (limbs alternate 26
// and 25 bits, see the masks in the carry chain below).
// With x0 = sp+344: f = ten 32-bit limbs at x0+0 (the T1 slot) in w10..w19,
// and g = ten 32-bit limbs at x0+40 (sp+384, the T2 slot) in w20..w29.
// The packed result is left in x0,x2,x4,x6,x8; this section does not store
// it to memory.
add x0, sp, #344
ldp w10, w11, [x0, #0]
ldp w12, w13, [x0, #8]
ldp w14, w15, [x0, #16]
ldp w16, w17, [x0, #24]
ldp w18, w19, [x0, #32]
ldp w20, w21, [x0, #40]
ldp w22, w23, [x0, #48]
ldp w24, w25, [x0, #56]
ldp w26, w27, [x0, #64]
ldp w28, w29, [x0, #72]
// Column sums h0..h9 accumulate in x0..x9 (x0 is no longer needed as a base).
// Pass 1: all f_i*g_j with i+j <= 9 except odd-index times odd-index.
umull x0, w10, w20
umull x1, w10, w21
umull x2, w10, w22
umull x3, w10, w23
umull x4, w10, w24
umull x5, w10, w25
umull x6, w10, w26
umull x7, w10, w27
umull x8, w10, w28
umull x9, w10, w29
umaddl x1, w11, w20, x1
umaddl x3, w11, w22, x3
umaddl x5, w11, w24, x5
umaddl x7, w11, w26, x7
umaddl x9, w11, w28, x9
umaddl x2, w12, w20, x2
umaddl x3, w12, w21, x3
umaddl x4, w12, w22, x4
umaddl x5, w12, w23, x5
umaddl x6, w12, w24, x6
umaddl x7, w12, w25, x7
umaddl x8, w12, w26, x8
umaddl x9, w12, w27, x9
umaddl x3, w13, w20, x3
umaddl x5, w13, w22, x5
umaddl x7, w13, w24, x7
umaddl x9, w13, w26, x9
umaddl x4, w14, w20, x4
umaddl x5, w14, w21, x5
umaddl x6, w14, w22, x6
umaddl x7, w14, w23, x7
umaddl x8, w14, w24, x8
umaddl x9, w14, w25, x9
umaddl x5, w15, w20, x5
umaddl x7, w15, w22, x7
umaddl x9, w15, w24, x9
umaddl x6, w16, w20, x6
umaddl x7, w16, w21, x7
umaddl x8, w16, w22, x8
umaddl x9, w16, w23, x9
umaddl x7, w17, w20, x7
umaddl x9, w17, w22, x9
umaddl x8, w18, w20, x8
umaddl x9, w18, w21, x9
umaddl x9, w19, w20, x9
// Pass 2: g2,g4,g6,g8 are scaled by w30 in place, then the wrapped terms
// (i+j >= 10) that use an even g limb are added.
// NOTE(review): w30 is loaded with 19 at function entry and is not written
// in this section; presumably it still holds 19 here - confirm.
mul w22, w22, w30
mul w24, w24, w30
mul w26, w26, w30
mul w28, w28, w30
umaddl x0, w12, w28, x0
umaddl x1, w13, w28, x1
umaddl x0, w14, w26, x0
umaddl x2, w14, w28, x2
umaddl x1, w15, w26, x1
umaddl x3, w15, w28, x3
umaddl x0, w16, w24, x0
umaddl x2, w16, w26, x2
umaddl x4, w16, w28, x4
umaddl x1, w17, w24, x1
umaddl x3, w17, w26, x3
umaddl x5, w17, w28, x5
umaddl x0, w18, w22, x0
umaddl x2, w18, w24, x2
umaddl x6, w18, w28, x6
umaddl x4, w18, w26, x4
umaddl x1, w19, w22, x1
umaddl x3, w19, w24, x3
umaddl x5, w19, w26, x5
umaddl x7, w19, w28, x7
// Pass 3: odd-index times odd-index products with i+j <= 9. f1,f3,f5,f7 are
// doubled in place first, so each of these products is counted twice.
add w11, w11, w11
umaddl x2, w11, w21, x2
umaddl x4, w11, w23, x4
umaddl x6, w11, w25, x6
umaddl x8, w11, w27, x8
add w13, w13, w13
umaddl x4, w13, w21, x4
umaddl x6, w13, w23, x6
umaddl x8, w13, w25, x8
add w15, w15, w15
umaddl x6, w15, w21, x6
umaddl x8, w15, w23, x8
add w17, w17, w17
umaddl x8, w17, w21, x8
// Pass 4: g1,g3,g5,g7,g9 are scaled by w30 in place, then the wrapped terms
// that use an odd g limb are added (odd f limbs are already doubled).
mul w21, w21, w30
mul w23, w23, w30
mul w25, w25, w30
mul w27, w27, w30
mul w29, w29, w30
umaddl x0, w11, w29, x0
umaddl x1, w12, w29, x1
umaddl x0, w13, w27, x0
umaddl x2, w13, w29, x2
umaddl x1, w14, w27, x1
umaddl x3, w14, w29, x3
umaddl x0, w15, w25, x0
umaddl x2, w15, w27, x2
umaddl x4, w15, w29, x4
umaddl x1, w16, w25, x1
umaddl x3, w16, w27, x3
umaddl x5, w16, w29, x5
umaddl x0, w17, w23, x0
umaddl x2, w17, w25, x2
umaddl x4, w17, w27, x4
umaddl x6, w17, w29, x6
umaddl x1, w18, w23, x1
umaddl x3, w18, w25, x3
umaddl x5, w18, w27, x5
umaddl x7, w18, w29, x7
// f9 doubled in place for its products with the scaled odd g limbs.
add w19, w19, w19
umaddl x0, w19, w21, x0
umaddl x2, w19, w23, x2
umaddl x4, w19, w25, x4
umaddl x6, w19, w27, x6
umaddl x8, w19, w29, x8
// Carry propagation over two interleaved chains (h5->h6->h7->h8->h9 and
// h0->h1->h2->h3->h4). Even limbs keep 26 bits, odd limbs keep 25 bits.
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
// Fold the part of h9 above bit 24 back into h0 multiplied by 19:
// x10 = carry << 25, so the three shifted adds contribute carry*(1 + 2 + 16).
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
// Short second pass (h4->h5->h6 and h0->h1). Each bfi packs an odd limb
// into bits 32 and up of its even neighbour.
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
// post-process for the bit n[1] = 0
// T1 ← X2 + Z2, T2 ← X2 - Z2
// X2 is taken from x0,x2,x4,x6,x8, where the preceding multiplication left
// its packed result (two 32-bit limbs per register). Z2 is loaded packed
// from sp+224 into x10,x12,x14,x16,x18.
ldp x10, x12, [sp, #224]
ldp x14, x16, [sp, #240]
ldr x18, [sp, #256]
// T1 = X2 + Z2, written to sp+344..sp+383. Sums go to x1,x3,x5,x7,x9 so that
// X2 in x0,x2,x4,x6,x8 stays intact for the subtraction below.
add x1, x0, x10
add x3, x2, x12
add x5, x4, x14
add x7, x6, x16
add x9, x8, x18
stp x1, x3, [sp, #344]
stp x5, x7, [sp, #360]
str x9, [sp, #376]
// T2 = X2 + bias - Z2, left packed in x10,x12,x14,x16,x18. x1 is the bias
// for limbs 0-1 and x3 the bias for every other limb pair.
// NOTE(review): the function prologue stores the packed limbs of
// 2*(2^255-19) at sp+128; presumably unchanged since then and added so that
// no limb of the difference goes negative - confirm.
ldp x1, x3, [sp, #128]
add x11, x1, x0
sub x10, x11, x10
add x13, x3, x2
sub x12, x13, x12
add x15, x3, x4
sub x14, x15, x14
add x17, x3, x6
sub x16, x17, x16
add x19, x3, x8
sub x18, x19, x18
// T2 ← T2^2
// Squares the field element held packed in x10,x12,x14,x16,x18 (two 32-bit
// limbs per register, as left by the preceding code) and writes the packed
// result to sp+384..sp+423.
// Split out the odd limbs so that w10..w19 hold f0..f9.
lsr x11, x10, #32
lsr x13, x12, #32
lsr x15, x14, #32
lsr x17, x16, #32
lsr x19, x18, #32
// Pre-scaled limbs for the product terms that wrap past limb 9:
// w20 = f6*w30, w21 = f8*w30 and, with w25 = 2*w30,
// w22 = f5*w25, w23 = f7*w25, w24 = f9*w25.
// NOTE(review): w30 is loaded with 19 at function entry and is not written
// in this section; presumably it still holds 19 here - confirm.
mul w20, w16, w30
mul w21, w18, w30
add w25, w30, w30
mul w22, w15, w25
mul w23, w17, w25
mul w24, w19, w25
// Column sums h0..h9 accumulate in x0..x9. After f0*f0, w25 is reused as 2*f0.
umull x0, w10, w10
add w25, w10, w10
umull x1, w25, w11
umull x2, w25, w12
umull x3, w25, w13
umull x4, w25, w14
umull x5, w25, w15
umull x6, w25, w16
umull x7, w25, w17
umull x8, w25, w18
umaddl x4, w12, w12, x4
umaddl x8, w14, w14, x8
// w10 is reused as 2*f5 (f0 is not read again); w9 below becomes 2*f7.
add w10, w15, w15
umaddl x1, w20, w10, x1
umaddl x2, w20, w16, x2
add w9, w17, w17
umaddl x3, w21, w10, x3
umaddl x5, w21, w9, x5
umaddl x6, w21, w18, x6
umaddl x0, w22, w15, x0
umaddl x1, w23, w14, x1
umaddl x2, w23, w10, x2
umaddl x3, w23, w16, x3
umaddl x4, w23, w17, x4
umaddl x1, w24, w12, x1
umaddl x3, w24, w14, x3
umaddl x4, w24, w10, x4
umaddl x5, w24, w16, x5
umaddl x6, w24, w9, x6
umaddl x7, w24, w18, x7
umaddl x8, w24, w19, x8
// Doubled factors for the remaining cross terms:
// w26 = 2*f1, w27 = 2*f2, w28 = 2*f3, w29 = 2*f4.
add w26, w11, w11
umaddl x0, w26, w24, x0
umaddl x2, w26, w11, x2
umaddl x3, w26, w12, x3
umaddl x5, w26, w14, x5
umaddl x6, w26, w10, x6
umaddl x7, w26, w16, x7
umaddl x8, w26, w9, x8
umull x9, w25, w19
umaddl x9, w26, w18, x9
add w27, w12, w12
umaddl x0, w27, w21, x0
umaddl x5, w27, w13, x5
umaddl x8, w27, w16, x8
umaddl x6, w27, w14, x6
umaddl x7, w27, w15, x7
umaddl x9, w27, w17, x9
add w28, w13, w13
umaddl x4, w26, w28, x4
umaddl x0, w28, w23, x0
umaddl x1, w28, w21, x1
umaddl x2, w28, w24, x2
umaddl x8, w28, w10, x8
umaddl x6, w28, w13, x6
umaddl x7, w28, w14, x7
umaddl x9, w28, w16, x9
add w29, w14, w14
umaddl x0, w29, w20, x0
umaddl x2, w29, w21, x2
umaddl x9, w29, w15, x9
// w18 is reused as 2*f6; the unscaled f8 is not read again.
add w18, w16, w16
umaddl x4, w18, w21, x4
// Carry propagation over two interleaved chains (h5->h6->h7->h8->h9 and
// h0->h1->h2->h3->h4). Even limbs keep 26 bits, odd limbs keep 25 bits.
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
// Fold the part of h9 above bit 24 back into h0 multiplied by 19:
// x10 = carry << 25, so the three shifted adds contribute carry*(1 + 2 + 16).
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
// Short second pass (h4->h5->h6 and h0->h1). Each bfi packs an odd limb
// into bits 32 and up of its even neighbour.
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
// Write the five packed words to the T2 slot.
stp x0, x2, [sp, #384]
stp x4, x6, [sp, #400]
str x8, [sp, #416]
// T1 ← T1^2
add x0, sp, #344
ldp w10, w11, [x0, #0]
ldp w12, w13, [x0, #8]
ldp w14, w15, [x0, #16]
ldp w16, w17, [x0, #24]
ldp w18, w19, [x0, #32]
mul w20, w16, w30
mul w21, w18, w30
add w25, w30, w30
mul w22, w15, w25
mul w23, w17, w25
mul w24, w19, w25
umull x0, w10, w10
add w25, w10, w10
umull x1, w25, w11
umull x2, w25, w12
umull x3, w25, w13
umull x4, w25, w14
umull x5, w25, w15
umull x6, w25, w16
umull x7, w25, w17
umull x8, w25, w18
umaddl x4, w12, w12, x4
umaddl x8, w14, w14, x8
add w10, w15, w15
umaddl x1, w20, w10, x1
umaddl x2, w20, w16, x2
add w9, w17, w17
umaddl x3, w21, w10, x3
umaddl x5, w21, w9, x5
umaddl x6, w21, w18, x6
umaddl x0, w22, w15, x0
umaddl x1, w23, w14, x1
umaddl x2, w23, w10, x2
umaddl x3, w23, w16, x3
umaddl x4, w23, w17, x4
umaddl x1, w24, w12, x1
umaddl x3, w24, w14, x3
umaddl x4, w24, w10, x4
umaddl x5, w24, w16, x5
umaddl x6, w24, w9, x6
umaddl x7, w24, w18, x7
umaddl x8, w24, w19, x8
add w26, w11, w11
umaddl x0, w26, w24, x0
umaddl x2, w26, w11, x2
umaddl x3, w26, w12, x3
umaddl x5, w26, w14, x5
umaddl x6, w26, w10, x6
umaddl x7, w26, w16, x7
umaddl x8, w26, w9, x8
umull x9, w25, w19
umaddl x9, w26, w18, x9
add w27, w12, w12
umaddl x0, w27, w21, x0
umaddl x5, w27, w13, x5
umaddl x8, w27, w16, x8
umaddl x6, w27, w14, x6
umaddl x7, w27, w15, x7
umaddl x9, w27, w17, x9
add w28, w13, w13
umaddl x4, w26, w28, x4
umaddl x0, w28, w23, x0
umaddl x1, w28, w21, x1
umaddl x2, w28, w24, x2
umaddl x8, w28, w10, x8
umaddl x6, w28, w13, x6
umaddl x7, w28, w14, x7
umaddl x9, w28, w16, x9
add w29, w14, w14
umaddl x0, w29, w20, x0
umaddl x2, w29, w21, x2
umaddl x9, w29, w15, x9
add w18, w16, w16
umaddl x4, w18, w21, x4
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
stp x0, x2, [sp, #344]
stp x4, x6, [sp, #360]
str x8, [sp, #376]
// T3 ← T1 - T2
ldp x20, x22, [sp, #384]
ldp x24, x26, [sp, #400]
ldr x28, [sp, #416]
ldp x1, x3, [sp, #128]
add x10, x0, x1
sub x10, x10, x20
lsr x11, x10, #32
add x12, x2, x3
sub x12, x12, x22
lsr x13, x12, #32
add x14, x4, x3
sub x14, x14, x24
lsr x15, x14, #32
add x16, x6, x3
sub x16, x16, x26
lsr x17, x16, #32
add x18, x8, x3
sub x18, x18, x28
lsr x19, x18, #32
// T4 ← ((A + 2)/4) · T3 + T2
lsr x21, x20, #32
lsr x23, x22, #32
lsr x25, x24, #32
lsr x27, x26, #32
lsr x29, x28, #32
mov w20, w20
mov w22, w22
mov w24, w24
mov w26, w26
mov w28, w28
movz x0, #0xdb42
movk x0, #0x0001, lsl 16
umaddl x20, w10, w0, x20
umaddl x21, w11, w0, x21
umaddl x22, w12, w0, x22
umaddl x23, w13, w0, x23
umaddl x24, w14, w0, x24
umaddl x25, w15, w0, x25
umaddl x26, w16, w0, x26
umaddl x27, w17, w0, x27
umaddl x28, w18, w0, x28
umaddl x29, w19, w0, x29
add x26, x26, x25, lsr #25
and x25, x25, #0x1ffffff
add x21, x21, x20, lsr #26
and x20, x20, #0x3ffffff
add x27, x27, x26, lsr #26
and x26, x26, #0x3ffffff
add x22, x22, x21, lsr #25
and x21, x21, #0x1ffffff
add x28, x28, x27, lsr #25
and x27, x27, #0x1ffffff
add x23, x23, x22, lsr #26
and x22, x22, #0x3ffffff
add x29, x29, x28, lsr #26
and x28, x28, #0x3ffffff
add x24, x24, x23, lsr #25
and x23, x23, #0x1ffffff
bic x7, x29, #0x1ffffff
add x20, x20, x7, lsr #25
add x20, x20, x7, lsr #24
add x20, x20, x7, lsr #21
and x29, x29, #0x1ffffff
add x25, x25, x24, lsr #26
and x24, x24, #0x3ffffff
add x21, x21, x20, lsr #26
and x20, x20, #0x3ffffff
add x26, x26, x25, lsr #25
and x25, x25, #0x1ffffff
add x22, x22, x21, lsr #25
and x21, x21, #0x1ffffff
// Z2 ← T3 · T4
umull x0, w10, w20
umull x1, w10, w21
umull x2, w10, w22
umull x3, w10, w23
umull x4, w10, w24
umull x5, w10, w25
umull x6, w10, w26
umull x7, w10, w27
umull x8, w10, w28
umull x9, w10, w29
umaddl x1, w11, w20, x1
umaddl x3, w11, w22, x3
umaddl x5, w11, w24, x5
umaddl x7, w11, w26, x7
umaddl x9, w11, w28, x9
umaddl x2, w12, w20, x2
umaddl x3, w12, w21, x3
umaddl x4, w12, w22, x4
umaddl x5, w12, w23, x5
umaddl x6, w12, w24, x6
umaddl x7, w12, w25, x7
umaddl x8, w12, w26, x8
umaddl x9, w12, w27, x9
umaddl x3, w13, w20, x3
umaddl x5, w13, w22, x5
umaddl x7, w13, w24, x7
umaddl x9, w13, w26, x9
umaddl x4, w14, w20, x4
umaddl x5, w14, w21, x5
umaddl x6, w14, w22, x6
umaddl x7, w14, w23, x7
umaddl x8, w14, w24, x8
umaddl x9, w14, w25, x9
umaddl x5, w15, w20, x5
umaddl x7, w15, w22, x7
umaddl x9, w15, w24, x9
umaddl x6, w16, w20, x6
umaddl x7, w16, w21, x7
umaddl x8, w16, w22, x8
umaddl x9, w16, w23, x9
umaddl x7, w17, w20, x7
umaddl x9, w17, w22, x9
umaddl x8, w18, w20, x8
umaddl x9, w18, w21, x9
umaddl x9, w19, w20, x9
mul w22, w22, w30
mul w24, w24, w30
mul w26, w26, w30
mul w28, w28, w30
umaddl x0, w12, w28, x0
umaddl x1, w13, w28, x1
umaddl x0, w14, w26, x0
umaddl x2, w14, w28, x2
umaddl x1, w15, w26, x1
umaddl x3, w15, w28, x3
umaddl x0, w16, w24, x0
umaddl x2, w16, w26, x2
umaddl x4, w16, w28, x4
umaddl x1, w17, w24, x1
umaddl x3, w17, w26, x3
umaddl x5, w17, w28, x5
umaddl x0, w18, w22, x0
umaddl x2, w18, w24, x2
umaddl x6, w18, w28, x6
umaddl x4, w18, w26, x4
umaddl x1, w19, w22, x1
umaddl x3, w19, w24, x3
umaddl x5, w19, w26, x5
umaddl x7, w19, w28, x7
add w11, w11, w11
umaddl x2, w11, w21, x2
umaddl x4, w11, w23, x4
umaddl x6, w11, w25, x6
umaddl x8, w11, w27, x8
add w13, w13, w13
umaddl x4, w13, w21, x4
umaddl x6, w13, w23, x6
umaddl x8, w13, w25, x8
add w15, w15, w15
umaddl x6, w15, w21, x6
umaddl x8, w15, w23, x8
add w17, w17, w17
umaddl x8, w17, w21, x8
mul w21, w21, w30
mul w23, w23, w30
mul w25, w25, w30
mul w27, w27, w30
mul w29, w29, w30
umaddl x0, w11, w29, x0
umaddl x1, w12, w29, x1
umaddl x0, w13, w27, x0
umaddl x2, w13, w29, x2
umaddl x1, w14, w27, x1
umaddl x3, w14, w29, x3
umaddl x0, w15, w25, x0
umaddl x2, w15, w27, x2
umaddl x4, w15, w29, x4
umaddl x1, w16, w25, x1
umaddl x3, w16, w27, x3
umaddl x5, w16, w29, x5
umaddl x0, w17, w23, x0
umaddl x2, w17, w25, x2
umaddl x4, w17, w27, x4
umaddl x6, w17, w29, x6
umaddl x1, w18, w23, x1
umaddl x3, w18, w25, x3
umaddl x5, w18, w27, x5
umaddl x7, w18, w29, x7
add w19, w19, w19
umaddl x0, w19, w21, x0
umaddl x2, w19, w23, x2
umaddl x4, w19, w25, x4
umaddl x6, w19, w27, x6
umaddl x8, w19, w29, x8
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
stp x0, x2, [sp, #224]
stp x4, x6, [sp, #240]
str x8, [sp, #256]
// X2 ← T1 · T2
add x0, sp, #344
ldp w10, w11, [x0, #0]
ldp w12, w13, [x0, #8]
ldp w14, w15, [x0, #16]
ldp w16, w17, [x0, #24]
ldp w18, w19, [x0, #32]
ldp w20, w21, [x0, #40]
ldp w22, w23, [x0, #48]
ldp w24, w25, [x0, #56]
ldp w26, w27, [x0, #64]
ldp w28, w29, [x0, #72]
umull x0, w10, w20
umull x1, w10, w21
umull x2, w10, w22
umull x3, w10, w23
umull x4, w10, w24
umull x5, w10, w25
umull x6, w10, w26
umull x7, w10, w27
umull x8, w10, w28
umull x9, w10, w29
umaddl x1, w11, w20, x1
umaddl x3, w11, w22, x3
umaddl x5, w11, w24, x5
umaddl x7, w11, w26, x7
umaddl x9, w11, w28, x9
umaddl x2, w12, w20, x2
umaddl x3, w12, w21, x3
umaddl x4, w12, w22, x4
umaddl x5, w12, w23, x5
umaddl x6, w12, w24, x6
umaddl x7, w12, w25, x7
umaddl x8, w12, w26, x8
umaddl x9, w12, w27, x9
umaddl x3, w13, w20, x3
umaddl x5, w13, w22, x5
umaddl x7, w13, w24, x7
umaddl x9, w13, w26, x9
umaddl x4, w14, w20, x4
umaddl x5, w14, w21, x5
umaddl x6, w14, w22, x6
umaddl x7, w14, w23, x7
umaddl x8, w14, w24, x8
umaddl x9, w14, w25, x9
umaddl x5, w15, w20, x5
umaddl x7, w15, w22, x7
umaddl x9, w15, w24, x9
umaddl x6, w16, w20, x6
umaddl x7, w16, w21, x7
umaddl x8, w16, w22, x8
umaddl x9, w16, w23, x9
umaddl x7, w17, w20, x7
umaddl x9, w17, w22, x9
umaddl x8, w18, w20, x8
umaddl x9, w18, w21, x9
umaddl x9, w19, w20, x9
mul w22, w22, w30
mul w24, w24, w30
mul w26, w26, w30
mul w28, w28, w30
umaddl x0, w12, w28, x0
umaddl x1, w13, w28, x1
umaddl x0, w14, w26, x0
umaddl x2, w14, w28, x2
umaddl x1, w15, w26, x1
umaddl x3, w15, w28, x3
umaddl x0, w16, w24, x0
umaddl x2, w16, w26, x2
umaddl x4, w16, w28, x4
umaddl x1, w17, w24, x1
umaddl x3, w17, w26, x3
umaddl x5, w17, w28, x5
umaddl x0, w18, w22, x0
umaddl x2, w18, w24, x2
umaddl x6, w18, w28, x6
umaddl x4, w18, w26, x4
umaddl x1, w19, w22, x1
umaddl x3, w19, w24, x3
umaddl x5, w19, w26, x5
umaddl x7, w19, w28, x7
add w11, w11, w11
umaddl x2, w11, w21, x2
umaddl x4, w11, w23, x4
umaddl x6, w11, w25, x6
umaddl x8, w11, w27, x8
add w13, w13, w13
umaddl x4, w13, w21, x4
umaddl x6, w13, w23, x6
umaddl x8, w13, w25, x8
add w15, w15, w15
umaddl x6, w15, w21, x6
umaddl x8, w15, w23, x8
add w17, w17, w17
umaddl x8, w17, w21, x8
mul w21, w21, w30
mul w23, w23, w30
mul w25, w25, w30
mul w27, w27, w30
mul w29, w29, w30
umaddl x0, w11, w29, x0
umaddl x1, w12, w29, x1
umaddl x0, w13, w27, x0
umaddl x2, w13, w29, x2
umaddl x1, w14, w27, x1
umaddl x3, w14, w29, x3
umaddl x0, w15, w25, x0
umaddl x2, w15, w27, x2
umaddl x4, w15, w29, x4
umaddl x1, w16, w25, x1
umaddl x3, w16, w27, x3
umaddl x5, w16, w29, x5
umaddl x0, w17, w23, x0
umaddl x2, w17, w25, x2
umaddl x4, w17, w27, x4
umaddl x6, w17, w29, x6
umaddl x1, w18, w23, x1
umaddl x3, w18, w25, x3
umaddl x5, w18, w27, x5
umaddl x7, w18, w29, x7
add w19, w19, w19
umaddl x0, w19, w21, x0
umaddl x2, w19, w23, x2
umaddl x4, w19, w25, x4
umaddl x6, w19, w27, x6
umaddl x8, w19, w29, x8
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
// post-process for the bit n[0] = 0
// T1 ← X2 + Z2, T2 ← X2 - Z2
ldp x10, x12, [sp, #224]
ldp x14, x16, [sp, #240]
ldr x18, [sp, #256]
add x1, x0, x10
add x3, x2, x12
add x5, x4, x14
add x7, x6, x16
add x9, x8, x18
stp x1, x3, [sp, #344]
stp x5, x7, [sp, #360]
str x9, [sp, #376]
ldp x1, x3, [sp, #128]
add x11, x1, x0
sub x10, x11, x10
add x13, x3, x2
sub x12, x13, x12
add x15, x3, x4
sub x14, x15, x14
add x17, x3, x6
sub x16, x17, x16
add x19, x3, x8
sub x18, x19, x18
// T2 ← T2^2
lsr x11, x10, #32
lsr x13, x12, #32
lsr x15, x14, #32
lsr x17, x16, #32
lsr x19, x18, #32
mul w20, w16, w30
mul w21, w18, w30
add w25, w30, w30
mul w22, w15, w25
mul w23, w17, w25
mul w24, w19, w25
umull x0, w10, w10
add w25, w10, w10
umull x1, w25, w11
umull x2, w25, w12
umull x3, w25, w13
umull x4, w25, w14
umull x5, w25, w15
umull x6, w25, w16
umull x7, w25, w17
umull x8, w25, w18
umaddl x4, w12, w12, x4
umaddl x8, w14, w14, x8
add w10, w15, w15
umaddl x1, w20, w10, x1
umaddl x2, w20, w16, x2
add w9, w17, w17
umaddl x3, w21, w10, x3
umaddl x5, w21, w9, x5
umaddl x6, w21, w18, x6
umaddl x0, w22, w15, x0
umaddl x1, w23, w14, x1
umaddl x2, w23, w10, x2
umaddl x3, w23, w16, x3
umaddl x4, w23, w17, x4
umaddl x1, w24, w12, x1
umaddl x3, w24, w14, x3
umaddl x4, w24, w10, x4
umaddl x5, w24, w16, x5
umaddl x6, w24, w9, x6
umaddl x7, w24, w18, x7
umaddl x8, w24, w19, x8
add w26, w11, w11
umaddl x0, w26, w24, x0
umaddl x2, w26, w11, x2
umaddl x3, w26, w12, x3
umaddl x5, w26, w14, x5
umaddl x6, w26, w10, x6
umaddl x7, w26, w16, x7
umaddl x8, w26, w9, x8
umull x9, w25, w19
umaddl x9, w26, w18, x9
add w27, w12, w12
umaddl x0, w27, w21, x0
umaddl x5, w27, w13, x5
umaddl x8, w27, w16, x8
umaddl x6, w27, w14, x6
umaddl x7, w27, w15, x7
umaddl x9, w27, w17, x9
add w28, w13, w13
umaddl x4, w26, w28, x4
umaddl x0, w28, w23, x0
umaddl x1, w28, w21, x1
umaddl x2, w28, w24, x2
umaddl x8, w28, w10, x8
umaddl x6, w28, w13, x6
umaddl x7, w28, w14, x7
umaddl x9, w28, w16, x9
add w29, w14, w14
umaddl x0, w29, w20, x0
umaddl x2, w29, w21, x2
umaddl x9, w29, w15, x9
add w18, w16, w16
umaddl x4, w18, w21, x4
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
stp x0, x2, [sp, #384]
stp x4, x6, [sp, #400]
str x8, [sp, #416]
// T1 ← T1^2
add x0, sp, #344
ldp w10, w11, [x0, #0]
ldp w12, w13, [x0, #8]
ldp w14, w15, [x0, #16]
ldp w16, w17, [x0, #24]
ldp w18, w19, [x0, #32]
mul w20, w16, w30
mul w21, w18, w30
add w25, w30, w30
mul w22, w15, w25
mul w23, w17, w25
mul w24, w19, w25
umull x0, w10, w10
add w25, w10, w10
umull x1, w25, w11
umull x2, w25, w12
umull x3, w25, w13
umull x4, w25, w14
umull x5, w25, w15
umull x6, w25, w16
umull x7, w25, w17
umull x8, w25, w18
umaddl x4, w12, w12, x4
umaddl x8, w14, w14, x8
add w10, w15, w15
umaddl x1, w20, w10, x1
umaddl x2, w20, w16, x2
add w9, w17, w17
umaddl x3, w21, w10, x3
umaddl x5, w21, w9, x5
umaddl x6, w21, w18, x6
umaddl x0, w22, w15, x0
umaddl x1, w23, w14, x1
umaddl x2, w23, w10, x2
umaddl x3, w23, w16, x3
umaddl x4, w23, w17, x4
umaddl x1, w24, w12, x1
umaddl x3, w24, w14, x3
umaddl x4, w24, w10, x4
umaddl x5, w24, w16, x5
umaddl x6, w24, w9, x6
umaddl x7, w24, w18, x7
umaddl x8, w24, w19, x8
add w26, w11, w11
umaddl x0, w26, w24, x0
umaddl x2, w26, w11, x2
umaddl x3, w26, w12, x3
umaddl x5, w26, w14, x5
umaddl x6, w26, w10, x6
umaddl x7, w26, w16, x7
umaddl x8, w26, w9, x8
umull x9, w25, w19
umaddl x9, w26, w18, x9
add w27, w12, w12
umaddl x0, w27, w21, x0
umaddl x5, w27, w13, x5
umaddl x8, w27, w16, x8
umaddl x6, w27, w14, x6
umaddl x7, w27, w15, x7
umaddl x9, w27, w17, x9
add w28, w13, w13
umaddl x4, w26, w28, x4
umaddl x0, w28, w23, x0
umaddl x1, w28, w21, x1
umaddl x2, w28, w24, x2
umaddl x8, w28, w10, x8
umaddl x6, w28, w13, x6
umaddl x7, w28, w14, x7
umaddl x9, w28, w16, x9
add w29, w14, w14
umaddl x0, w29, w20, x0
umaddl x2, w29, w21, x2
umaddl x9, w29, w15, x9
add w18, w16, w16
umaddl x4, w18, w21, x4
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
stp x0, x2, [sp, #344]
stp x4, x6, [sp, #360]
str x8, [sp, #376]
// T3 ← T1 - T2
ldp x20, x22, [sp, #384]
ldp x24, x26, [sp, #400]
ldr x28, [sp, #416]
ldp x1, x3, [sp, #128]
add x10, x0, x1
sub x10, x10, x20
lsr x11, x10, #32
add x12, x2, x3
sub x12, x12, x22
lsr x13, x12, #32
add x14, x4, x3
sub x14, x14, x24
lsr x15, x14, #32
add x16, x6, x3
sub x16, x16, x26
lsr x17, x16, #32
add x18, x8, x3
sub x18, x18, x28
lsr x19, x18, #32
// T4 ← ((A + 2)/4) · T3 + T2
lsr x21, x20, #32
lsr x23, x22, #32
lsr x25, x24, #32
lsr x27, x26, #32
lsr x29, x28, #32
mov w20, w20
mov w22, w22
mov w24, w24
mov w26, w26
mov w28, w28
movz x0, #0xdb42
movk x0, #0x0001, lsl 16
umaddl x20, w10, w0, x20
umaddl x21, w11, w0, x21
umaddl x22, w12, w0, x22
umaddl x23, w13, w0, x23
umaddl x24, w14, w0, x24
umaddl x25, w15, w0, x25
umaddl x26, w16, w0, x26
umaddl x27, w17, w0, x27
umaddl x28, w18, w0, x28
umaddl x29, w19, w0, x29
add x26, x26, x25, lsr #25
and x25, x25, #0x1ffffff
add x21, x21, x20, lsr #26
and x20, x20, #0x3ffffff
add x27, x27, x26, lsr #26
and x26, x26, #0x3ffffff
add x22, x22, x21, lsr #25
and x21, x21, #0x1ffffff
add x28, x28, x27, lsr #25
and x27, x27, #0x1ffffff
add x23, x23, x22, lsr #26
and x22, x22, #0x3ffffff
add x29, x29, x28, lsr #26
and x28, x28, #0x3ffffff
add x24, x24, x23, lsr #25
and x23, x23, #0x1ffffff
bic x7, x29, #0x1ffffff
add x20, x20, x7, lsr #25
add x20, x20, x7, lsr #24
add x20, x20, x7, lsr #21
and x29, x29, #0x1ffffff
add x25, x25, x24, lsr #26
and x24, x24, #0x3ffffff
add x21, x21, x20, lsr #26
and x20, x20, #0x3ffffff
add x26, x26, x25, lsr #25
and x25, x25, #0x1ffffff
add x22, x22, x21, lsr #25
and x21, x21, #0x1ffffff
// Z2 ← T3 · T4
umull x0, w10, w20
umull x1, w10, w21
umull x2, w10, w22
umull x3, w10, w23
umull x4, w10, w24
umull x5, w10, w25
umull x6, w10, w26
umull x7, w10, w27
umull x8, w10, w28
umull x9, w10, w29
umaddl x1, w11, w20, x1
umaddl x3, w11, w22, x3
umaddl x5, w11, w24, x5
umaddl x7, w11, w26, x7
umaddl x9, w11, w28, x9
umaddl x2, w12, w20, x2
umaddl x3, w12, w21, x3
umaddl x4, w12, w22, x4
umaddl x5, w12, w23, x5
umaddl x6, w12, w24, x6
umaddl x7, w12, w25, x7
umaddl x8, w12, w26, x8
umaddl x9, w12, w27, x9
umaddl x3, w13, w20, x3
umaddl x5, w13, w22, x5
umaddl x7, w13, w24, x7
umaddl x9, w13, w26, x9
umaddl x4, w14, w20, x4
umaddl x5, w14, w21, x5
umaddl x6, w14, w22, x6
umaddl x7, w14, w23, x7
umaddl x8, w14, w24, x8
umaddl x9, w14, w25, x9
umaddl x5, w15, w20, x5
umaddl x7, w15, w22, x7
umaddl x9, w15, w24, x9
umaddl x6, w16, w20, x6
umaddl x7, w16, w21, x7
umaddl x8, w16, w22, x8
umaddl x9, w16, w23, x9
umaddl x7, w17, w20, x7
umaddl x9, w17, w22, x9
umaddl x8, w18, w20, x8
umaddl x9, w18, w21, x9
umaddl x9, w19, w20, x9
mul w22, w22, w30
mul w24, w24, w30
mul w26, w26, w30
mul w28, w28, w30
umaddl x0, w12, w28, x0
umaddl x1, w13, w28, x1
umaddl x0, w14, w26, x0
umaddl x2, w14, w28, x2
umaddl x1, w15, w26, x1
umaddl x3, w15, w28, x3
umaddl x0, w16, w24, x0
umaddl x2, w16, w26, x2
umaddl x4, w16, w28, x4
umaddl x1, w17, w24, x1
umaddl x3, w17, w26, x3
umaddl x5, w17, w28, x5
umaddl x0, w18, w22, x0
umaddl x2, w18, w24, x2
umaddl x6, w18, w28, x6
umaddl x4, w18, w26, x4
umaddl x1, w19, w22, x1
umaddl x3, w19, w24, x3
umaddl x5, w19, w26, x5
umaddl x7, w19, w28, x7
add w11, w11, w11
umaddl x2, w11, w21, x2
umaddl x4, w11, w23, x4
umaddl x6, w11, w25, x6
umaddl x8, w11, w27, x8
add w13, w13, w13
umaddl x4, w13, w21, x4
umaddl x6, w13, w23, x6
umaddl x8, w13, w25, x8
add w15, w15, w15
umaddl x6, w15, w21, x6
umaddl x8, w15, w23, x8
add w17, w17, w17
umaddl x8, w17, w21, x8
mul w21, w21, w30
mul w23, w23, w30
mul w25, w25, w30
mul w27, w27, w30
mul w29, w29, w30
umaddl x0, w11, w29, x0
umaddl x1, w12, w29, x1
umaddl x0, w13, w27, x0
umaddl x2, w13, w29, x2
umaddl x1, w14, w27, x1
umaddl x3, w14, w29, x3
umaddl x0, w15, w25, x0
umaddl x2, w15, w27, x2
umaddl x4, w15, w29, x4
umaddl x1, w16, w25, x1
umaddl x3, w16, w27, x3
umaddl x5, w16, w29, x5
umaddl x0, w17, w23, x0
umaddl x2, w17, w25, x2
umaddl x4, w17, w27, x4
umaddl x6, w17, w29, x6
umaddl x1, w18, w23, x1
umaddl x3, w18, w25, x3
umaddl x5, w18, w27, x5
umaddl x7, w18, w29, x7
add w19, w19, w19
umaddl x0, w19, w21, x0
umaddl x2, w19, w23, x2
umaddl x4, w19, w25, x4
umaddl x6, w19, w27, x6
umaddl x8, w19, w29, x8
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
stp x0, x2, [sp, #224]
stp x4, x6, [sp, #240]
str x8, [sp, #256]
// X2 ← T1 · T2
add x0, sp, #344
ldp w10, w11, [x0, #0]
ldp w12, w13, [x0, #8]
ldp w14, w15, [x0, #16]
ldp w16, w17, [x0, #24]
ldp w18, w19, [x0, #32]
ldp w20, w21, [x0, #40]
ldp w22, w23, [x0, #48]
ldp w24, w25, [x0, #56]
ldp w26, w27, [x0, #64]
ldp w28, w29, [x0, #72]
umull x0, w10, w20
umull x1, w10, w21
umull x2, w10, w22
umull x3, w10, w23
umull x4, w10, w24
umull x5, w10, w25
umull x6, w10, w26
umull x7, w10, w27
umull x8, w10, w28
umull x9, w10, w29
umaddl x1, w11, w20, x1
umaddl x3, w11, w22, x3
umaddl x5, w11, w24, x5
umaddl x7, w11, w26, x7
umaddl x9, w11, w28, x9
umaddl x2, w12, w20, x2
umaddl x3, w12, w21, x3
umaddl x4, w12, w22, x4
umaddl x5, w12, w23, x5
umaddl x6, w12, w24, x6
umaddl x7, w12, w25, x7
umaddl x8, w12, w26, x8
umaddl x9, w12, w27, x9
umaddl x3, w13, w20, x3
umaddl x5, w13, w22, x5
umaddl x7, w13, w24, x7
umaddl x9, w13, w26, x9
umaddl x4, w14, w20, x4
umaddl x5, w14, w21, x5
umaddl x6, w14, w22, x6
umaddl x7, w14, w23, x7
umaddl x8, w14, w24, x8
umaddl x9, w14, w25, x9
umaddl x5, w15, w20, x5
umaddl x7, w15, w22, x7
umaddl x9, w15, w24, x9
umaddl x6, w16, w20, x6
umaddl x7, w16, w21, x7
umaddl x8, w16, w22, x8
umaddl x9, w16, w23, x9
umaddl x7, w17, w20, x7
umaddl x9, w17, w22, x9
umaddl x8, w18, w20, x8
umaddl x9, w18, w21, x9
umaddl x9, w19, w20, x9
mul w22, w22, w30
mul w24, w24, w30
mul w26, w26, w30
mul w28, w28, w30
umaddl x0, w12, w28, x0
umaddl x1, w13, w28, x1
umaddl x0, w14, w26, x0
umaddl x2, w14, w28, x2
umaddl x1, w15, w26, x1
umaddl x3, w15, w28, x3
umaddl x0, w16, w24, x0
umaddl x2, w16, w26, x2
umaddl x4, w16, w28, x4
umaddl x1, w17, w24, x1
umaddl x3, w17, w26, x3
umaddl x5, w17, w28, x5
umaddl x0, w18, w22, x0
umaddl x2, w18, w24, x2
umaddl x6, w18, w28, x6
umaddl x4, w18, w26, x4
umaddl x1, w19, w22, x1
umaddl x3, w19, w24, x3
umaddl x5, w19, w26, x5
umaddl x7, w19, w28, x7
add w11, w11, w11
umaddl x2, w11, w21, x2
umaddl x4, w11, w23, x4
umaddl x6, w11, w25, x6
umaddl x8, w11, w27, x8
add w13, w13, w13
umaddl x4, w13, w21, x4
umaddl x6, w13, w23, x6
umaddl x8, w13, w25, x8
add w15, w15, w15
umaddl x6, w15, w21, x6
umaddl x8, w15, w23, x8
add w17, w17, w17
umaddl x8, w17, w21, x8
mul w21, w21, w30
mul w23, w23, w30
mul w25, w25, w30
mul w27, w27, w30
mul w29, w29, w30
umaddl x0, w11, w29, x0
umaddl x1, w12, w29, x1
umaddl x0, w13, w27, x0
umaddl x2, w13, w29, x2
umaddl x1, w14, w27, x1
umaddl x3, w14, w29, x3
umaddl x0, w15, w25, x0
umaddl x2, w15, w27, x2
umaddl x4, w15, w29, x4
umaddl x1, w16, w25, x1
umaddl x3, w16, w27, x3
umaddl x5, w16, w29, x5
umaddl x0, w17, w23, x0
umaddl x2, w17, w25, x2
umaddl x4, w17, w27, x4
umaddl x6, w17, w29, x6
umaddl x1, w18, w23, x1
umaddl x3, w18, w25, x3
umaddl x5, w18, w27, x5
umaddl x7, w18, w29, x7
add w19, w19, w19
umaddl x0, w19, w21, x0
umaddl x2, w19, w23, x2
umaddl x4, w19, w25, x4
umaddl x6, w19, w27, x6
umaddl x8, w19, w29, x8
add x6, x6, x5, lsr #25
and x5, x5, #0x1ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
add x7, x7, x6, lsr #26
and x6, x6, #0x3ffffff
add x2, x2, x1, lsr #25
and x1, x1, #0x1ffffff
add x8, x8, x7, lsr #25
and x7, x7, #0x1ffffff
add x3, x3, x2, lsr #26
and x2, x2, #0x3ffffff
add x9, x9, x8, lsr #26
and x8, x8, #0x3ffffff
add x4, x4, x3, lsr #25
and x3, x3, #0x1ffffff
bfi x2, x3, #32, #25
bic x10, x9, #0x1ffffff
add x0, x0, x10, lsr #25
add x0, x0, x10, lsr #24
add x0, x0, x10, lsr #21
and x9, x9, #0x1ffffff
bfi x8, x9, #32, #25
add x5, x5, x4, lsr #26
and x4, x4, #0x3ffffff
add x1, x1, x0, lsr #26
and x0, x0, #0x3ffffff
bfi x0, x1, #32, #26
add x6, x6, x5, lsr #25
bfi x6, x7, #32, #25
and x5, x5, #0x1ffffff
bfi x4, x5, #32, #25
ldr x1, [sp, #96]
// store X2
stp x0, x2, [x1, #0]
stp x4, x6, [x1, #16]
str x8, [x1, #32]
// store Z2
ldp x0, x2, [sp, #224]
ldp x4, x6, [sp, #240]
ldr x8, [sp, #256]
stp x0, x2, [x1, #40]
stp x4, x6, [x1, #56]
str x8, [x1, #72]
ldp x29, x30, [sp, #80]
ldp x27, x28, [sp, #64]
ldp x25, x26, [sp, #48]
ldp x23, x24, [sp, #32]
ldp x21, x22, [sp, #16]
ldp x19, x20, [sp, #0]
add sp, sp, #624
ret
// Emit an empty .note.GNU-stack section with no flags (in particular no
// executable flag). GNU toolchains use this marker to record that the
// object file does not need an executable stack.
.section .note.GNU-stack,"",@progbits