/* armv8-aes-asm
 *
 * Copyright (C) 2006-2025 wolfSSL Inc.
 *
 * This file is part of wolfSSL.
 *
 * wolfSSL is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * wolfSSL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
 */

#include <wolfssl/wolfcrypt/libwolfssl_sources_asm.h>
#include <wolfssl/wolfcrypt/error-crypt.h>

/* Generated using (from wolfssl):
 *   cd ../scripts
 *   ruby ./aes/aes.rb arm64 \
 *       ../wolfssl/wolfcrypt/src/port/arm/armv8-aes-asm.c
 */
#ifdef WOLFSSL_ARMASM
#ifdef __aarch64__
#ifdef WOLFSSL_ARMASM_INLINE
#include <wolfssl/wolfcrypt/aes.h>

#if !defined(NO_AES) && defined(WOLFSSL_ARMASM)
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
/* Expand an AES user key into a round-key schedule using the ARMv8 Crypto
 * Extensions.
 *
 * userKey  Raw key bytes. 16, 24 or 32 bytes are read depending on keylen.
 * keylen   Key length in bytes. A value below 24 selects the 128-bit path,
 *          exactly 24 the 192-bit path and anything above 24 the 256-bit
 *          path.
 * key      Output buffer for the schedule. 176 (0xb0), 208 (0xd0) or
 *          240 (0xf0) bytes are written for the 128/192/256-bit paths.
 * dir      0 leaves the schedule in generation order. Any other value
 *          reverses the order of the 16-byte round keys in place and
 *          applies AESIMC to every round key except the first and last.
 *
 * In every expansion step the word substitution is done by AESE on a zero
 * state with the word broadcast to all four lanes: every column is equal,
 * so the row shift has no effect and lane 0 holds the byte-wise S-box
 * substitution of the word.
 */
void AES_set_key_AARCH64(const byte* userKey, int keylen, byte* key, int dir)
{
    __asm__ __volatile__ (
        /* keylen is a 32-bit int: compare the W view so that the unspecified
         * upper half of the register cannot affect the dispatch. */
        "cmp	%w[keylen], #24\n\t"
        "b.lt	L_aes_set_key_arm64_crypto_start_128_%=\n\t"
        "b.gt	L_aes_set_key_arm64_crypto_start_256_%=\n\t"
        /* 192-bit key: copy the 6 key words, keep them in w4-w9. */
        "ldr	x4, [%x[userKey]], #8\n\t"
        "ldr	x6, [%x[userKey]], #8\n\t"
        "ldr	x8, [%x[userKey]], #8\n\t"
        "stp	x4, x6, [%x[key]], #16\n\t"
        "str	x8, [%x[key]], #8\n\t"
        "lsr	x5, x4, #32\n\t"
        "lsr	x7, x6, #32\n\t"
        "lsr	x9, x8, #32\n\t"
        /* Step 1: round constant 0x01. */
        "dup	v1.4s, w9\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #1\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "eor	w8, w8, w7\n\t"
        "eor	w9, w9, w8\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        "stp	w8, w9, [%x[key]], #8\n\t"
        /* Step 2: round constant 0x02. */
        "dup	v1.4s, w9\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #2\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "eor	w8, w8, w7\n\t"
        "eor	w9, w9, w8\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        "stp	w8, w9, [%x[key]], #8\n\t"
        /* Step 3: round constant 0x04. */
        "dup	v1.4s, w9\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #4\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "eor	w8, w8, w7\n\t"
        "eor	w9, w9, w8\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        "stp	w8, w9, [%x[key]], #8\n\t"
        /* Step 4: round constant 0x08. */
        "dup	v1.4s, w9\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #8\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "eor	w8, w8, w7\n\t"
        "eor	w9, w9, w8\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        "stp	w8, w9, [%x[key]], #8\n\t"
        /* Step 5: round constant 0x10. */
        "dup	v1.4s, w9\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #16\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "eor	w8, w8, w7\n\t"
        "eor	w9, w9, w8\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        "stp	w8, w9, [%x[key]], #8\n\t"
        /* Step 6: round constant 0x20. */
        "dup	v1.4s, w9\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #32\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "eor	w8, w8, w7\n\t"
        "eor	w9, w9, w8\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        "stp	w8, w9, [%x[key]], #8\n\t"
        /* Step 7: round constant 0x40. */
        "dup	v1.4s, w9\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #0x40\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "eor	w8, w8, w7\n\t"
        "eor	w9, w9, w8\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        "stp	w8, w9, [%x[key]], #8\n\t"
        /* Step 8: round constant 0x80. Only 4 more words are needed to
         * complete the 208-byte schedule. */
        "dup	v1.4s, w9\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #0x80\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        "cmp	%w[dir], #0\n\t"
        "b.eq	L_aes_set_key_arm64_crypto_done_%=\n\t"
        /* dir != 0: rewind to the start of the schedule, then swap round
         * keys end-for-end, applying AESIMC to all but the outermost pair. */
        "sub	%x[key], %x[key], #0xd0\n\t"
        "ldur	q0, [%x[key]]\n\t"
        "ldur	q1, [%x[key], #192]\n\t"
        "stur	q1, [%x[key]]\n\t"
        "stur	q0, [%x[key], #192]\n\t"
        "ldur	q0, [%x[key], #16]\n\t"
        "ldur	q1, [%x[key], #176]\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "stur	q1, [%x[key], #16]\n\t"
        "stur	q0, [%x[key], #176]\n\t"
        "ldur	q0, [%x[key], #32]\n\t"
        "ldur	q1, [%x[key], #160]\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "stur	q1, [%x[key], #32]\n\t"
        "stur	q0, [%x[key], #160]\n\t"
        "ldur	q0, [%x[key], #48]\n\t"
        "ldur	q1, [%x[key], #144]\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "stur	q1, [%x[key], #48]\n\t"
        "stur	q0, [%x[key], #144]\n\t"
        "ldur	q0, [%x[key], #64]\n\t"
        "ldur	q1, [%x[key], #128]\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "stur	q1, [%x[key], #64]\n\t"
        "stur	q0, [%x[key], #128]\n\t"
        "ldur	q0, [%x[key], #80]\n\t"
        "ldur	q1, [%x[key], #112]\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "stur	q1, [%x[key], #80]\n\t"
        "stur	q0, [%x[key], #112]\n\t"
        /* Middle round key stays in place. */
        "ldur	q0, [%x[key], #96]\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "stur	q0, [%x[key], #96]\n\t"
        "b	L_aes_set_key_arm64_crypto_done_%=\n\t"
        "\n"
    "L_aes_set_key_arm64_crypto_start_256_%=: \n\t"
        /* 256-bit key: copy the 8 key words, keep them in w4-w11. */
        "ldr	x4, [%x[userKey]], #8\n\t"
        "ldr	x6, [%x[userKey]], #8\n\t"
        "ldr	x8, [%x[userKey]], #8\n\t"
        "ldr	x10, [%x[userKey]], #8\n\t"
        "stp	x4, x6, [%x[key]], #16\n\t"
        "stp	x8, x10, [%x[key]], #16\n\t"
        "lsr	x5, x4, #32\n\t"
        "lsr	x7, x6, #32\n\t"
        "lsr	x9, x8, #32\n\t"
        "lsr	x11, x10, #32\n\t"
        /* Step 1: round constant 0x01. The first four words use the rotated
         * substitution, the second four the unrotated substitution of w7. */
        "dup	v1.4s, w11\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #1\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "dup	v1.4s, w7\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "eor	w8, w8, w12\n\t"
        "eor	w9, w9, w8\n\t"
        "eor	w10, w10, w9\n\t"
        "eor	w11, w11, w10\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        "stp	w8, w9, [%x[key]], #8\n\t"
        "stp	w10, w11, [%x[key]], #8\n\t"
        /* Step 2: round constant 0x02. */
        "dup	v1.4s, w11\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #2\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "dup	v1.4s, w7\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "eor	w8, w8, w12\n\t"
        "eor	w9, w9, w8\n\t"
        "eor	w10, w10, w9\n\t"
        "eor	w11, w11, w10\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        "stp	w8, w9, [%x[key]], #8\n\t"
        "stp	w10, w11, [%x[key]], #8\n\t"
        /* Step 3: round constant 0x04. */
        "dup	v1.4s, w11\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #4\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "dup	v1.4s, w7\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "eor	w8, w8, w12\n\t"
        "eor	w9, w9, w8\n\t"
        "eor	w10, w10, w9\n\t"
        "eor	w11, w11, w10\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        "stp	w8, w9, [%x[key]], #8\n\t"
        "stp	w10, w11, [%x[key]], #8\n\t"
        /* Step 4: round constant 0x08. */
        "dup	v1.4s, w11\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #8\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "dup	v1.4s, w7\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "eor	w8, w8, w12\n\t"
        "eor	w9, w9, w8\n\t"
        "eor	w10, w10, w9\n\t"
        "eor	w11, w11, w10\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        "stp	w8, w9, [%x[key]], #8\n\t"
        "stp	w10, w11, [%x[key]], #8\n\t"
        /* Step 5: round constant 0x10. */
        "dup	v1.4s, w11\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #16\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "dup	v1.4s, w7\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "eor	w8, w8, w12\n\t"
        "eor	w9, w9, w8\n\t"
        "eor	w10, w10, w9\n\t"
        "eor	w11, w11, w10\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        "stp	w8, w9, [%x[key]], #8\n\t"
        "stp	w10, w11, [%x[key]], #8\n\t"
        /* Step 6: round constant 0x20. */
        "dup	v1.4s, w11\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #32\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "dup	v1.4s, w7\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "eor	w8, w8, w12\n\t"
        "eor	w9, w9, w8\n\t"
        "eor	w10, w10, w9\n\t"
        "eor	w11, w11, w10\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        "stp	w8, w9, [%x[key]], #8\n\t"
        "stp	w10, w11, [%x[key]], #8\n\t"
        /* Step 7: round constant 0x40. Only 4 more words are needed to
         * complete the 240-byte schedule. */
        "dup	v1.4s, w11\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #0x40\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        "cmp	%w[dir], #0\n\t"
        "b.eq	L_aes_set_key_arm64_crypto_done_%=\n\t"
        /* dir != 0: rewind to the start of the schedule, then swap round
         * keys end-for-end, applying AESIMC to all but the outermost pair. */
        "sub	%x[key], %x[key], #0xf0\n\t"
        "ldur	q0, [%x[key]]\n\t"
        "ldur	q1, [%x[key], #224]\n\t"
        "stur	q1, [%x[key]]\n\t"
        "stur	q0, [%x[key], #224]\n\t"
        "ldur	q0, [%x[key], #16]\n\t"
        "ldur	q1, [%x[key], #208]\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "stur	q1, [%x[key], #16]\n\t"
        "stur	q0, [%x[key], #208]\n\t"
        "ldur	q0, [%x[key], #32]\n\t"
        "ldur	q1, [%x[key], #192]\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "stur	q1, [%x[key], #32]\n\t"
        "stur	q0, [%x[key], #192]\n\t"
        "ldur	q0, [%x[key], #48]\n\t"
        "ldur	q1, [%x[key], #176]\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "stur	q1, [%x[key], #48]\n\t"
        "stur	q0, [%x[key], #176]\n\t"
        "ldur	q0, [%x[key], #64]\n\t"
        "ldur	q1, [%x[key], #160]\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "stur	q1, [%x[key], #64]\n\t"
        "stur	q0, [%x[key], #160]\n\t"
        "ldur	q0, [%x[key], #80]\n\t"
        "ldur	q1, [%x[key], #144]\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "stur	q1, [%x[key], #80]\n\t"
        "stur	q0, [%x[key], #144]\n\t"
        "ldur	q0, [%x[key], #96]\n\t"
        "ldur	q1, [%x[key], #128]\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "stur	q1, [%x[key], #96]\n\t"
        "stur	q0, [%x[key], #128]\n\t"
        /* Middle round key stays in place. */
        "ldur	q0, [%x[key], #112]\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "stur	q0, [%x[key], #112]\n\t"
        "b	L_aes_set_key_arm64_crypto_done_%=\n\t"
        "\n"
    "L_aes_set_key_arm64_crypto_start_128_%=: \n\t"
        /* 128-bit key: copy the 4 key words, keep them in w4-w7. */
        "ldr	x4, [%x[userKey]], #8\n\t"
        "ldr	x6, [%x[userKey]], #8\n\t"
        "stp	x4, x6, [%x[key]], #16\n\t"
        "lsr	x5, x4, #32\n\t"
        "lsr	x7, x6, #32\n\t"
        /* Step 1: round constant 0x01. */
        "dup	v1.4s, w7\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #1\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        /* Step 2: round constant 0x02. */
        "dup	v1.4s, w7\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #2\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        /* Step 3: round constant 0x04. */
        "dup	v1.4s, w7\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #4\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        /* Step 4: round constant 0x08. */
        "dup	v1.4s, w7\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #8\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        /* Step 5: round constant 0x10. */
        "dup	v1.4s, w7\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #16\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        /* Step 6: round constant 0x20. */
        "dup	v1.4s, w7\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #32\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        /* Step 7: round constant 0x40. */
        "dup	v1.4s, w7\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #0x40\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        /* Step 8: round constant 0x80. */
        "dup	v1.4s, w7\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, #0x80\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        /* Step 9: round constant 0x1b (27). Not encodable as a logical
         * immediate, so it is materialised in w13 first. */
        "dup	v1.4s, w7\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "mov	w13, #27\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, w13\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        /* Step 10: round constant 0x36 (54), also via w13. */
        "dup	v1.4s, w7\n\t"
        "movi	v0.16b, #0\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "mov	w12, v0.s[0]\n\t"
        "mov	w13, #54\n\t"
        "ror	w12, w12, #8\n\t"
        "eor	w4, w4, w13\n\t"
        "eor	w4, w4, w12\n\t"
        "eor	w5, w5, w4\n\t"
        "eor	w6, w6, w5\n\t"
        "eor	w7, w7, w6\n\t"
        "stp	w4, w5, [%x[key]], #8\n\t"
        "stp	w6, w7, [%x[key]], #8\n\t"
        "cmp	%w[dir], #0\n\t"
        "b.eq	L_aes_set_key_arm64_crypto_done_%=\n\t"
        /* dir != 0: rewind to the start of the schedule, then swap round
         * keys end-for-end, applying AESIMC to all but the outermost pair. */
        "sub	%x[key], %x[key], #0xb0\n\t"
        "ldur	q0, [%x[key]]\n\t"
        "ldur	q1, [%x[key], #160]\n\t"
        "stur	q1, [%x[key]]\n\t"
        "stur	q0, [%x[key], #160]\n\t"
        "ldur	q0, [%x[key], #16]\n\t"
        "ldur	q1, [%x[key], #144]\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "stur	q1, [%x[key], #16]\n\t"
        "stur	q0, [%x[key], #144]\n\t"
        "ldur	q0, [%x[key], #32]\n\t"
        "ldur	q1, [%x[key], #128]\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "stur	q1, [%x[key], #32]\n\t"
        "stur	q0, [%x[key], #128]\n\t"
        "ldur	q0, [%x[key], #48]\n\t"
        "ldur	q1, [%x[key], #112]\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "stur	q1, [%x[key], #48]\n\t"
        "stur	q0, [%x[key], #112]\n\t"
        "ldur	q0, [%x[key], #64]\n\t"
        "ldur	q1, [%x[key], #96]\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "stur	q1, [%x[key], #64]\n\t"
        "stur	q0, [%x[key], #96]\n\t"
        /* Middle round key stays in place. */
        "ldur	q0, [%x[key], #80]\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "stur	q0, [%x[key], #80]\n\t"
        "\n"
    "L_aes_set_key_arm64_crypto_done_%=: \n\t"
        /* userKey is advanced by the post-indexed loads above, so it has to
         * be a read/write operand: input-only operands must not be
         * modified by the asm. */
        : [userKey] "+r" (userKey), [keylen] "+r" (keylen), [key] "+r" (key),
          [dir] "+r" (dir)
        :
        : "memory", "cc", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11",
            "x12", "x13", "v0", "v1"
    );
}

#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC)
/* Encrypt a single 16-byte block with the ARMv8 Crypto Extensions.
 *
 * inBlock   16 bytes of input, read once.
 * outBlock  16 bytes of output, written once at the end.
 * key       Round keys, 16 bytes each, read sequentially from the start.
 *           (nr + 1) round keys are consumed for nr = 10, 12 or 14.
 * nr        Number of rounds. 10 and 12 are detected explicitly; any other
 *           value falls through to the 14-round path.
 */
void AES_encrypt_AARCH64(const byte* inBlock, byte* outBlock, byte* key, int nr)
{
    __asm__ __volatile__ (
        "ld1	{v0.16b}, [%x[inBlock]]\n\t"
        /* Rounds 1-4. */
        "ld1	{v1.2d, v2.2d, v3.2d, v4.2d}, [%x[key]], #0x40\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v2.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v3.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v4.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        /* Rounds 5-8. */
        "ld1	{v1.2d, v2.2d, v3.2d, v4.2d}, [%x[key]], #0x40\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v2.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v3.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v4.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        /* Set Z if nr == 10. The flags stay live across the load and AES
         * instructions below until the b.eq. */
        "subs	%w[nr], %w[nr], #10\n\t"
        /* Rounds 9-10. The aesmc after the second aese is deferred so the
         * last round of a 10-round cipher skips it. */
        "ld1	{v1.2d, v2.2d}, [%x[key]], #32\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v2.16b\n\t"
        "b.eq	L_aes_encrypt_arm64_crypto_round_done_%=\n\t"
        /* Rounds 11-12. Z is set if nr == 12. */
        "ld1	{v1.2d, v2.2d}, [%x[key]], #32\n\t"
        "subs	%w[nr], %w[nr], #2\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v2.16b\n\t"
        "b.eq	L_aes_encrypt_arm64_crypto_round_done_%=\n\t"
        /* Rounds 13-14. */
        "ld1	{v1.2d, v2.2d}, [%x[key]], #32\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v2.16b\n\t"
        "\n"
    "L_aes_encrypt_arm64_crypto_round_done_%=: \n\t"
        /* XOR in the final round key and store the result. */
        "ld1	{v1.2d}, [%x[key]]\n\t"
        "eor	v0.16b, v0.16b, v1.16b\n\t"
        "st1	{v0.16b}, [%x[outBlock]]\n\t"
        : [outBlock] "+r" (outBlock), [key] "+r" (key), [nr] "+r" (nr)
        : [inBlock] "r" (inBlock)
        : "memory", "cc", "v0", "v1", "v2", "v3", "v4"
    );
}

#endif /* defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC) */
#if !defined(WC_AES_BITSLICED) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
#ifdef HAVE_AES_DECRYPT
/* Decrypt a single 16-byte block with the ARMv8 Crypto Extensions.
 *
 * inBlock   16 bytes of input, read once.
 * outBlock  16 bytes of output, written once at the end.
 * key       Round keys, 16 bytes each, read sequentially from the start.
 *           (nr + 1) round keys are consumed for nr = 10, 12 or 14.
 *           NOTE(review): presumably the schedule produced by
 *           AES_set_key_AARCH64 with a non-zero dir - confirm with callers.
 * nr        Number of rounds. 10 and 12 are detected explicitly; any other
 *           value falls through to the 14-round path.
 */
void AES_decrypt_AARCH64(const byte* inBlock, byte* outBlock, byte* key, int nr)
{
    __asm__ __volatile__ (
        "ld1	{v0.16b}, [%x[inBlock]]\n\t"
        /* Rounds 1-4. */
        "ld1	{v1.2d, v2.2d, v3.2d, v4.2d}, [%x[key]], #0x40\n\t"
        "aesd	v0.16b, v1.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v2.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v3.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v4.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        /* Rounds 5-8. */
        "ld1	{v1.2d, v2.2d, v3.2d, v4.2d}, [%x[key]], #0x40\n\t"
        "aesd	v0.16b, v1.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v2.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v3.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v4.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        /* Rounds 9-10. The aesimc after the second aesd is deferred so the
         * last round of a 10-round cipher skips it. */
        "ld1	{v1.2d, v2.2d}, [%x[key]], #32\n\t"
        "aesd	v0.16b, v1.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v2.16b\n\t"
        /* Z is set if nr == 10. */
        "subs	%w[nr], %w[nr], #10\n\t"
        "b.eq	L_aes_decrypt_arm64_crypto_round_done_%=\n\t"
        /* Rounds 11-12. */
        "ld1	{v1.2d, v2.2d}, [%x[key]], #32\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v1.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v2.16b\n\t"
        /* Z is set if nr == 12. */
        "subs	%w[nr], %w[nr], #2\n\t"
        "b.eq	L_aes_decrypt_arm64_crypto_round_done_%=\n\t"
        /* Rounds 13-14. */
        "ld1	{v1.2d, v2.2d}, [%x[key]], #32\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v1.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v2.16b\n\t"
        "\n"
    "L_aes_decrypt_arm64_crypto_round_done_%=: \n\t"
        /* XOR in the final round key and store the result. */
        "ld1	{v1.2d}, [%x[key]]\n\t"
        "eor	v0.16b, v0.16b, v1.16b\n\t"
        "st1	{v0.16b}, [%x[outBlock]]\n\t"
        : [outBlock] "+r" (outBlock), [key] "+r" (key), [nr] "+r" (nr)
        : [inBlock] "r" (inBlock)
        : "memory", "cc", "v0", "v1", "v2", "v3", "v4"
    );
}

#endif /* HAVE_AES_DECRYPT */
#endif /* !defined(WC_AES_BITSLICED) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) */
#ifdef HAVE_AES_ECB
void AES_encrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key,
    int nr)
{
    __asm__ __volatile__ (
        "ld1	{v16.2d, v17.2d, v18.2d, v19.2d}, [%x[key]], #0x40\n\t"
        "ld1	{v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key]], #0x40\n\t"
        "ld1	{v24.2d, v25.2d, v26.2d}, [%x[key]], #48\n\t"
        "lsr	%w[sz], %w[sz], #4\n\t"
        "cmp	%w[nr], #12\n\t"
        "b.lt	L_aes_encrypt_blocks_arm64_crypto_start_128_%=\n\t"
        "b.gt	L_aes_encrypt_blocks_arm64_crypto_start_256_%=\n\t"
        /* AES_ECB_192 */
#ifndef NO_AES_192
        "ld1	{v27.2d, v28.2d}, [%x[key]], #32\n\t"
        "cmp	%w[sz], #1\n\t"
        "b.eq	L_aes_encrypt_blocks_arm64_crypto_192_start_1_%=\n\t"
        "cmp	%w[sz], #8\n\t"
        "b.lt	L_aes_encrypt_blocks_arm64_crypto_192_start_4_%=\n\t"
        "\n"
    "L_aes_encrypt_blocks_arm64_crypto_192_start_8_%=: \n\t"
        "ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t"
        "ld1	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t"
        "aese	v0.16b, v16.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v16.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v16.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v16.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v16.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v16.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v16.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v16.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v17.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v17.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v17.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v17.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v17.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v17.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v17.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v17.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v18.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v18.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v18.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v18.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v18.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v18.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v18.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v18.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v19.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v19.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v19.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v19.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v19.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v19.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v19.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v19.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v20.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v20.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v20.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v20.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v20.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v20.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v20.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v20.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v21.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v21.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v21.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v21.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v21.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v21.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v21.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v21.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v22.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v22.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v22.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v22.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v22.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v22.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v22.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v22.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v23.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v23.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v23.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v23.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v23.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v23.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v23.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v23.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v24.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v24.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v24.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v24.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v24.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v24.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v24.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v24.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v25.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v25.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v25.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v25.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v25.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v25.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v25.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v25.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v26.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v26.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v26.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v26.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v26.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v26.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v26.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v26.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v27.16b\n\t"
        "eor	v0.16b, v0.16b, v28.16b\n\t"
        "aese	v1.16b, v27.16b\n\t"
        "eor	v1.16b, v1.16b, v28.16b\n\t"
        "aese	v2.16b, v27.16b\n\t"
        "eor	v2.16b, v2.16b, v28.16b\n\t"
        "aese	v3.16b, v27.16b\n\t"
        "eor	v3.16b, v3.16b, v28.16b\n\t"
        "aese	v4.16b, v27.16b\n\t"
        "eor	v4.16b, v4.16b, v28.16b\n\t"
        "aese	v5.16b, v27.16b\n\t"
        "eor	v5.16b, v5.16b, v28.16b\n\t"
        "aese	v6.16b, v27.16b\n\t"
        "eor	v6.16b, v6.16b, v28.16b\n\t"
        "aese	v7.16b, v27.16b\n\t"
        "eor	v7.16b, v7.16b, v28.16b\n\t"
        "sub	%w[sz], %w[sz], #8\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "st1	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[out]], #0x40\n\t"
        "cmp	%w[sz], #8\n\t"
        "b.ge	L_aes_encrypt_blocks_arm64_crypto_192_start_8_%=\n\t"
        "\n"
    "L_aes_encrypt_blocks_arm64_crypto_192_start_4_%=: \n\t"
        "cmp	%w[sz], #4\n\t"
        "b.lt	L_aes_encrypt_blocks_arm64_crypto_192_start_2_%=\n\t"
        "ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t"
        "aese	v0.16b, v16.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v16.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v16.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v16.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v17.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v17.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v17.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v17.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v18.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v18.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v18.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v18.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v19.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v19.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v19.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v19.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v20.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v20.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v20.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v20.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v21.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v21.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v21.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v21.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v22.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v22.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v22.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v22.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v23.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v23.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v23.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v23.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v24.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v24.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v24.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v24.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v25.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v25.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v25.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v25.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v26.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v26.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v26.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v26.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v27.16b\n\t"
        "eor	v0.16b, v0.16b, v28.16b\n\t"
        "aese	v1.16b, v27.16b\n\t"
        "eor	v1.16b, v1.16b, v28.16b\n\t"
        "aese	v2.16b, v27.16b\n\t"
        "eor	v2.16b, v2.16b, v28.16b\n\t"
        "aese	v3.16b, v27.16b\n\t"
        "eor	v3.16b, v3.16b, v28.16b\n\t"
        "sub	%w[sz], %w[sz], #4\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "\n"
    "L_aes_encrypt_blocks_arm64_crypto_192_start_2_%=: \n\t"
        "cmp	%w[sz], #2\n\t"
        "b.lt	L_aes_encrypt_blocks_arm64_crypto_192_start_1_%=\n\t"
        "ld1	{v0.16b, v1.16b}, [%x[in]], #32\n\t"
        "aese	v0.16b, v16.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v16.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v17.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v17.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v18.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v18.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v19.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v19.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v20.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v20.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v21.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v21.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v22.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v22.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v23.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v23.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v24.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v24.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v25.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v25.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v26.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v26.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v27.16b\n\t"
        "eor	v0.16b, v0.16b, v28.16b\n\t"
        "aese	v1.16b, v27.16b\n\t"
        "eor	v1.16b, v1.16b, v28.16b\n\t"
        "sub	%w[sz], %w[sz], #2\n\t"
        "st1	{v0.16b, v1.16b}, [%x[out]], #32\n\t"
        "\n"
    "L_aes_encrypt_blocks_arm64_crypto_192_start_1_%=: \n\t"
        "cbz	%w[sz], L_aes_encrypt_blocks_arm64_crypto_192_done_%=\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v0.16b, v16.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v17.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v18.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v19.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v20.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v21.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v22.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v23.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v24.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v25.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v26.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v27.16b\n\t"
        "eor	v0.16b, v0.16b, v28.16b\n\t"
        "st1	{v0.16b}, [%x[out]], #16\n\t"
        "\n"
    "L_aes_encrypt_blocks_arm64_crypto_192_done_%=: \n\t"
#endif /* !NO_AES_192 */
        "b	L_aes_encrypt_blocks_arm64_crypto_done_%=\n\t"
        /* AES_ECB_256 */
        "\n"
    "L_aes_encrypt_blocks_arm64_crypto_start_256_%=: \n\t"
#ifndef NO_AES_256
        "ld1	{v27.2d, v28.2d, v29.2d, v30.2d}, [%x[key]], #0x40\n\t"
        "cmp	%w[sz], #1\n\t"
        "b.eq	L_aes_encrypt_blocks_arm64_crypto_256_start_1_%=\n\t"
        "cmp	%w[sz], #8\n\t"
        "b.lt	L_aes_encrypt_blocks_arm64_crypto_256_start_4_%=\n\t"
        "\n"
    "L_aes_encrypt_blocks_arm64_crypto_256_start_8_%=: \n\t"
        "ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t"
        "ld1	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t"
        "aese	v0.16b, v16.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v16.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v16.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v16.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v16.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v16.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v16.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v16.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v17.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v17.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v17.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v17.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v17.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v17.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v17.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v17.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v18.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v18.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v18.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v18.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v18.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v18.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v18.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v18.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v19.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v19.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v19.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v19.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v19.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v19.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v19.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v19.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v20.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v20.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v20.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v20.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v20.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v20.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v20.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v20.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v21.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v21.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v21.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v21.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v21.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v21.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v21.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v21.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v22.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v22.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v22.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v22.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v22.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v22.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v22.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v22.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v23.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v23.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v23.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v23.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v23.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v23.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v23.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v23.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v24.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v24.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v24.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v24.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v24.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v24.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v24.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v24.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v25.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v25.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v25.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v25.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v25.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v25.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v25.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v25.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v26.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v26.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v26.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v26.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v26.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v26.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v26.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v26.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v27.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v27.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v27.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v27.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v27.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v27.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v27.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v27.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v28.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v28.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v28.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v28.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v28.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v28.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v28.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v28.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v29.16b\n\t"
        "eor	v0.16b, v0.16b, v30.16b\n\t"
        "aese	v1.16b, v29.16b\n\t"
        "eor	v1.16b, v1.16b, v30.16b\n\t"
        "aese	v2.16b, v29.16b\n\t"
        "eor	v2.16b, v2.16b, v30.16b\n\t"
        "aese	v3.16b, v29.16b\n\t"
        "eor	v3.16b, v3.16b, v30.16b\n\t"
        "aese	v4.16b, v29.16b\n\t"
        "eor	v4.16b, v4.16b, v30.16b\n\t"
        "aese	v5.16b, v29.16b\n\t"
        "eor	v5.16b, v5.16b, v30.16b\n\t"
        "aese	v6.16b, v29.16b\n\t"
        "eor	v6.16b, v6.16b, v30.16b\n\t"
        "aese	v7.16b, v29.16b\n\t"
        "eor	v7.16b, v7.16b, v30.16b\n\t"
        "sub	%w[sz], %w[sz], #8\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "st1	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[out]], #0x40\n\t"
        "cmp	%w[sz], #8\n\t"
        "b.ge	L_aes_encrypt_blocks_arm64_crypto_256_start_8_%=\n\t"
        "\n"
    "L_aes_encrypt_blocks_arm64_crypto_256_start_4_%=: \n\t"
        "cmp	%w[sz], #4\n\t"
        "b.lt	L_aes_encrypt_blocks_arm64_crypto_256_start_2_%=\n\t"
        "ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t"
        "aese	v0.16b, v16.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v16.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v16.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v16.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v17.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v17.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v17.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v17.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v18.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v18.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v18.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v18.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v19.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v19.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v19.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v19.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v20.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v20.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v20.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v20.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v21.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v21.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v21.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v21.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v22.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v22.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v22.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v22.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v23.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v23.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v23.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v23.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v24.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v24.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v24.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v24.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v25.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v25.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v25.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v25.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v26.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v26.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v26.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v26.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v27.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v27.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v27.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v27.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v28.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v28.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v28.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v28.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v29.16b\n\t"
        "eor	v0.16b, v0.16b, v30.16b\n\t"
        "aese	v1.16b, v29.16b\n\t"
        "eor	v1.16b, v1.16b, v30.16b\n\t"
        "aese	v2.16b, v29.16b\n\t"
        "eor	v2.16b, v2.16b, v30.16b\n\t"
        "aese	v3.16b, v29.16b\n\t"
        "eor	v3.16b, v3.16b, v30.16b\n\t"
        "sub	%w[sz], %w[sz], #4\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "\n"
    "L_aes_encrypt_blocks_arm64_crypto_256_start_2_%=: \n\t"
        "cmp	%w[sz], #2\n\t"
        "b.lt	L_aes_encrypt_blocks_arm64_crypto_256_start_1_%=\n\t"
        "ld1	{v0.16b, v1.16b}, [%x[in]], #32\n\t"
        "aese	v0.16b, v16.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v16.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v17.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v17.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v18.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v18.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v19.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v19.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v20.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v20.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v21.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v21.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v22.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v22.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v23.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v23.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v24.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v24.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v25.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v25.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v26.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v26.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v27.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v27.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v28.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v28.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v29.16b\n\t"
        "eor	v0.16b, v0.16b, v30.16b\n\t"
        "aese	v1.16b, v29.16b\n\t"
        "eor	v1.16b, v1.16b, v30.16b\n\t"
        "sub	%w[sz], %w[sz], #2\n\t"
        "st1	{v0.16b, v1.16b}, [%x[out]], #32\n\t"
        "\n"
    "L_aes_encrypt_blocks_arm64_crypto_256_start_1_%=: \n\t"
        "cbz	%w[sz], L_aes_encrypt_blocks_arm64_crypto_256_done_%=\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v0.16b, v16.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v17.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v18.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v19.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v20.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v21.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v22.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v23.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v24.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v25.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v26.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v27.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v28.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v29.16b\n\t"
        "eor	v0.16b, v0.16b, v30.16b\n\t"
        "st1	{v0.16b}, [%x[out]], #16\n\t"
        "\n"
    "L_aes_encrypt_blocks_arm64_crypto_256_done_%=: \n\t"
#endif /* !NO_AES_256 */
        "b	L_aes_encrypt_blocks_arm64_crypto_done_%=\n\t"
        /* AES_ECB_128 */
        "\n"
    "L_aes_encrypt_blocks_arm64_crypto_start_128_%=: \n\t"
#ifndef NO_AES_128
        "cmp	%w[sz], #1\n\t"
        "b.eq	L_aes_encrypt_blocks_arm64_crypto_128_start_1_%=\n\t"
        "cmp	%w[sz], #8\n\t"
        "b.lt	L_aes_encrypt_blocks_arm64_crypto_128_start_4_%=\n\t"
        "\n"
    "L_aes_encrypt_blocks_arm64_crypto_128_start_8_%=: \n\t"
        "ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t"
        "ld1	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t"
        "aese	v0.16b, v16.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v16.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v16.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v16.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v16.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v16.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v16.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v16.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v17.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v17.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v17.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v17.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v17.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v17.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v17.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v17.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v18.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v18.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v18.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v18.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v18.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v18.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v18.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v18.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v19.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v19.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v19.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v19.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v19.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v19.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v19.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v19.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v20.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v20.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v20.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v20.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v20.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v20.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v20.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v20.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v21.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v21.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v21.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v21.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v21.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v21.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v21.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v21.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v22.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v22.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v22.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v22.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v22.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v22.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v22.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v22.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v23.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v23.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v23.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v23.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v23.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v23.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v23.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v23.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v24.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v24.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v24.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v24.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v4.16b, v24.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v5.16b, v24.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v6.16b, v24.16b\n\t"
        "aesmc	v6.16b, v6.16b\n\t"
        "aese	v7.16b, v24.16b\n\t"
        "aesmc	v7.16b, v7.16b\n\t"
        "aese	v0.16b, v25.16b\n\t"
        "eor	v0.16b, v0.16b, v26.16b\n\t"
        "aese	v1.16b, v25.16b\n\t"
        "eor	v1.16b, v1.16b, v26.16b\n\t"
        "aese	v2.16b, v25.16b\n\t"
        "eor	v2.16b, v2.16b, v26.16b\n\t"
        "aese	v3.16b, v25.16b\n\t"
        "eor	v3.16b, v3.16b, v26.16b\n\t"
        "aese	v4.16b, v25.16b\n\t"
        "eor	v4.16b, v4.16b, v26.16b\n\t"
        "aese	v5.16b, v25.16b\n\t"
        "eor	v5.16b, v5.16b, v26.16b\n\t"
        "aese	v6.16b, v25.16b\n\t"
        "eor	v6.16b, v6.16b, v26.16b\n\t"
        "aese	v7.16b, v25.16b\n\t"
        "eor	v7.16b, v7.16b, v26.16b\n\t"
        "sub	%w[sz], %w[sz], #8\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "st1	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[out]], #0x40\n\t"
        "cmp	%w[sz], #8\n\t"
        "b.ge	L_aes_encrypt_blocks_arm64_crypto_128_start_8_%=\n\t"
        "\n"
    "L_aes_encrypt_blocks_arm64_crypto_128_start_4_%=: \n\t"
        "cmp	%w[sz], #4\n\t"
        "b.lt	L_aes_encrypt_blocks_arm64_crypto_128_start_2_%=\n\t"
        "ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t"
        "aese	v0.16b, v16.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v16.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v16.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v16.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v17.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v17.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v17.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v17.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v18.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v18.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v18.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v18.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v19.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v19.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v19.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v19.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v20.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v20.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v20.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v20.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v21.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v21.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v21.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v21.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v22.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v22.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v22.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v22.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v23.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v23.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v23.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v23.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v24.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v24.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v2.16b, v24.16b\n\t"
        "aesmc	v2.16b, v2.16b\n\t"
        "aese	v3.16b, v24.16b\n\t"
        "aesmc	v3.16b, v3.16b\n\t"
        "aese	v0.16b, v25.16b\n\t"
        "eor	v0.16b, v0.16b, v26.16b\n\t"
        "aese	v1.16b, v25.16b\n\t"
        "eor	v1.16b, v1.16b, v26.16b\n\t"
        "aese	v2.16b, v25.16b\n\t"
        "eor	v2.16b, v2.16b, v26.16b\n\t"
        "aese	v3.16b, v25.16b\n\t"
        "eor	v3.16b, v3.16b, v26.16b\n\t"
        "sub	%w[sz], %w[sz], #4\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "\n"
    "L_aes_encrypt_blocks_arm64_crypto_128_start_2_%=: \n\t"
        "cmp	%w[sz], #2\n\t"
        "b.lt	L_aes_encrypt_blocks_arm64_crypto_128_start_1_%=\n\t"
        "ld1	{v0.16b, v1.16b}, [%x[in]], #32\n\t"
        "aese	v0.16b, v16.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v16.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v17.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v17.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v18.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v18.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v19.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v19.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v20.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v20.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v21.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v21.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v22.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v22.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v23.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v23.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v24.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v1.16b, v24.16b\n\t"
        "aesmc	v1.16b, v1.16b\n\t"
        "aese	v0.16b, v25.16b\n\t"
        "eor	v0.16b, v0.16b, v26.16b\n\t"
        "aese	v1.16b, v25.16b\n\t"
        "eor	v1.16b, v1.16b, v26.16b\n\t"
        "sub	%w[sz], %w[sz], #2\n\t"
        "st1	{v0.16b, v1.16b}, [%x[out]], #32\n\t"
        "\n"
    "L_aes_encrypt_blocks_arm64_crypto_128_start_1_%=: \n\t"
        "cbz	%w[sz], L_aes_encrypt_blocks_arm64_crypto_128_done_%=\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v0.16b, v16.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v17.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v18.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v19.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v20.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v21.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v22.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v23.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v24.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v25.16b\n\t"
        "eor	v0.16b, v0.16b, v26.16b\n\t"
        "st1	{v0.16b}, [%x[out]], #16\n\t"
        "\n"
    "L_aes_encrypt_blocks_arm64_crypto_128_done_%=: \n\t"
#endif /* !NO_AES_128 */
        "\n"
    "L_aes_encrypt_blocks_arm64_crypto_done_%=: \n\t"
        : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [nr] "+r" (nr)
        : [in] "r" (in)
        : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
            "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
            "v26", "v27", "v28", "v29", "v30"
    );
}

#ifdef HAVE_AES_DECRYPT
void AES_decrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key,
    int nr)
{
    __asm__ __volatile__ (
        "ld1	{v16.2d, v17.2d, v18.2d, v19.2d}, [%x[key]], #0x40\n\t"
        "ld1	{v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key]], #0x40\n\t"
        "ld1	{v24.2d, v25.2d, v26.2d}, [%x[key]], #48\n\t"
        "lsr	%w[sz], %w[sz], #4\n\t"
        "cmp	%w[nr], #12\n\t"
        "b.lt	L_aes_decrypt_blocks_arm64_crypto_start_128_%=\n\t"
        "b.gt	L_aes_decrypt_blocks_arm64_crypto_start_256_%=\n\t"
        /* AES_ECB_192 */
#ifndef NO_AES_192
        "ld1	{v27.2d, v28.2d}, [%x[key]], #32\n\t"
        "cmp	%w[sz], #1\n\t"
        "b.eq	L_aes_decrypt_blocks_arm64_crypto_192_start_1_%=\n\t"
        "cmp	%w[sz], #8\n\t"
        "b.lt	L_aes_decrypt_blocks_arm64_crypto_192_start_4_%=\n\t"
        "\n"
    "L_aes_decrypt_blocks_arm64_crypto_192_start_8_%=: \n\t"
        "ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t"
        "ld1	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t"
        "aesd	v0.16b, v16.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v16.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v16.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v16.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v16.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v16.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v16.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v16.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v17.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v17.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v17.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v17.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v17.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v17.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v17.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v17.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v18.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v18.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v18.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v18.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v18.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v18.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v18.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v18.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v19.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v19.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v19.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v19.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v19.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v19.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v19.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v19.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v20.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v20.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v20.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v20.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v20.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v20.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v20.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v20.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v21.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v21.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v21.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v21.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v21.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v21.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v21.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v21.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v22.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v22.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v22.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v22.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v22.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v22.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v22.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v22.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v23.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v23.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v23.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v23.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v23.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v23.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v23.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v23.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v24.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v24.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v24.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v24.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v24.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v24.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v24.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v24.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v25.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v25.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v25.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v25.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v25.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v25.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v25.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v25.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v26.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v26.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v26.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v26.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v26.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v26.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v26.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v26.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v27.16b\n\t"
        "eor	v0.16b, v0.16b, v28.16b\n\t"
        "aesd	v1.16b, v27.16b\n\t"
        "eor	v1.16b, v1.16b, v28.16b\n\t"
        "aesd	v2.16b, v27.16b\n\t"
        "eor	v2.16b, v2.16b, v28.16b\n\t"
        "aesd	v3.16b, v27.16b\n\t"
        "eor	v3.16b, v3.16b, v28.16b\n\t"
        "aesd	v4.16b, v27.16b\n\t"
        "eor	v4.16b, v4.16b, v28.16b\n\t"
        "aesd	v5.16b, v27.16b\n\t"
        "eor	v5.16b, v5.16b, v28.16b\n\t"
        "aesd	v6.16b, v27.16b\n\t"
        "eor	v6.16b, v6.16b, v28.16b\n\t"
        "aesd	v7.16b, v27.16b\n\t"
        "eor	v7.16b, v7.16b, v28.16b\n\t"
        "sub	%w[sz], %w[sz], #8\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "st1	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[out]], #0x40\n\t"
        "cmp	%w[sz], #8\n\t"
        "b.ge	L_aes_decrypt_blocks_arm64_crypto_192_start_8_%=\n\t"
        "\n"
    "L_aes_decrypt_blocks_arm64_crypto_192_start_4_%=: \n\t"
        "cmp	%w[sz], #4\n\t"
        "b.lt	L_aes_decrypt_blocks_arm64_crypto_192_start_2_%=\n\t"
        "ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t"
        "aesd	v0.16b, v16.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v16.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v16.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v16.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v17.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v17.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v17.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v17.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v18.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v18.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v18.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v18.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v19.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v19.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v19.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v19.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v20.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v20.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v20.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v20.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v21.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v21.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v21.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v21.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v22.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v22.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v22.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v22.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v23.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v23.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v23.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v23.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v24.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v24.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v24.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v24.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v25.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v25.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v25.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v25.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v26.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v26.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v26.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v26.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v27.16b\n\t"
        "eor	v0.16b, v0.16b, v28.16b\n\t"
        "aesd	v1.16b, v27.16b\n\t"
        "eor	v1.16b, v1.16b, v28.16b\n\t"
        "aesd	v2.16b, v27.16b\n\t"
        "eor	v2.16b, v2.16b, v28.16b\n\t"
        "aesd	v3.16b, v27.16b\n\t"
        "eor	v3.16b, v3.16b, v28.16b\n\t"
        "sub	%w[sz], %w[sz], #4\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "\n"
    "L_aes_decrypt_blocks_arm64_crypto_192_start_2_%=: \n\t"
        "cmp	%w[sz], #2\n\t"
        "b.lt	L_aes_decrypt_blocks_arm64_crypto_192_start_1_%=\n\t"
        "ld1	{v0.16b, v1.16b}, [%x[in]], #32\n\t"
        "aesd	v0.16b, v16.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v16.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v17.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v17.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v18.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v18.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v19.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v19.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v20.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v20.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v21.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v21.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v22.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v22.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v23.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v23.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v24.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v24.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v25.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v25.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v26.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v26.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v27.16b\n\t"
        "eor	v0.16b, v0.16b, v28.16b\n\t"
        "aesd	v1.16b, v27.16b\n\t"
        "eor	v1.16b, v1.16b, v28.16b\n\t"
        "sub	%w[sz], %w[sz], #2\n\t"
        "st1	{v0.16b, v1.16b}, [%x[out]], #32\n\t"
        "\n"
    "L_aes_decrypt_blocks_arm64_crypto_192_start_1_%=: \n\t"
        "cbz	%w[sz], L_aes_decrypt_blocks_arm64_crypto_192_done_%=\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aesd	v0.16b, v16.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v17.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v18.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v19.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v20.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v21.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v22.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v23.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v24.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v25.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v26.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v27.16b\n\t"
        "eor	v0.16b, v0.16b, v28.16b\n\t"
        "st1	{v0.16b}, [%x[out]], #16\n\t"
        "\n"
    "L_aes_decrypt_blocks_arm64_crypto_192_done_%=: \n\t"
#endif /* !NO_AES_192 */
        "b	L_aes_decrypt_blocks_arm64_crypto_done_%=\n\t"
        /* AES_ECB_256 */
        "\n"
    "L_aes_decrypt_blocks_arm64_crypto_start_256_%=: \n\t"
#ifndef NO_AES_256
        "ld1	{v27.2d, v28.2d, v29.2d, v30.2d}, [%x[key]], #0x40\n\t"
        "cmp	%w[sz], #1\n\t"
        "b.eq	L_aes_decrypt_blocks_arm64_crypto_256_start_1_%=\n\t"
        "cmp	%w[sz], #8\n\t"
        "b.lt	L_aes_decrypt_blocks_arm64_crypto_256_start_4_%=\n\t"
        "\n"
    "L_aes_decrypt_blocks_arm64_crypto_256_start_8_%=: \n\t"
        "ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t"
        "ld1	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t"
        "aesd	v0.16b, v16.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v16.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v16.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v16.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v16.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v16.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v16.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v16.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v17.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v17.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v17.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v17.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v17.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v17.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v17.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v17.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v18.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v18.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v18.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v18.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v18.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v18.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v18.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v18.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v19.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v19.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v19.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v19.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v19.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v19.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v19.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v19.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v20.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v20.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v20.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v20.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v20.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v20.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v20.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v20.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v21.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v21.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v21.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v21.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v21.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v21.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v21.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v21.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v22.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v22.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v22.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v22.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v22.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v22.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v22.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v22.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v23.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v23.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v23.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v23.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v23.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v23.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v23.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v23.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v24.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v24.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v24.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v24.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v24.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v24.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v24.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v24.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v25.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v25.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v25.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v25.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v25.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v25.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v25.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v25.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v26.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v26.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v26.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v26.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v26.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v26.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v26.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v26.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v27.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v27.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v27.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v27.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v27.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v27.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v27.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v27.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v28.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v28.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v28.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v28.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v28.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v28.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v28.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v28.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v29.16b\n\t"
        "eor	v0.16b, v0.16b, v30.16b\n\t"
        "aesd	v1.16b, v29.16b\n\t"
        "eor	v1.16b, v1.16b, v30.16b\n\t"
        "aesd	v2.16b, v29.16b\n\t"
        "eor	v2.16b, v2.16b, v30.16b\n\t"
        "aesd	v3.16b, v29.16b\n\t"
        "eor	v3.16b, v3.16b, v30.16b\n\t"
        "aesd	v4.16b, v29.16b\n\t"
        "eor	v4.16b, v4.16b, v30.16b\n\t"
        "aesd	v5.16b, v29.16b\n\t"
        "eor	v5.16b, v5.16b, v30.16b\n\t"
        "aesd	v6.16b, v29.16b\n\t"
        "eor	v6.16b, v6.16b, v30.16b\n\t"
        "aesd	v7.16b, v29.16b\n\t"
        "eor	v7.16b, v7.16b, v30.16b\n\t"
        "sub	%w[sz], %w[sz], #8\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "st1	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[out]], #0x40\n\t"
        "cmp	%w[sz], #8\n\t"
        "b.ge	L_aes_decrypt_blocks_arm64_crypto_256_start_8_%=\n\t"
        "\n"
    "L_aes_decrypt_blocks_arm64_crypto_256_start_4_%=: \n\t"
        "cmp	%w[sz], #4\n\t"
        "b.lt	L_aes_decrypt_blocks_arm64_crypto_256_start_2_%=\n\t"
        "ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t"
        "aesd	v0.16b, v16.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v16.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v16.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v16.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v17.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v17.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v17.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v17.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v18.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v18.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v18.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v18.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v19.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v19.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v19.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v19.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v20.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v20.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v20.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v20.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v21.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v21.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v21.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v21.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v22.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v22.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v22.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v22.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v23.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v23.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v23.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v23.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v24.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v24.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v24.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v24.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v25.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v25.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v25.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v25.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v26.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v26.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v26.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v26.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v27.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v27.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v27.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v27.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v28.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v28.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v28.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v28.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v29.16b\n\t"
        "eor	v0.16b, v0.16b, v30.16b\n\t"
        "aesd	v1.16b, v29.16b\n\t"
        "eor	v1.16b, v1.16b, v30.16b\n\t"
        "aesd	v2.16b, v29.16b\n\t"
        "eor	v2.16b, v2.16b, v30.16b\n\t"
        "aesd	v3.16b, v29.16b\n\t"
        "eor	v3.16b, v3.16b, v30.16b\n\t"
        "sub	%w[sz], %w[sz], #4\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "\n"
    "L_aes_decrypt_blocks_arm64_crypto_256_start_2_%=: \n\t"
        "cmp	%w[sz], #2\n\t"
        "b.lt	L_aes_decrypt_blocks_arm64_crypto_256_start_1_%=\n\t"
        "ld1	{v0.16b, v1.16b}, [%x[in]], #32\n\t"
        "aesd	v0.16b, v16.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v16.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v17.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v17.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v18.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v18.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v19.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v19.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v20.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v20.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v21.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v21.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v22.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v22.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v23.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v23.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v24.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v24.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v25.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v25.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v26.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v26.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v27.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v27.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v28.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v28.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v29.16b\n\t"
        "eor	v0.16b, v0.16b, v30.16b\n\t"
        "aesd	v1.16b, v29.16b\n\t"
        "eor	v1.16b, v1.16b, v30.16b\n\t"
        "sub	%w[sz], %w[sz], #2\n\t"
        "st1	{v0.16b, v1.16b}, [%x[out]], #32\n\t"
        "\n"
    "L_aes_decrypt_blocks_arm64_crypto_256_start_1_%=: \n\t"
        "cbz	%w[sz], L_aes_decrypt_blocks_arm64_crypto_256_done_%=\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aesd	v0.16b, v16.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v17.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v18.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v19.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v20.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v21.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v22.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v23.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v24.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v25.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v26.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v27.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v28.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v29.16b\n\t"
        "eor	v0.16b, v0.16b, v30.16b\n\t"
        "st1	{v0.16b}, [%x[out]], #16\n\t"
        "\n"
    "L_aes_decrypt_blocks_arm64_crypto_256_done_%=: \n\t"
#endif /* !NO_AES_256 */
        "b	L_aes_decrypt_blocks_arm64_crypto_done_%=\n\t"
        /* AES_ECB_128 */
        "\n"
    "L_aes_decrypt_blocks_arm64_crypto_start_128_%=: \n\t"
#ifndef NO_AES_128
        "cmp	%w[sz], #1\n\t"
        "b.eq	L_aes_decrypt_blocks_arm64_crypto_128_start_1_%=\n\t"
        "cmp	%w[sz], #8\n\t"
        "b.lt	L_aes_decrypt_blocks_arm64_crypto_128_start_4_%=\n\t"
        "\n"
    "L_aes_decrypt_blocks_arm64_crypto_128_start_8_%=: \n\t"
        "ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t"
        "ld1	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t"
        "aesd	v0.16b, v16.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v16.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v16.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v16.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v16.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v16.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v16.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v16.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v17.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v17.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v17.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v17.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v17.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v17.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v17.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v17.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v18.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v18.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v18.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v18.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v18.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v18.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v18.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v18.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v19.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v19.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v19.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v19.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v19.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v19.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v19.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v19.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v20.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v20.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v20.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v20.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v20.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v20.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v20.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v20.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v21.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v21.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v21.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v21.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v21.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v21.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v21.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v21.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v22.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v22.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v22.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v22.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v22.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v22.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v22.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v22.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v23.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v23.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v23.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v23.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v23.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v23.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v23.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v23.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v24.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v24.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v24.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v24.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v4.16b, v24.16b\n\t"
        "aesimc	v4.16b, v4.16b\n\t"
        "aesd	v5.16b, v24.16b\n\t"
        "aesimc	v5.16b, v5.16b\n\t"
        "aesd	v6.16b, v24.16b\n\t"
        "aesimc	v6.16b, v6.16b\n\t"
        "aesd	v7.16b, v24.16b\n\t"
        "aesimc	v7.16b, v7.16b\n\t"
        "aesd	v0.16b, v25.16b\n\t"
        "eor	v0.16b, v0.16b, v26.16b\n\t"
        "aesd	v1.16b, v25.16b\n\t"
        "eor	v1.16b, v1.16b, v26.16b\n\t"
        "aesd	v2.16b, v25.16b\n\t"
        "eor	v2.16b, v2.16b, v26.16b\n\t"
        "aesd	v3.16b, v25.16b\n\t"
        "eor	v3.16b, v3.16b, v26.16b\n\t"
        "aesd	v4.16b, v25.16b\n\t"
        "eor	v4.16b, v4.16b, v26.16b\n\t"
        "aesd	v5.16b, v25.16b\n\t"
        "eor	v5.16b, v5.16b, v26.16b\n\t"
        "aesd	v6.16b, v25.16b\n\t"
        "eor	v6.16b, v6.16b, v26.16b\n\t"
        "aesd	v7.16b, v25.16b\n\t"
        "eor	v7.16b, v7.16b, v26.16b\n\t"
        "sub	%w[sz], %w[sz], #8\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "st1	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[out]], #0x40\n\t"
        "cmp	%w[sz], #8\n\t"
        "b.ge	L_aes_decrypt_blocks_arm64_crypto_128_start_8_%=\n\t"
        "\n"
    "L_aes_decrypt_blocks_arm64_crypto_128_start_4_%=: \n\t"
        "cmp	%w[sz], #4\n\t"
        "b.lt	L_aes_decrypt_blocks_arm64_crypto_128_start_2_%=\n\t"
        "ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t"
        "aesd	v0.16b, v16.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v16.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v16.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v16.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v17.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v17.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v17.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v17.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v18.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v18.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v18.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v18.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v19.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v19.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v19.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v19.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v20.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v20.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v20.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v20.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v21.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v21.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v21.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v21.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v22.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v22.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v22.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v22.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v23.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v23.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v23.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v23.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v24.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v24.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v2.16b, v24.16b\n\t"
        "aesimc	v2.16b, v2.16b\n\t"
        "aesd	v3.16b, v24.16b\n\t"
        "aesimc	v3.16b, v3.16b\n\t"
        "aesd	v0.16b, v25.16b\n\t"
        "eor	v0.16b, v0.16b, v26.16b\n\t"
        "aesd	v1.16b, v25.16b\n\t"
        "eor	v1.16b, v1.16b, v26.16b\n\t"
        "aesd	v2.16b, v25.16b\n\t"
        "eor	v2.16b, v2.16b, v26.16b\n\t"
        "aesd	v3.16b, v25.16b\n\t"
        "eor	v3.16b, v3.16b, v26.16b\n\t"
        "sub	%w[sz], %w[sz], #4\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "\n"
    "L_aes_decrypt_blocks_arm64_crypto_128_start_2_%=: \n\t"
        "cmp	%w[sz], #2\n\t"
        "b.lt	L_aes_decrypt_blocks_arm64_crypto_128_start_1_%=\n\t"
        "ld1	{v0.16b, v1.16b}, [%x[in]], #32\n\t"
        "aesd	v0.16b, v16.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v16.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v17.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v17.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v18.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v18.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v19.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v19.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v20.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v20.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v21.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v21.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v22.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v22.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v23.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v23.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v24.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v1.16b, v24.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v0.16b, v25.16b\n\t"
        "eor	v0.16b, v0.16b, v26.16b\n\t"
        "aesd	v1.16b, v25.16b\n\t"
        "eor	v1.16b, v1.16b, v26.16b\n\t"
        "sub	%w[sz], %w[sz], #2\n\t"
        "st1	{v0.16b, v1.16b}, [%x[out]], #32\n\t"
        "\n"
    "L_aes_decrypt_blocks_arm64_crypto_128_start_1_%=: \n\t"
        "cbz	%w[sz], L_aes_decrypt_blocks_arm64_crypto_128_done_%=\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aesd	v0.16b, v16.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v17.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v18.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v19.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v20.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v21.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v22.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v23.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v24.16b\n\t"
        "aesimc	v0.16b, v0.16b\n\t"
        "aesd	v0.16b, v25.16b\n\t"
        "eor	v0.16b, v0.16b, v26.16b\n\t"
        "st1	{v0.16b}, [%x[out]], #16\n\t"
        "\n"
    "L_aes_decrypt_blocks_arm64_crypto_128_done_%=: \n\t"
#endif /* !NO_AES_128 */
        "\n"
    "L_aes_decrypt_blocks_arm64_crypto_done_%=: \n\t"
        : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [nr] "+r" (nr)
        : [in] "r" (in)
        : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
            "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
            "v26", "v27", "v28", "v29", "v30"
    );
}

#endif /* HAVE_AES_DECRYPT */
#endif /* HAVE_AES_ECB */
#ifdef HAVE_AES_CBC
/* Encrypt whole 16-byte blocks in CBC mode with the ARMv8 AES instructions.
 *
 * in   Input data; read 16 bytes at a time.
 * out  Output buffer; one 16-byte block is written for each block read.
 * sz   Length in bytes; only sz / 16 whole blocks are processed. When there
 *      is no whole block nothing is read from in or written to out.
 * reg  16-byte chaining value: loaded on entry, XORed into the first input
 *      block, and overwritten on exit with the last block produced.
 * key  Round keys, read sequentially as 16-byte vectors.
 * nr   Number of rounds: below 12 takes the 10-round path, 12 the 12-round
 *      path and above 12 the 14-round path.
 */
void AES_CBC_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg,
    byte* key, int nr)
{
    __asm__ __volatile__ (
        /* v16-v23 = first eight round keys, v0 = chaining value. */
        "ld1	{v16.2d, v17.2d, v18.2d, v19.2d}, [%x[key]], #0x40\n\t"
        "ld1	{v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key]], #0x40\n\t"
        "ld1	{v0.2d}, [%x[reg]]\n\t"
        /* The flags set by subs pick the key-size path below; neither lsr
         * nor cbz modifies them. */
        "subs	%w[nr], %w[nr], #12\n\t"
        /* Convert the byte count into a block count. */
        "lsr	%w[sz], %w[sz], #4\n\t"
        /* No whole block: skip the loops, whose counter would otherwise
         * wrap around. */
        "cbz	%w[sz], L_aes_cbc_encrypt_arm64_crypto_done_%=\n\t"
        "b.lt	L_aes_cbc_encrypt_arm64_crypto_start_128_%=\n\t"
        "b.gt	L_aes_cbc_encrypt_arm64_crypto_start_256_%=\n\t"
        /* AES_CBC_192 */
#ifndef NO_AES_192
        "ld1	{v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key]], #0x40\n\t"
        /* Final round key is loop invariant: load it once. */
        "ld1	{v28.2d}, [%x[key]]\n\t"
        "\n"
    "L_aes_cbc_encrypt_arm64_crypto_loop_192_%=: \n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        /* Flags from this subs are consumed by the b.ne at the loop end. */
        "subs	%w[sz], %w[sz], #1\n\t"
        /* Chain: XOR the previous output block (or reg) into the input. */
        "eor	v0.16b, v0.16b, v1.16b\n\t"
        "aese	v0.16b, v16.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v17.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v18.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v19.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v20.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v21.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v22.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v23.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v24.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v25.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v26.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v27.16b\n\t"
        "eor	v0.16b, v0.16b, v28.16b\n\t"
        "st1	{v0.16b}, [%x[out]], #16\n\t"
        "b.ne	L_aes_cbc_encrypt_arm64_crypto_loop_192_%=\n\t"
#endif /* !NO_AES_192 */
        "b	L_aes_cbc_encrypt_arm64_crypto_done_%=\n\t"
        /* AES_CBC_256 */
        "\n"
    "L_aes_cbc_encrypt_arm64_crypto_start_256_%=: \n\t"
#ifndef NO_AES_256
        "ld1	{v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key]], #0x40\n\t"
        "ld1	{v28.2d, v29.2d}, [%x[key]], #32\n\t"
        /* Final round key is loop invariant: load it once. */
        "ld1	{v30.2d}, [%x[key]]\n\t"
        "\n"
    "L_aes_cbc_encrypt_arm64_crypto_loop_256_%=: \n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "subs	%w[sz], %w[sz], #1\n\t"
        "eor	v0.16b, v0.16b, v1.16b\n\t"
        "aese	v0.16b, v16.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v17.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v18.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v19.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v20.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v21.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v22.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v23.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v24.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v25.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v26.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v27.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v28.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v29.16b\n\t"
        "eor	v0.16b, v0.16b, v30.16b\n\t"
        "st1	{v0.16b}, [%x[out]], #16\n\t"
        "b.ne	L_aes_cbc_encrypt_arm64_crypto_loop_256_%=\n\t"
#endif /* !NO_AES_256 */
        "b	L_aes_cbc_encrypt_arm64_crypto_done_%=\n\t"
        /* AES_CBC_128 */
        "\n"
    "L_aes_cbc_encrypt_arm64_crypto_start_128_%=: \n\t"
#ifndef NO_AES_128
        "ld1	{v24.2d, v25.2d}, [%x[key]], #32\n\t"
        /* Final round key is loop invariant: load it once. */
        "ld1	{v26.2d}, [%x[key]]\n\t"
        "\n"
    "L_aes_cbc_encrypt_arm64_crypto_loop_128_%=: \n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "subs	%w[sz], %w[sz], #1\n\t"
        "eor	v0.16b, v0.16b, v1.16b\n\t"
        "aese	v0.16b, v16.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v17.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v18.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v19.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v20.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v21.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v22.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v23.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v24.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v25.16b\n\t"
        "eor	v0.16b, v0.16b, v26.16b\n\t"
        "st1	{v0.16b}, [%x[out]], #16\n\t"
        "b.ne	L_aes_cbc_encrypt_arm64_crypto_loop_128_%=\n\t"
#endif /* !NO_AES_128 */
        "\n"
    "L_aes_cbc_encrypt_arm64_crypto_done_%=: \n\t"
        /* Hand the last output block back as the next chaining value. */
        "st1	{v0.2d}, [%x[reg]]\n\t"
        /* in is post-incremented by the loads, so it must be read-write. */
        : [in] "+r" (in), [out] "+r" (out), [sz] "+r" (sz), [reg] "+r" (reg),
          [key] "+r" (key), [nr] "+r" (nr)
        :
        : "memory", "cc", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21",
            "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
    );
}

#ifdef HAVE_AES_DECRYPT
/* Decrypt whole 16-byte blocks in CBC mode with the ARMv8 AES instructions.
 *
 * Each key size has two loops that produce the same output. The "long" loop
 * XORs the chaining value with the final round key after the AES rounds; the
 * other loop does that XOR before the rounds. Which loop runs depends only on
 * the block count (more than 10 blocks for 192-bit, 5 for 256-bit and 24 for
 * 128-bit selects the "long" loop) - presumably a performance tuning choice.
 *
 * in   Input data; read 16 bytes at a time.
 * out  Output buffer; one 16-byte block is written for each block read.
 * sz   Length in bytes; only sz / 16 whole blocks are processed. When there
 *      is no whole block nothing is read from in or written to out.
 * reg  16-byte chaining value: loaded on entry, XORed into the first
 *      decrypted block, and overwritten on exit with the last input block.
 * key  Round keys, read sequentially as 16-byte vectors.
 * nr   Number of rounds: below 12 takes the 10-round path, 12 the 12-round
 *      path and above 12 the 14-round path.
 */
void AES_CBC_decrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg,
    byte* key, int nr)
{
    __asm__ __volatile__ (
        /* v16-v23 = first eight round keys, v0 = chaining value. */
        "ld1	{v16.2d, v17.2d, v18.2d, v19.2d}, [%x[key]], #0x40\n\t"
        "ld1	{v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key]], #0x40\n\t"
        "ld1	{v0.2d}, [%x[reg]]\n\t"
        /* Convert the byte count into a block count. */
        "lsr	%w[sz], %w[sz], #4\n\t"
        /* No whole block: skip the loops, which always process one block
         * before testing the counter. */
        "cbz	%w[sz], L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=\n\t"
        "cmp	%w[nr], #12\n\t"
        "b.lt	L_aes_cbc_decrypt_blocks_arm64_crypto_start_128_%=\n\t"
        "b.gt	L_aes_cbc_decrypt_blocks_arm64_crypto_start_256_%=\n\t"
        /* AES_CBC_192 */
#ifndef NO_AES_192
        "ld1	{v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key]], #0x40\n\t"
        "ld1	{v28.2d}, [%x[key]]\n\t"
        "cmp	%w[sz], #10\n\t"
        "b.le	L_aes_cbc_decrypt_blocks_arm64_crypto_192_start_1_%=\n\t"
        "\n"
    "L_aes_cbc_decrypt_blocks_arm64_crypto_192_start_1_long_%=: \n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "sub	%w[sz], %w[sz], #1\n\t"
        /* Keep the input block: it is the next chaining value. */
        "mov	v2.16b, v1.16b\n\t"
        "aesd	v1.16b, v16.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v17.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v18.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v19.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v20.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v21.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v22.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v23.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v24.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v25.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v26.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v27.16b\n\t"
        /* Fold the final round key and the chaining value in together. */
        "eor	v0.16b, v0.16b, v28.16b\n\t"
        "eor	v1.16b, v1.16b, v0.16b\n\t"
        "mov	v0.16b, v2.16b\n\t"
        "st1	{v1.16b}, [%x[out]], #16\n\t"
        "cmp	%w[sz], #1\n\t"
        "b.ge	L_aes_cbc_decrypt_blocks_arm64_crypto_192_start_1_long_%=\n\t"
        "b	L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=\n\t"
        "\n"
    "L_aes_cbc_decrypt_blocks_arm64_crypto_192_start_1_%=: \n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "sub	%w[sz], %w[sz], #1\n\t"
        /* v2 = chaining value ^ final round key; v0 = next chaining value. */
        "eor	v2.16b, v0.16b, v28.16b\n\t"
        "mov	v0.16b, v1.16b\n\t"
        "aesd	v1.16b, v16.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v17.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v18.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v19.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v20.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v21.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v22.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v23.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v24.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v25.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v26.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v27.16b\n\t"
        "eor	v1.16b, v1.16b, v2.16b\n\t"
        "st1	{v1.16b}, [%x[out]], #16\n\t"
        "cmp	%w[sz], #1\n\t"
        "b.ge	L_aes_cbc_decrypt_blocks_arm64_crypto_192_start_1_%=\n\t"
#endif /* !NO_AES_192 */
        "b	L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=\n\t"
        /* AES_CBC_256 */
        "\n"
    "L_aes_cbc_decrypt_blocks_arm64_crypto_start_256_%=: \n\t"
#ifndef NO_AES_256
        "ld1	{v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key]], #0x40\n\t"
        "ld1	{v28.2d, v29.2d}, [%x[key]], #32\n\t"
        "ld1	{v30.2d}, [%x[key]]\n\t"
        "cmp	%w[sz], #5\n\t"
        "b.le	L_aes_cbc_decrypt_blocks_arm64_crypto_256_start_1_%=\n\t"
        "\n"
    "L_aes_cbc_decrypt_blocks_arm64_crypto_256_start_1_long_%=: \n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "sub	%w[sz], %w[sz], #1\n\t"
        /* Keep the input block: it is the next chaining value. */
        "mov	v2.16b, v1.16b\n\t"
        "aesd	v1.16b, v16.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v17.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v18.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v19.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v20.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v21.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v22.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v23.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v24.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v25.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v26.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v27.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v28.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v29.16b\n\t"
        /* Fold the final round key and the chaining value in together. */
        "eor	v0.16b, v0.16b, v30.16b\n\t"
        "eor	v1.16b, v1.16b, v0.16b\n\t"
        "mov	v0.16b, v2.16b\n\t"
        "st1	{v1.16b}, [%x[out]], #16\n\t"
        "cmp	%w[sz], #1\n\t"
        "b.ge	L_aes_cbc_decrypt_blocks_arm64_crypto_256_start_1_long_%=\n\t"
        "b	L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=\n\t"
        "\n"
    "L_aes_cbc_decrypt_blocks_arm64_crypto_256_start_1_%=: \n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "sub	%w[sz], %w[sz], #1\n\t"
        /* v2 = chaining value ^ final round key; v0 = next chaining value. */
        "eor	v2.16b, v0.16b, v30.16b\n\t"
        "mov	v0.16b, v1.16b\n\t"
        "aesd	v1.16b, v16.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v17.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v18.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v19.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v20.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v21.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v22.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v23.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v24.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v25.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v26.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v27.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v28.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v29.16b\n\t"
        "eor	v1.16b, v1.16b, v2.16b\n\t"
        "st1	{v1.16b}, [%x[out]], #16\n\t"
        "cmp	%w[sz], #1\n\t"
        "b.ge	L_aes_cbc_decrypt_blocks_arm64_crypto_256_start_1_%=\n\t"
#endif /* !NO_AES_256 */
        "b	L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=\n\t"
        /* AES_CBC_128 */
        "\n"
    "L_aes_cbc_decrypt_blocks_arm64_crypto_start_128_%=: \n\t"
#ifndef NO_AES_128
        "ld1	{v24.2d, v25.2d}, [%x[key]], #32\n\t"
        "ld1	{v26.2d}, [%x[key]]\n\t"
        "cmp	%w[sz], #24\n\t"
        "b.le	L_aes_cbc_decrypt_blocks_arm64_crypto_128_start_1_%=\n\t"
        "\n"
    "L_aes_cbc_decrypt_blocks_arm64_crypto_128_start_1_long_%=: \n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "sub	%w[sz], %w[sz], #1\n\t"
        /* Keep the input block: it is the next chaining value. */
        "mov	v2.16b, v1.16b\n\t"
        "aesd	v1.16b, v16.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v17.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v18.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v19.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v20.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v21.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v22.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v23.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v24.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v25.16b\n\t"
        /* Fold the final round key and the chaining value in together. */
        "eor	v0.16b, v0.16b, v26.16b\n\t"
        "eor	v1.16b, v1.16b, v0.16b\n\t"
        "mov	v0.16b, v2.16b\n\t"
        "st1	{v1.16b}, [%x[out]], #16\n\t"
        "cmp	%w[sz], #1\n\t"
        "b.ge	L_aes_cbc_decrypt_blocks_arm64_crypto_128_start_1_long_%=\n\t"
        "b	L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=\n\t"
        "\n"
    "L_aes_cbc_decrypt_blocks_arm64_crypto_128_start_1_%=: \n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "sub	%w[sz], %w[sz], #1\n\t"
        /* v2 = chaining value ^ final round key; v0 = next chaining value. */
        "eor	v2.16b, v0.16b, v26.16b\n\t"
        "mov	v0.16b, v1.16b\n\t"
        "aesd	v1.16b, v16.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v17.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v18.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v19.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v20.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v21.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v22.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v23.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v24.16b\n\t"
        "aesimc	v1.16b, v1.16b\n\t"
        "aesd	v1.16b, v25.16b\n\t"
        "eor	v1.16b, v1.16b, v2.16b\n\t"
        "st1	{v1.16b}, [%x[out]], #16\n\t"
        "cmp	%w[sz], #1\n\t"
        "b.ge	L_aes_cbc_decrypt_blocks_arm64_crypto_128_start_1_%=\n\t"
#endif /* !NO_AES_128 */
        "\n"
    "L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=: \n\t"
        /* Hand the last input block back as the next chaining value. */
        "st1	{v0.2d}, [%x[reg]]\n\t"
        /* in is post-incremented by the loads, so it must be read-write. */
        : [in] "+r" (in), [out] "+r" (out), [sz] "+r" (sz), [reg] "+r" (reg),
          [key] "+r" (key), [nr] "+r" (nr)
        :
        : "memory", "cc", "v0", "v1", "v2", "v16", "v17", "v18", "v19", "v20",
            "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
            "v30"
    );
}

#endif /* HAVE_AES_DECRYPT */
#endif /* HAVE_AES_CBC */
#ifdef WOLFSSL_AES_COUNTER
void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg,
    byte* key, byte* tmp, word32* left, word32 nr)
{
    __asm__ __volatile__ (
        "stp	x29, x30, [sp, #-32]!\n\t"
        "add	x29, sp, #0\n\t"
        "ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t"
        "ld1	{v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t"
        "ld1	{v15.2d}, [%x[reg]]\n\t"
        "rev64	v16.16b, v15.16b\n\t"
        "lsr	w8, %w[sz], #4\n\t"
        "and	%w[sz], %w[sz], #15\n\t"
        "mov	x9, v16.d[1]\n\t"
        "mov	x10, v16.d[0]\n\t"
        "cmp	%w[nr], #12\n\t"
        "b.lt	L_aes_ctr_encrypt_arm64_crypto_start_128_%=\n\t"
        "b.gt	L_aes_ctr_encrypt_arm64_crypto_start_256_%=\n\t"
        /* AES_CTR_192 */
#ifndef NO_AES_192
        "ld1	{v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t"
        "ld1	{v12.2d}, [%x[key]]\n\t"
        "cmp	w8, #1\n\t"
        "b.le	L_aes_ctr_encrypt_arm64_crypto_192_start_1_%=\n\t"
        "adds	x11, x9, #1\n\t"
        "adc	x12, x10, xzr\n\t"
        "cmp	w8, #8\n\t"
        "b.lt	L_aes_ctr_encrypt_arm64_crypto_192_start_4_%=\n\t"
        "adds	x13, x9, #2\n\t"
        "adc	x14, x10, xzr\n\t"
        "adds	x15, x9, #3\n\t"
        "adc	x16, x10, xzr\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_192_start_8_%=: \n\t"
        "ld1	{v24.16b, v25.16b, v26.16b, v27.16b}, [%x[in]], #0x40\n\t"
        "ld1	{v28.16b, v29.16b, v30.16b, v31.16b}, [%x[in]], #0x40\n\t"
        "mov	v17.d[0], x12\n\t"
        "mov	v17.d[1], x11\n\t"
        "mov	v18.d[0], x14\n\t"
        "mov	v18.d[1], x13\n\t"
        "adds	x17, x9, #4\n\t"
        "mov	v19.d[0], x16\n\t"
        "adc	x19, x10, xzr\n\t"
        "mov	v19.d[1], x15\n\t"
        "adds	x20, x9, #5\n\t"
        "mov	v20.d[0], x19\n\t"
        "adc	x21, x10, xzr\n\t"
        "mov	v20.d[1], x17\n\t"
        "adds	x22, x9, #6\n\t"
        "mov	v21.d[0], x21\n\t"
        "adc	x23, x10, xzr\n\t"
        "mov	v21.d[1], x20\n\t"
        "adds	x24, x9, #7\n\t"
        "mov	v22.d[0], x23\n\t"
        "adc	x25, x10, xzr\n\t"
        "mov	v22.d[1], x22\n\t"
        "mov	v23.d[0], x25\n\t"
        "mov	v23.d[1], x24\n\t"
        "rev64	v16.16b, v16.16b\n\t"
        "rev64	v17.16b, v17.16b\n\t"
        "rev64	v18.16b, v18.16b\n\t"
        "rev64	v19.16b, v19.16b\n\t"
        "rev64	v20.16b, v20.16b\n\t"
        "rev64	v21.16b, v21.16b\n\t"
        "rev64	v22.16b, v22.16b\n\t"
        "rev64	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v0.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v0.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v0.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v0.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v0.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v0.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v1.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v1.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v1.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v1.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v1.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v1.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v2.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v2.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v2.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v2.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v2.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v2.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v3.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v3.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v3.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v3.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v3.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v3.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v4.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v4.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v4.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v4.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v4.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v4.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v5.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v5.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v5.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v5.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v5.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v5.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v6.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v6.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v6.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v6.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v6.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v6.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v7.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v7.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v7.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v7.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v7.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v7.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v8.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v8.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v8.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v8.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v8.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v8.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v9.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v9.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v9.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v9.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v9.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v9.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v10.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v10.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v10.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v10.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v10.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v10.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v18.16b, v11.16b\n\t"
        "eor	v18.16b, v18.16b, v12.16b\n\t"
        "aese	v19.16b, v11.16b\n\t"
        "eor	v19.16b, v19.16b, v12.16b\n\t"
        "aese	v20.16b, v11.16b\n\t"
        "eor	v20.16b, v20.16b, v12.16b\n\t"
        "aese	v21.16b, v11.16b\n\t"
        "eor	v21.16b, v21.16b, v12.16b\n\t"
        "aese	v22.16b, v11.16b\n\t"
        "eor	v22.16b, v22.16b, v12.16b\n\t"
        "aese	v23.16b, v11.16b\n\t"
        "eor	v23.16b, v23.16b, v12.16b\n\t"
        "adds	x9, x9, #8\n\t"
        "eor	v24.16b, v24.16b, v16.16b\n\t"
        "adc	x10, x10, xzr\n\t"
        "eor	v25.16b, v25.16b, v17.16b\n\t"
        "adds	x11, x11, #8\n\t"
        "eor	v26.16b, v26.16b, v18.16b\n\t"
        "adc	x12, x12, xzr\n\t"
        "eor	v27.16b, v27.16b, v19.16b\n\t"
        "adds	x13, x13, #8\n\t"
        "eor	v28.16b, v28.16b, v20.16b\n\t"
        "adc	x14, x14, xzr\n\t"
        "eor	v29.16b, v29.16b, v21.16b\n\t"
        "adds	x15, x15, #8\n\t"
        "eor	v30.16b, v30.16b, v22.16b\n\t"
        "adc	x16, x16, xzr\n\t"
        "eor	v31.16b, v31.16b, v23.16b\n\t"
        "sub	w8, w8, #8\n\t"
        "st1	{v24.16b, v25.16b, v26.16b, v27.16b}, [%x[out]], #0x40\n\t"
        "st1	{v28.16b, v29.16b, v30.16b, v31.16b}, [%x[out]], #0x40\n\t"
        "mov	v16.d[0], x10\n\t"
        "mov	v16.d[1], x9\n\t"
        "cmp	w8, #8\n\t"
        "b.ge	L_aes_ctr_encrypt_arm64_crypto_192_start_8_%=\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_192_start_4_%=: \n\t"
        "cmp	w8, #4\n\t"
        "b.lt	L_aes_ctr_encrypt_arm64_crypto_192_start_2_%=\n\t"
        "ld1	{v24.16b, v25.16b, v26.16b, v27.16b}, [%x[in]], #0x40\n\t"
        "adds	x13, x9, #2\n\t"
        "mov	v17.d[0], x12\n\t"
        "adc	x14, x10, xzr\n\t"
        "mov	v17.d[1], x11\n\t"
        "adds	x15, x9, #3\n\t"
        "mov	v18.d[0], x14\n\t"
        "adc	x16, x10, xzr\n\t"
        "mov	v18.d[1], x13\n\t"
        "mov	v19.d[0], x16\n\t"
        "mov	v19.d[1], x15\n\t"
        "rev64	v16.16b, v16.16b\n\t"
        "rev64	v17.16b, v17.16b\n\t"
        "rev64	v18.16b, v18.16b\n\t"
        "rev64	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v0.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v0.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v1.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v1.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v2.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v2.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v3.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v3.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v4.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v4.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v5.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v5.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v6.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v6.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v7.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v7.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v8.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v8.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v9.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v9.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v10.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v10.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v18.16b, v11.16b\n\t"
        "eor	v18.16b, v18.16b, v12.16b\n\t"
        "aese	v19.16b, v11.16b\n\t"
        "eor	v19.16b, v19.16b, v12.16b\n\t"
        "adds	x9, x9, #4\n\t"
        "eor	v24.16b, v24.16b, v16.16b\n\t"
        "adc	x10, x10, xzr\n\t"
        "eor	v25.16b, v25.16b, v17.16b\n\t"
        "adds	x11, x11, #4\n\t"
        "eor	v26.16b, v26.16b, v18.16b\n\t"
        "adc	x12, x12, xzr\n\t"
        "eor	v27.16b, v27.16b, v19.16b\n\t"
        "sub	w8, w8, #4\n\t"
        "st1	{v24.16b, v25.16b, v26.16b, v27.16b}, [%x[out]], #0x40\n\t"
        "mov	v16.d[0], x10\n\t"
        "mov	v16.d[1], x9\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_192_start_2_%=: \n\t"
        "cmp	w8, #2\n\t"
        "b.lt	L_aes_ctr_encrypt_arm64_crypto_192_start_1_%=\n\t"
        "ld1	{v24.16b, v25.16b}, [%x[in]], #32\n\t"
        "eor	v20.16b, v20.16b, v20.16b\n\t"
        "ext	v19.16b, v16.16b, v16.16b, #8\n\t"
        "movi	v18.16b, #1\n\t"
        "ext	v18.16b, v18.16b, v20.16b, #15\n\t"
        "add	v17.2d, v19.2d, v18.2d\n\t"
        "cmeq	v19.2d, v17.2d, #0\n\t"
        "ext	v19.16b, v20.16b, v19.16b, #8\n\t"
        "sub	v17.2d, v17.2d, v19.2d\n\t"
        "ext	v17.16b, v17.16b, v17.16b, #8\n\t"
        "rev64	v16.16b, v16.16b\n\t"
        "rev64	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "adds	x9, x9, #2\n\t"
        "eor	v24.16b, v24.16b, v16.16b\n\t"
        "adc	x10, x10, xzr\n\t"
        "eor	v25.16b, v25.16b, v17.16b\n\t"
        "sub	w8, w8, #2\n\t"
        "st1	{v24.16b, v25.16b}, [%x[out]], #32\n\t"
        "mov	v16.d[0], x10\n\t"
        "mov	v16.d[1], x9\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_192_start_1_%=: \n\t"
        "cbz	w8, L_aes_ctr_encrypt_arm64_crypto_192_done_%=\n\t"
        "ld1	{v24.16b}, [%x[in]], #16\n\t"
        "rev64	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "adds	x9, x9, #1\n\t"
        "eor	v24.16b, v24.16b, v16.16b\n\t"
        "adc	x10, x10, xzr\n\t"
        "st1	{v24.16b}, [%x[out]], #16\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_192_done_%=: \n\t"
        "cbz	%w[sz], L_aes_ctr_encrypt_arm64_crypto_192_partial_done_%=\n\t"
        "mov	v16.d[0], x10\n\t"
        "mov	v16.d[1], x9\n\t"
        "rev64	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "adds	x9, x9, #1\n\t"
        "adc	x10, x10, xzr\n\t"
        "st1	{v16.2d}, [%x[tmp]]\n\t"
        "mov	w13, #16\n\t"
        "sub	w13, w13, %w[sz]\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_192_start_byte_%=: \n\t"
        "ldrb	w11, [%x[tmp]], #1\n\t"
        "ldrb	w12, [%x[in]], #1\n\t"
        "eor	w11, w11, w12\n\t"
        "subs	%w[sz], %w[sz], #1\n\t"
        "strb	w11, [%x[out]], #1\n\t"
        "b.gt	L_aes_ctr_encrypt_arm64_crypto_192_start_byte_%=\n\t"
        "str	w13, [%x[left]]\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_192_partial_done_%=: \n\t"
#endif /* !NO_AES_192 */
        "b	L_aes_ctr_encrypt_arm64_crypto_done_%=\n\t"
        /* AES_CTR_256 */
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_start_256_%=: \n\t"
#ifndef NO_AES_256
        "ld1	{v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t"
        "ld1	{v12.2d, v13.2d}, [%x[key]], #32\n\t"
        "ld1	{v14.2d}, [%x[key]]\n\t"
        "cmp	w8, #1\n\t"
        "b.le	L_aes_ctr_encrypt_arm64_crypto_256_start_1_%=\n\t"
        "adds	x11, x9, #1\n\t"
        "adc	x12, x10, xzr\n\t"
        "cmp	w8, #8\n\t"
        "b.lt	L_aes_ctr_encrypt_arm64_crypto_256_start_4_%=\n\t"
        "adds	x13, x9, #2\n\t"
        "adc	x14, x10, xzr\n\t"
        "adds	x15, x9, #3\n\t"
        "adc	x16, x10, xzr\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_256_start_8_%=: \n\t"
        "ld1	{v24.16b, v25.16b, v26.16b, v27.16b}, [%x[in]], #0x40\n\t"
        "ld1	{v28.16b, v29.16b, v30.16b, v31.16b}, [%x[in]], #0x40\n\t"
        "mov	v17.d[0], x12\n\t"
        "mov	v17.d[1], x11\n\t"
        "mov	v18.d[0], x14\n\t"
        "mov	v18.d[1], x13\n\t"
        "adds	x17, x9, #4\n\t"
        "mov	v19.d[0], x16\n\t"
        "adc	x19, x10, xzr\n\t"
        "mov	v19.d[1], x15\n\t"
        "adds	x20, x9, #5\n\t"
        "mov	v20.d[0], x19\n\t"
        "adc	x21, x10, xzr\n\t"
        "mov	v20.d[1], x17\n\t"
        "adds	x22, x9, #6\n\t"
        "mov	v21.d[0], x21\n\t"
        "adc	x23, x10, xzr\n\t"
        "mov	v21.d[1], x20\n\t"
        "adds	x24, x9, #7\n\t"
        "mov	v22.d[0], x23\n\t"
        "adc	x25, x10, xzr\n\t"
        "mov	v22.d[1], x22\n\t"
        "mov	v23.d[0], x25\n\t"
        "mov	v23.d[1], x24\n\t"
        "rev64	v16.16b, v16.16b\n\t"
        "rev64	v17.16b, v17.16b\n\t"
        "rev64	v18.16b, v18.16b\n\t"
        "rev64	v19.16b, v19.16b\n\t"
        "rev64	v20.16b, v20.16b\n\t"
        "rev64	v21.16b, v21.16b\n\t"
        "rev64	v22.16b, v22.16b\n\t"
        "rev64	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v0.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v0.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v0.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v0.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v0.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v0.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v1.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v1.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v1.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v1.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v1.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v1.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v2.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v2.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v2.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v2.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v2.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v2.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v3.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v3.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v3.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v3.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v3.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v3.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v4.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v4.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v4.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v4.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v4.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v4.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v5.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v5.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v5.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v5.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v5.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v5.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v6.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v6.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v6.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v6.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v6.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v6.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v7.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v7.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v7.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v7.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v7.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v7.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v8.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v8.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v8.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v8.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v8.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v8.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v9.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v9.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v9.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v9.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v9.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v9.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v10.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v10.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v10.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v10.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v10.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v10.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v11.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v11.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v11.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v11.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v11.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v11.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v12.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v12.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v12.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v12.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v12.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v12.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v14.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v14.16b\n\t"
        "aese	v18.16b, v13.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "aese	v19.16b, v13.16b\n\t"
        "eor	v19.16b, v19.16b, v14.16b\n\t"
        "aese	v20.16b, v13.16b\n\t"
        "eor	v20.16b, v20.16b, v14.16b\n\t"
        "aese	v21.16b, v13.16b\n\t"
        "eor	v21.16b, v21.16b, v14.16b\n\t"
        "aese	v22.16b, v13.16b\n\t"
        "eor	v22.16b, v22.16b, v14.16b\n\t"
        "aese	v23.16b, v13.16b\n\t"
        "eor	v23.16b, v23.16b, v14.16b\n\t"
        "adds	x9, x9, #8\n\t"
        "eor	v24.16b, v24.16b, v16.16b\n\t"
        "adc	x10, x10, xzr\n\t"
        "eor	v25.16b, v25.16b, v17.16b\n\t"
        "adds	x11, x11, #8\n\t"
        "eor	v26.16b, v26.16b, v18.16b\n\t"
        "adc	x12, x12, xzr\n\t"
        "eor	v27.16b, v27.16b, v19.16b\n\t"
        "adds	x13, x13, #8\n\t"
        "eor	v28.16b, v28.16b, v20.16b\n\t"
        "adc	x14, x14, xzr\n\t"
        "eor	v29.16b, v29.16b, v21.16b\n\t"
        "adds	x15, x15, #8\n\t"
        "eor	v30.16b, v30.16b, v22.16b\n\t"
        "adc	x16, x16, xzr\n\t"
        "eor	v31.16b, v31.16b, v23.16b\n\t"
        "sub	w8, w8, #8\n\t"
        "st1	{v24.16b, v25.16b, v26.16b, v27.16b}, [%x[out]], #0x40\n\t"
        "st1	{v28.16b, v29.16b, v30.16b, v31.16b}, [%x[out]], #0x40\n\t"
        "mov	v16.d[0], x10\n\t"
        "mov	v16.d[1], x9\n\t"
        "cmp	w8, #8\n\t"
        "b.ge	L_aes_ctr_encrypt_arm64_crypto_256_start_8_%=\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_256_start_4_%=: \n\t"
        "cmp	w8, #4\n\t"
        "b.lt	L_aes_ctr_encrypt_arm64_crypto_256_start_2_%=\n\t"
        "ld1	{v24.16b, v25.16b, v26.16b, v27.16b}, [%x[in]], #0x40\n\t"
        "adds	x13, x9, #2\n\t"
        "mov	v17.d[0], x12\n\t"
        "adc	x14, x10, xzr\n\t"
        "mov	v17.d[1], x11\n\t"
        "adds	x15, x9, #3\n\t"
        "mov	v18.d[0], x14\n\t"
        "adc	x16, x10, xzr\n\t"
        "mov	v18.d[1], x13\n\t"
        "mov	v19.d[0], x16\n\t"
        "mov	v19.d[1], x15\n\t"
        "rev64	v16.16b, v16.16b\n\t"
        "rev64	v17.16b, v17.16b\n\t"
        "rev64	v18.16b, v18.16b\n\t"
        "rev64	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v0.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v0.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v1.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v1.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v2.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v2.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v3.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v3.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v4.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v4.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v5.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v5.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v6.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v6.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v7.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v7.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v8.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v8.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v9.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v9.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v10.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v10.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v11.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v11.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v12.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v12.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v14.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v14.16b\n\t"
        "aese	v18.16b, v13.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "aese	v19.16b, v13.16b\n\t"
        "eor	v19.16b, v19.16b, v14.16b\n\t"
        "adds	x9, x9, #4\n\t"
        "eor	v24.16b, v24.16b, v16.16b\n\t"
        "adc	x10, x10, xzr\n\t"
        "eor	v25.16b, v25.16b, v17.16b\n\t"
        "adds	x11, x11, #4\n\t"
        "eor	v26.16b, v26.16b, v18.16b\n\t"
        "adc	x12, x12, xzr\n\t"
        "eor	v27.16b, v27.16b, v19.16b\n\t"
        "sub	w8, w8, #4\n\t"
        "st1	{v24.16b, v25.16b, v26.16b, v27.16b}, [%x[out]], #0x40\n\t"
        "mov	v16.d[0], x10\n\t"
        "mov	v16.d[1], x9\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_256_start_2_%=: \n\t"
        "cmp	w8, #2\n\t"
        "b.lt	L_aes_ctr_encrypt_arm64_crypto_256_start_1_%=\n\t"
        "ld1	{v24.16b, v25.16b}, [%x[in]], #32\n\t"
        "eor	v20.16b, v20.16b, v20.16b\n\t"
        "ext	v19.16b, v16.16b, v16.16b, #8\n\t"
        "movi	v18.16b, #1\n\t"
        "ext	v18.16b, v18.16b, v20.16b, #15\n\t"
        "add	v17.2d, v19.2d, v18.2d\n\t"
        "cmeq	v19.2d, v17.2d, #0\n\t"
        "ext	v19.16b, v20.16b, v19.16b, #8\n\t"
        "sub	v17.2d, v17.2d, v19.2d\n\t"
        "ext	v17.16b, v17.16b, v17.16b, #8\n\t"
        "rev64	v16.16b, v16.16b\n\t"
        "rev64	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v14.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v14.16b\n\t"
        "adds	x9, x9, #2\n\t"
        "eor	v24.16b, v24.16b, v16.16b\n\t"
        "adc	x10, x10, xzr\n\t"
        "eor	v25.16b, v25.16b, v17.16b\n\t"
        "sub	w8, w8, #2\n\t"
        "st1	{v24.16b, v25.16b}, [%x[out]], #32\n\t"
        "mov	v16.d[0], x10\n\t"
        "mov	v16.d[1], x9\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_256_start_1_%=: \n\t"
        "cbz	w8, L_aes_ctr_encrypt_arm64_crypto_256_done_%=\n\t"
        "ld1	{v24.16b}, [%x[in]], #16\n\t"
        "rev64	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v14.16b\n\t"
        "adds	x9, x9, #1\n\t"
        "eor	v24.16b, v24.16b, v16.16b\n\t"
        "adc	x10, x10, xzr\n\t"
        "st1	{v24.16b}, [%x[out]], #16\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_256_done_%=: \n\t"
        "cbz	%w[sz], L_aes_ctr_encrypt_arm64_crypto_256_partial_done_%=\n\t"
        "mov	v16.d[0], x10\n\t"
        "mov	v16.d[1], x9\n\t"
        "rev64	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v14.16b\n\t"
        "adds	x9, x9, #1\n\t"
        "adc	x10, x10, xzr\n\t"
        "st1	{v16.2d}, [%x[tmp]]\n\t"
        "mov	w13, #16\n\t"
        "sub	w13, w13, %w[sz]\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_256_start_byte_%=: \n\t"
        "ldrb	w11, [%x[tmp]], #1\n\t"
        "ldrb	w12, [%x[in]], #1\n\t"
        "eor	w11, w11, w12\n\t"
        "subs	%w[sz], %w[sz], #1\n\t"
        "strb	w11, [%x[out]], #1\n\t"
        "b.gt	L_aes_ctr_encrypt_arm64_crypto_256_start_byte_%=\n\t"
        "str	w13, [%x[left]]\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_256_partial_done_%=: \n\t"
#endif /* !NO_AES_256 */
        "b	L_aes_ctr_encrypt_arm64_crypto_done_%=\n\t"
        /* AES_CTR_128 */
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_start_128_%=: \n\t"
#ifndef NO_AES_128
        "ld1	{v8.2d, v9.2d}, [%x[key]], #32\n\t"
        "ld1	{v10.2d}, [%x[key]]\n\t"
        "cmp	w8, #1\n\t"
        "b.le	L_aes_ctr_encrypt_arm64_crypto_128_start_1_%=\n\t"
        "adds	x11, x9, #1\n\t"
        "adc	x12, x10, xzr\n\t"
        "cmp	w8, #8\n\t"
        "b.lt	L_aes_ctr_encrypt_arm64_crypto_128_start_4_%=\n\t"
        "adds	x13, x9, #2\n\t"
        "adc	x14, x10, xzr\n\t"
        "adds	x15, x9, #3\n\t"
        "adc	x16, x10, xzr\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_128_start_8_%=: \n\t"
        "ld1	{v24.16b, v25.16b, v26.16b, v27.16b}, [%x[in]], #0x40\n\t"
        "ld1	{v28.16b, v29.16b, v30.16b, v31.16b}, [%x[in]], #0x40\n\t"
        "mov	v17.d[0], x12\n\t"
        "mov	v17.d[1], x11\n\t"
        "mov	v18.d[0], x14\n\t"
        "mov	v18.d[1], x13\n\t"
        "adds	x17, x9, #4\n\t"
        "mov	v19.d[0], x16\n\t"
        "adc	x19, x10, xzr\n\t"
        "mov	v19.d[1], x15\n\t"
        "adds	x20, x9, #5\n\t"
        "mov	v20.d[0], x19\n\t"
        "adc	x21, x10, xzr\n\t"
        "mov	v20.d[1], x17\n\t"
        "adds	x22, x9, #6\n\t"
        "mov	v21.d[0], x21\n\t"
        "adc	x23, x10, xzr\n\t"
        "mov	v21.d[1], x20\n\t"
        "adds	x24, x9, #7\n\t"
        "mov	v22.d[0], x23\n\t"
        "adc	x25, x10, xzr\n\t"
        "mov	v22.d[1], x22\n\t"
        "mov	v23.d[0], x25\n\t"
        "mov	v23.d[1], x24\n\t"
        "rev64	v16.16b, v16.16b\n\t"
        "rev64	v17.16b, v17.16b\n\t"
        "rev64	v18.16b, v18.16b\n\t"
        "rev64	v19.16b, v19.16b\n\t"
        "rev64	v20.16b, v20.16b\n\t"
        "rev64	v21.16b, v21.16b\n\t"
        "rev64	v22.16b, v22.16b\n\t"
        "rev64	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v0.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v0.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v0.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v0.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v0.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v0.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v1.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v1.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v1.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v1.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v1.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v1.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v2.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v2.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v2.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v2.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v2.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v2.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v3.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v3.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v3.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v3.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v3.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v3.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v4.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v4.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v4.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v4.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v4.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v4.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v5.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v5.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v5.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v5.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v5.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v5.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v6.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v6.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v6.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v6.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v6.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v6.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v7.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v7.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v7.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v7.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v7.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v7.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v8.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v8.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v20.16b, v8.16b\n\t"
        "aesmc	v20.16b, v20.16b\n\t"
        "aese	v21.16b, v8.16b\n\t"
        "aesmc	v21.16b, v21.16b\n\t"
        "aese	v22.16b, v8.16b\n\t"
        "aesmc	v22.16b, v22.16b\n\t"
        "aese	v23.16b, v8.16b\n\t"
        "aesmc	v23.16b, v23.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "eor	v16.16b, v16.16b, v10.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "eor	v17.16b, v17.16b, v10.16b\n\t"
        "aese	v18.16b, v9.16b\n\t"
        "eor	v18.16b, v18.16b, v10.16b\n\t"
        "aese	v19.16b, v9.16b\n\t"
        "eor	v19.16b, v19.16b, v10.16b\n\t"
        "aese	v20.16b, v9.16b\n\t"
        "eor	v20.16b, v20.16b, v10.16b\n\t"
        "aese	v21.16b, v9.16b\n\t"
        "eor	v21.16b, v21.16b, v10.16b\n\t"
        "aese	v22.16b, v9.16b\n\t"
        "eor	v22.16b, v22.16b, v10.16b\n\t"
        "aese	v23.16b, v9.16b\n\t"
        "eor	v23.16b, v23.16b, v10.16b\n\t"
        "adds	x9, x9, #8\n\t"
        "eor	v24.16b, v24.16b, v16.16b\n\t"
        "adc	x10, x10, xzr\n\t"
        "eor	v25.16b, v25.16b, v17.16b\n\t"
        "adds	x11, x11, #8\n\t"
        "eor	v26.16b, v26.16b, v18.16b\n\t"
        "adc	x12, x12, xzr\n\t"
        "eor	v27.16b, v27.16b, v19.16b\n\t"
        "adds	x13, x13, #8\n\t"
        "eor	v28.16b, v28.16b, v20.16b\n\t"
        "adc	x14, x14, xzr\n\t"
        "eor	v29.16b, v29.16b, v21.16b\n\t"
        "adds	x15, x15, #8\n\t"
        "eor	v30.16b, v30.16b, v22.16b\n\t"
        "adc	x16, x16, xzr\n\t"
        "eor	v31.16b, v31.16b, v23.16b\n\t"
        "sub	w8, w8, #8\n\t"
        "st1	{v24.16b, v25.16b, v26.16b, v27.16b}, [%x[out]], #0x40\n\t"
        "st1	{v28.16b, v29.16b, v30.16b, v31.16b}, [%x[out]], #0x40\n\t"
        "mov	v16.d[0], x10\n\t"
        "mov	v16.d[1], x9\n\t"
        "cmp	w8, #8\n\t"
        "b.ge	L_aes_ctr_encrypt_arm64_crypto_128_start_8_%=\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_128_start_4_%=: \n\t"
        "cmp	w8, #4\n\t"
        "b.lt	L_aes_ctr_encrypt_arm64_crypto_128_start_2_%=\n\t"
        "ld1	{v24.16b, v25.16b, v26.16b, v27.16b}, [%x[in]], #0x40\n\t"
        "adds	x13, x9, #2\n\t"
        "mov	v17.d[0], x12\n\t"
        "adc	x14, x10, xzr\n\t"
        "mov	v17.d[1], x11\n\t"
        "adds	x15, x9, #3\n\t"
        "mov	v18.d[0], x14\n\t"
        "adc	x16, x10, xzr\n\t"
        "mov	v18.d[1], x13\n\t"
        "mov	v19.d[0], x16\n\t"
        "mov	v19.d[1], x15\n\t"
        "rev64	v16.16b, v16.16b\n\t"
        "rev64	v17.16b, v17.16b\n\t"
        "rev64	v18.16b, v18.16b\n\t"
        "rev64	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v0.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v0.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v1.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v1.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v2.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v2.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v3.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v3.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v4.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v4.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v5.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v5.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v6.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v6.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v7.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v7.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v18.16b, v8.16b\n\t"
        "aesmc	v18.16b, v18.16b\n\t"
        "aese	v19.16b, v8.16b\n\t"
        "aesmc	v19.16b, v19.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "eor	v16.16b, v16.16b, v10.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "eor	v17.16b, v17.16b, v10.16b\n\t"
        "aese	v18.16b, v9.16b\n\t"
        "eor	v18.16b, v18.16b, v10.16b\n\t"
        "aese	v19.16b, v9.16b\n\t"
        "eor	v19.16b, v19.16b, v10.16b\n\t"
        "adds	x9, x9, #4\n\t"
        "eor	v24.16b, v24.16b, v16.16b\n\t"
        "adc	x10, x10, xzr\n\t"
        "eor	v25.16b, v25.16b, v17.16b\n\t"
        "adds	x11, x11, #4\n\t"
        "eor	v26.16b, v26.16b, v18.16b\n\t"
        "adc	x12, x12, xzr\n\t"
        "eor	v27.16b, v27.16b, v19.16b\n\t"
        "sub	w8, w8, #4\n\t"
        "st1	{v24.16b, v25.16b, v26.16b, v27.16b}, [%x[out]], #0x40\n\t"
        "mov	v16.d[0], x10\n\t"
        "mov	v16.d[1], x9\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_128_start_2_%=: \n\t"
        "cmp	w8, #2\n\t"
        "b.lt	L_aes_ctr_encrypt_arm64_crypto_128_start_1_%=\n\t"
        "ld1	{v24.16b, v25.16b}, [%x[in]], #32\n\t"
        "eor	v20.16b, v20.16b, v20.16b\n\t"
        "ext	v19.16b, v16.16b, v16.16b, #8\n\t"
        "movi	v18.16b, #1\n\t"
        "ext	v18.16b, v18.16b, v20.16b, #15\n\t"
        "add	v17.2d, v19.2d, v18.2d\n\t"
        "cmeq	v19.2d, v17.2d, #0\n\t"
        "ext	v19.16b, v20.16b, v19.16b, #8\n\t"
        "sub	v17.2d, v17.2d, v19.2d\n\t"
        "ext	v17.16b, v17.16b, v17.16b, #8\n\t"
        "rev64	v16.16b, v16.16b\n\t"
        "rev64	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "eor	v16.16b, v16.16b, v10.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "eor	v17.16b, v17.16b, v10.16b\n\t"
        "adds	x9, x9, #2\n\t"
        "eor	v24.16b, v24.16b, v16.16b\n\t"
        "adc	x10, x10, xzr\n\t"
        "eor	v25.16b, v25.16b, v17.16b\n\t"
        "sub	w8, w8, #2\n\t"
        "st1	{v24.16b, v25.16b}, [%x[out]], #32\n\t"
        "mov	v16.d[0], x10\n\t"
        "mov	v16.d[1], x9\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_128_start_1_%=: \n\t"
        "cbz	w8, L_aes_ctr_encrypt_arm64_crypto_128_done_%=\n\t"
        "ld1	{v24.16b}, [%x[in]], #16\n\t"
        "rev64	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "eor	v16.16b, v16.16b, v10.16b\n\t"
        "adds	x9, x9, #1\n\t"
        "eor	v24.16b, v24.16b, v16.16b\n\t"
        "adc	x10, x10, xzr\n\t"
        "st1	{v24.16b}, [%x[out]], #16\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_128_done_%=: \n\t"
        "cbz	%w[sz], L_aes_ctr_encrypt_arm64_crypto_128_partial_done_%=\n\t"
        "mov	v16.d[0], x10\n\t"
        "mov	v16.d[1], x9\n\t"
        "rev64	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "eor	v16.16b, v16.16b, v10.16b\n\t"
        "adds	x9, x9, #1\n\t"
        "adc	x10, x10, xzr\n\t"
        "st1	{v16.2d}, [%x[tmp]]\n\t"
        "mov	w13, #16\n\t"
        "sub	w13, w13, %w[sz]\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_128_start_byte_%=: \n\t"
        "ldrb	w11, [%x[tmp]], #1\n\t"
        "ldrb	w12, [%x[in]], #1\n\t"
        "eor	w11, w11, w12\n\t"
        "subs	%w[sz], %w[sz], #1\n\t"
        "strb	w11, [%x[out]], #1\n\t"
        "b.gt	L_aes_ctr_encrypt_arm64_crypto_128_start_byte_%=\n\t"
        "str	w13, [%x[left]]\n\t"
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_128_partial_done_%=: \n\t"
#endif /* !NO_AES_128 */
        "\n"
    "L_aes_ctr_encrypt_arm64_crypto_done_%=: \n\t"
        "rev	x11, x10\n\t"
        "rev	x12, x9\n\t"
        "stp	x11, x12, [%x[reg]]\n\t"
        "ldp	x29, x30, [sp], #32\n\t"
        : [out] "+r" (out), [sz] "+r" (sz), [reg] "+r" (reg), [key] "+r" (key),
          [tmp] "+r" (tmp), [left] "+r" (left), [nr] "+r" (nr)
        : [in] "r" (in)
        : "memory", "cc", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
            "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "v0",
            "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11",
            "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20",
            "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
            "v30", "v31"
    );
}

#endif /* WOLFSSL_AES_COUNTER */
#ifdef HAVE_AESGCM
/* Encrypt a single 16-byte block with the expanded AES key and store the
 * result, with the bits of every byte reversed, into gcm_h.
 *
 * nonce  16-byte block that is encrypted (only read).
 * key    Expanded round keys, read sequentially 16 bytes at a time;
 *        nr + 1 round keys are consumed on the 10/12/14 round paths.
 * gcm_h  Output buffer; receives 16 bytes.
 * nr     Number of AES rounds. 10 and 12 take the early exits below; any
 *        other value runs the full 14-round path.
 *
 * NOTE(review): presumably the caller passes an all-zero block as nonce so
 * that gcm_h becomes the GHASH key H - confirm against the caller.
 */
void AES_GCM_set_key_AARCH64(const byte* nonce, const byte* key, byte* gcm_h,
    int nr)
{
    __asm__ __volatile__ (
        /* Load the block and run the first eight rounds, common to all
         * key sizes, four round keys at a time. */
        "ld1	{v0.16b}, [%x[nonce]]\n\t"
        "ld1	{v1.2d, v2.2d, v3.2d, v4.2d}, [%x[key]], #0x40\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v2.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v3.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v4.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "ld1	{v1.2d, v2.2d, v3.2d, v4.2d}, [%x[key]], #0x40\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v2.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v3.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v4.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        /* Rounds 9 and 10. The flags set by subs survive the vector
         * instructions, so b.eq leaves here when nr == 10. */
        "subs	%w[nr], %w[nr], #10\n\t"
        "ld1	{v1.2d, v2.2d}, [%x[key]], #32\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v2.16b\n\t"
        "b.eq	L_aes_gcm_set_key_arm64_crypto_round_done_%=\n\t"
        /* Rounds 11 and 12; leave here when nr == 12. */
        "ld1	{v1.2d, v2.2d}, [%x[key]], #32\n\t"
        "subs	%w[nr], %w[nr], #2\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v2.16b\n\t"
        "b.eq	L_aes_gcm_set_key_arm64_crypto_round_done_%=\n\t"
        /* Rounds 13 and 14. */
        "ld1	{v1.2d, v2.2d}, [%x[key]], #32\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v1.16b\n\t"
        "aesmc	v0.16b, v0.16b\n\t"
        "aese	v0.16b, v2.16b\n\t"
        "\n"
    "L_aes_gcm_set_key_arm64_crypto_round_done_%=: \n\t"
        /* XOR in the last round key (no MixColumns after the final aese),
         * reverse the bits within each byte and store the result. */
        "ld1	{v1.2d}, [%x[key]]\n\t"
        "eor	v0.16b, v0.16b, v1.16b\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "st1	{v0.2d}, [%x[gcm_h]]\n\t"
        /* key is advanced by the post-indexed loads above, so it has to be
         * a read-write operand: the compiler assumes input-only operands
         * are unchanged when the asm statement finishes. */
        : [gcm_h] "+r" (gcm_h), [nr] "+r" (nr), [key] "+r" (key)
        : [nonce] "r" (nonce)
        : "memory", "cc", "v0", "v1", "v2", "v3", "v4"
    );
}

void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz,
    const byte* nonce, word32 nonceSz, byte* tag, word32 tagSz, const byte* aad,
    word32 aadSz, byte* key, byte* gcm_h, byte* tmp, byte* reg, int nr)
{
    __asm__ __volatile__ (
        "stp	x29, x30, [sp, #-80]!\n\t"
        "add	x29, sp, #0\n\t"
        "str	%w[nr], [sp, #72]\n\t"
        "str	%x[reg], [sp, #64]\n\t"
        "str	%x[tmp], [sp, #56]\n\t"
        "str	%x[gcm_h], [sp, #48]\n\t"
        "str	%x[key], [sp, #40]\n\t"
        "str	%w[aadSz], [sp, #32]\n\t"
        "movi	v27.16b, #0x87\n\t"
        "eor	v26.16b, v26.16b, v26.16b\n\t"
        "ushr	v27.2d, v27.2d, #56\n\t"
        "ld1	{v22.2d}, [x10]\n\t"
        "cmp	w8, #0x40\n\t"
        "csetm	x16, lt\n\t"
        "cmp	%w[sz], #32\n\t"
        "csetm	x17, lt\n\t"
        "ands	x16, x16, x17\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_h_done_%=\n\t"
        /* Square H => H^2 */
        "pmull2	v31.1q, v22.2d, v22.2d\n\t"
        "pmull	v30.1q, v22.1d, v22.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v23.16b, v30.16b, v31.16b\n\t"
        "cmp	w8, #0x100\n\t"
        "csetm	x16, lt\n\t"
        "cmp	%w[sz], #0x40\n\t"
        "csetm	x17, lt\n\t"
        "ands	x16, x16, x17\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_h_done_%=\n\t"
        /* Multiply H and H^2  => H^3 */
        "pmull	v28.1q, v22.1d, v23.1d\n\t"
        "pmull2	v29.1q, v22.2d, v23.2d\n\t"
        "ext	v31.16b, v22.16b, v22.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v23.1d\n\t"
        "pmull2	v31.1q, v31.2d, v23.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v24.16b, v28.16b, v30.16b\n\t"
        /* Square H^2 => H^4 */
        "pmull2	v31.1q, v23.2d, v23.2d\n\t"
        "pmull	v30.1q, v23.1d, v23.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v25.16b, v30.16b, v31.16b\n\t"
        /* Done */
        "cmp	w8, #0x400\n\t"
        "csetm	x16, lt\n\t"
        "cmp	%w[sz], #0x200\n\t"
        "csetm	x17, lt\n\t"
        "ands	x16, x16, x17\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_h_done_%=\n\t"
        /* Multiply H and H^4  => H^5 */
        "pmull	v28.1q, v22.1d, v25.1d\n\t"
        "pmull2	v29.1q, v22.2d, v25.2d\n\t"
        "ext	v31.16b, v22.16b, v22.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v25.1d\n\t"
        "pmull2	v31.1q, v31.2d, v25.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v4.16b, v28.16b, v30.16b\n\t"
        /* Square H^3 => H^6 */
        "pmull2	v31.1q, v24.2d, v24.2d\n\t"
        "pmull	v30.1q, v24.1d, v24.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v5.16b, v30.16b, v31.16b\n\t"
        /* Multiply H and H^6  => H^7 */
        "pmull	v28.1q, v22.1d, v5.1d\n\t"
        "pmull2	v29.1q, v22.2d, v5.2d\n\t"
        "ext	v31.16b, v22.16b, v22.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v5.1d\n\t"
        "pmull2	v31.1q, v31.2d, v5.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v6.16b, v28.16b, v30.16b\n\t"
        /* Square H^4 => H^8 */
        "pmull2	v31.1q, v25.2d, v25.2d\n\t"
        "pmull	v30.1q, v25.1d, v25.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v7.16b, v30.16b, v31.16b\n\t"
        /* Done */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_h_done_%=: \n\t"
        "lsr	w14, w8, #4\n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_aad_start_1_%=\n\t"
        "cmp	w14, #16\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_aad_start_2_%=\n\t"
        "cmp	w14, #0x40\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_aad_start_4_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_aad_start_8_%=: \n\t"
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t"
        "ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aad]], #0x40\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "sub	w14, w14, #8\n\t"
        "cmp	w14, #8\n\t"
        "b.ge	L_aes_gcm_encrypt_arm64_crypto_aad_start_8_%=\n\t"
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_aad_done_%=\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_aad_start_1_%=\n\t"
        "cmp	w14, #16\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_aad_start_2_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_aad_start_4_%=: \n\t"
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "sub	w14, w14, #4\n\t"
        "cmp	w14, #4\n\t"
        "b.ge	L_aes_gcm_encrypt_arm64_crypto_aad_start_4_%=\n\t"
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_aad_done_%=\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_aad_start_1_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_aad_start_2_%=: \n\t"
        "ld1	{v18.16b, v19.16b}, [%x[aad]], #32\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v19.1d, v22.1d\n\t"
        "pmull2	v29.1q, v19.2d, v22.2d\n\t"
        "ext	v31.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v21.1d\n\t"
        "pmull2	v26.1q, v23.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "sub	w14, w14, #2\n\t"
        "cmp	w14, #1\n\t"
        "b.gt	L_aes_gcm_encrypt_arm64_crypto_aad_start_2_%=\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_aad_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_aad_start_1_%=: \n\t"
        "cbz	w14, L_aes_gcm_encrypt_arm64_crypto_aad_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_aad_both_1_%=: \n\t"
        "ld1	{v18.16b}, [%x[aad]], #16\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "subs	w14, w14, #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_aad_both_1_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_aad_done_%=: \n\t"
        "and	w14, w8, #15\n\t"
        "cbz	w14, L_aes_gcm_encrypt_arm64_crypto_aad_partial_done_%=\n\t"
        "eor	v28.16b, v28.16b, v28.16b\n\t"
        "mov	w20, w14\n\t"
        "st1	{v28.2d}, [x11]\n\t"
        "cmp	w20, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_aad_start_dw_%=\n\t"
        "ldr	x19, [%x[aad]], #8\n\t"
        "sub	w20, w20, #8\n\t"
        "str	x19, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_aad_start_dw_%=: \n\t"
        "cmp	w20, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_aad_start_sw_%=\n\t"
        "ldr	w19, [%x[aad]], #4\n\t"
        "sub	w20, w20, #4\n\t"
        "str	w19, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_aad_start_sw_%=: \n\t"
        "cmp	w20, #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_aad_start_byte_%=\n\t"
        "ldrh	w19, [%x[aad]], #2\n\t"
        "sub	w20, w20, #2\n\t"
        "strh	w19, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_aad_start_byte_%=: \n\t"
        "cbz	w20, L_aes_gcm_encrypt_arm64_crypto_aad_end_bytes_%=\n\t"
        "ldrb	w19, [%x[aad]], #1\n\t"
        "subs	w20, w20, #1\n\t"
        "strb	w19, [x11], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_aad_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_aad_end_bytes_%=: \n\t"
        "sub	x11, x11, x14\n\t"
        "ld1	{v18.2d}, [x11]\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_aad_partial_done_%=: \n\t"
        /* Load Nonce */
        "cmp	%w[nonceSz], #12\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_ghash_nonce_%=\n\t"
        "ldr	x16, [%x[nonce]]\n\t"
        "movi	v13.4s, #1, lsl 24\n\t"
        "ldr	w17, [%x[nonce], #8]\n\t"
        "mov	v13.d[0], x16\n\t"
        "mov	v13.s[2], w17\n\t"
        "mov	w15, #1\n\t"
        "b	L_aes_gcm_encrypt_arm64_crypto_done_nonce_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_ghash_nonce_%=: \n\t"
        "eor	v13.16b, v13.16b, v13.16b\n\t"
        "lsr	w14, %w[nonceSz], #4\n\t"
        "cbz	w14, L_aes_gcm_encrypt_arm64_crypto_nonce_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_nonce_start_1_%=: \n\t"
        "ld1	{v18.16b}, [%x[nonce]], #16\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v13.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v13.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "subs	w14, w14, #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_nonce_start_1_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_nonce_done_%=: \n\t"
        "and	w24, %w[nonceSz], #15\n\t"
        "cbz	x24, L_aes_gcm_encrypt_arm64_crypto_nonce_partial_done_%=\n\t"
        "eor	v28.16b, v28.16b, v28.16b\n\t"
        "mov	w20, w24\n\t"
        "st1	{v28.2d}, [x11]\n\t"
        "cmp	w20, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_nonce_start_dw_%=\n\t"
        "ldr	x19, [%x[nonce]], #8\n\t"
        "sub	w20, w20, #8\n\t"
        "str	x19, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_nonce_start_dw_%=: \n\t"
        "cmp	w20, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_nonce_start_sw_%=\n\t"
        "ldr	w19, [%x[nonce]], #4\n\t"
        "sub	w20, w20, #4\n\t"
        "str	w19, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_nonce_start_sw_%=: \n\t"
        "cmp	w20, #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_nonce_start_byte_%=\n\t"
        "ldrh	w19, [%x[nonce]], #2\n\t"
        "sub	w20, w20, #2\n\t"
        "strh	w19, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_nonce_start_byte_%=: \n\t"
        "cbz	w20, L_aes_gcm_encrypt_arm64_crypto_nonce_end_bytes_%=\n\t"
        "ldrb	w19, [%x[nonce]], #1\n\t"
        "subs	w20, w20, #1\n\t"
        "strb	w19, [x11], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_nonce_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_nonce_end_bytes_%=: \n\t"
        "sub	x11, x11, x24\n\t"
        "ld1	{v18.2d}, [x11]\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v13.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v13.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_nonce_partial_done_%=: \n\t"
        "eor	x14, x14, x14\n\t"
        "lsl	x24, %x[nonceSz], #3\n\t"
        "mov	v28.d[0], x14\n\t"
        "mov	v28.d[1], x24\n\t"
        "rev64	v28.16b, v28.16b\n\t"
        "rbit	v28.16b, v28.16b\n\t"
        "eor	v13.16b, v13.16b, v28.16b\n\t"
        "pmull	v28.1q, v13.1d, v22.1d\n\t"
        "pmull2	v29.1q, v13.2d, v22.2d\n\t"
        "ext	v31.16b, v13.16b, v13.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v13.16b, v28.16b, v30.16b\n\t"
        "rbit	v13.16b, v13.16b\n\t"
        "mov	w15, v13.s[3]\n\t"
        "rev	w15, w15\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_done_nonce_%=: \n\t"
        "st1	{v13.2d}, [x12]\n\t"
        "lsr	w14, %w[sz], #4\n\t"
        "cmp	w13, #12\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_start_128_%=\n\t"
        "b.gt	L_aes_gcm_encrypt_arm64_crypto_start_256_%=\n\t"
        /* AES_GCM_192 */
#ifndef NO_AES_192
        "cmp	w14, #32\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_192_start_4_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_192_start_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rev	w24, w24\n\t"
        "rev	w23, w23\n\t"
        "rev	w22, w22\n\t"
        "rev	w21, w21\n\t"
        "rev	w20, w20\n\t"
        "rev	w19, w19\n\t"
        "rev	w17, w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w24\n\t"
        "mov	v15.s[3], w23\n\t"
        "mov	v16.s[3], w22\n\t"
        "mov	v17.s[3], w21\n\t"
        "mov	v8.s[3], w20\n\t"
        "mov	v9.s[3], w19\n\t"
        "mov	v10.s[3], w17\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #176]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #192]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "eor	v0.16b, v0.16b, v8.16b\n\t"
        "eor	v1.16b, v1.16b, v9.16b\n\t"
        "eor	v2.16b, v2.16b, v10.16b\n\t"
        "eor	v3.16b, v3.16b, v11.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_192_end_8_%=\n\t"
        "\n"
    /* Main 8-block loop of the AES-192 path (12 AESE rounds, 13 round keys
     * read from [x9] .. [x9, #192]). Each pass:
     *   - builds 8 counter blocks from the template in v13, replacing word 3
     *     with the byte-reversed values w15+1 .. w15+8 (w15 advances by 8);
     *   - encrypts them in v14-v17 and v8-v11, streaming the round keys
     *     alternately through v12 and v13;
     *   - interleaved with the AES rounds, folds the 8 output blocks of the
     *     previous pass (v18-v21, v0-v3) into the GHASH state v26;
     *   - loads 128 bytes from in, XORs them with the key stream and stores
     *     128 bytes to out.
     * w14 counts the blocks still to do; the loop repeats while w14 >= 8.
     */
    "L_aes_gcm_encrypt_arm64_crypto_192_both_8_%=: \n\t"
        /* Round key 0. v13 still holds the counter block template here. */
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        /* Reverse the bits within each byte of the previous 8 output blocks
         * (GHASH input form) while byte-reversing the new counter values.
         */
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w24, w24\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rev	w23, w23\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rev	w22, w22\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rev	w21, w21\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rev	w20, w20\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rev	w19, w19\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rev	w17, w17\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "rev	w16, w15\n\t"
        /* Insert each counter value into word 3 of its block. */
        "mov	v14.s[3], w24\n\t"
        "mov	v15.s[3], w23\n\t"
        "mov	v16.s[3], w22\n\t"
        "mov	v17.s[3], w21\n\t"
        "mov	v8.s[3], w20\n\t"
        "mov	v9.s[3], w19\n\t"
        "mov	v10.s[3], w17\n\t"
        "mov	v11.s[3], w16\n\t"
        /* From here v13 is reused as a round-key register. */
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /* Fold the running GHASH state into the oldest block (the one that
         * is multiplied by H^8 below).
         */
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /* GHASH accumulators: v28 = low-half products, v29 = high-half
         * products, v30 = cross (low x high) products.
         */
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   Reduce */
        /* Two folding multiplies by the high half of v27 (presumably the
         * GHASH reduction constant - it is set up outside this section)
         * collapse v28/v29/v30 into the new 128-bit state in v26.
         */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        /* Account for the 8 blocks handled by this pass. The loop test
         * itself is the cmp at the bottom.
         */
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        /* v18-v21 and v0-v3 are free again: load the next 8 input blocks
         * while the remaining AES rounds complete.
         */
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #176]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        /* Final round: no MixColumns, then XOR with the last round key. */
        "ldr	q12, [x9, #192]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        /* Restore the counter block template into v13 for the next pass. */
        "ld1	{v13.2d}, [x12]\n\t"
        /* Output = input XOR key stream. The results stay in v18-v21 and
         * v0-v3 so the next pass (or the section after the loop) can hash
         * them.
         */
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "eor	v0.16b, v0.16b, v8.16b\n\t"
        "eor	v1.16b, v1.16b, v9.16b\n\t"
        "eor	v2.16b, v2.16b, v10.16b\n\t"
        "eor	v3.16b, v3.16b, v11.16b\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        /* Another full group of 8 blocks left? */
        "cmp	w14, #8\n\t"
        "b.ge	L_aes_gcm_encrypt_arm64_crypto_192_both_8_%=\n\t"
        "\n"
    /* Tail of the 8-block phase: GHASH the last 8 output blocks, which are
     * still held in v18-v21 and v0-v3. This is the same computation as the
     * GHASH inside the 8-block loop, but with no AES rounds interleaved.
     * Operands: v22-v25 = H^1..H^4 and v4-v7 = H^5..H^8 (per the comments
     * below), v26 = running GHASH state (input and output), v27 supplies the
     * multiplier used in the reduction. Execution then falls through into
     * the 4-block phase.
     */
    "L_aes_gcm_encrypt_arm64_crypto_192_end_8_%=: \n\t"
        /* Reverse the bits within each byte of the 8 blocks for GHASH. */
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        /* Fold the running state into the oldest block (multiplied by H^8). */
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /* Accumulators: v28 = low-half products, v29 = high-half products,
         * v30 = cross (low x high) products. v26 and v31 are scratch until
         * the final result is written to v26.
         */
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        /* Two folding multiplies by the high half of v27 collapse
         * v28/v29/v30 into the new 128-bit state, written to v26.
         */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    /* Entry to the 4-block phase.
     * Loads all 13 round keys into v0-v12 from x9; the first three loads
     * post-increment x9, so it ends up 0xc0 bytes further on. This overwrites
     * v4-v7, which the 8-block code used as the H^5..H^8 multiplicands.
     * Then dispatches on w14 (blocks left):
     *   w14 <  1 -> done
     *   w14 == 1 -> start_1
     *   w14 <  4 -> start_2
     *   otherwise encrypt 4 blocks here.
     * No GHASH is performed in this section: the 4 output blocks are left in
     * v18-v21 to be hashed by the section that follows (both_4 or end_4).
     * v13 is used as the counter block template and w15 as the counter.
     */
    "L_aes_gcm_encrypt_arm64_crypto_192_start_4_%=: \n\t"
        "ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t"
        "ld1	{v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t"
        "ld1	{v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t"
        "ld1	{v12.2d}, [x9]\n\t"
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_192_done_%=\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_192_start_1_%=\n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_192_start_2_%=\n\t"
        /* Build 4 counter blocks: copy the template and replace word 3 with
         * the byte-reversed values w15+1 .. w15+4 (w15 advances by 4).
         */
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "rev	w19, w19\n\t"
        "rev	w17, w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w20\n\t"
        "mov	v15.s[3], w19\n\t"
        "mov	v16.s[3], w17\n\t"
        "mov	v17.s[3], w16\n\t"
        /* AES rounds 1-11 (AESE + AESMC) with round keys v0-v10, applied to
         * the four blocks in parallel.
         */
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /* Account for the 4 blocks being produced here. */
        "subs	w14, w14, #4\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /* The 4 input blocks are loaded into v18-v21 between the remaining
         * rounds.
         */
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /* Final round: AESE with v11 (no MixColumns), then XOR with the last
         * round key in v12.
         */
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        /* Output = input XOR key stream. */
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        /* st1 does not modify the flags, so the branch below still tests the
         * result of this cmp: fewer than 4 blocks left skips the 4-block
         * loop.
         */
        "cmp	w14, #4\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_192_end_4_%=\n\t"
        "\n"
    /* 4-block loop: encrypts counter blocks w15+1 .. w15+4 with the round
     * keys held in v0-v12 and, interleaved with the AES rounds, folds the
     * previous 4 output blocks (v18-v21) into the GHASH state v26 using
     * v22-v25 (H^1..H^4 per the comments below). After the GHASH is done
     * the next 64 input bytes are loaded into v18-v21, XORed with the key
     * stream and stored to out. Repeats while w14 (blocks left) >= 4.
     */
    "L_aes_gcm_encrypt_arm64_crypto_192_both_4_%=: \n\t"
        /* Build 4 counter blocks from the template in v13. */
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        /* rbit: reverse the bits within each byte of the previous output
         * blocks to put them in GHASH input form.
         */
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w20, w20\n\t"
        "rev	w19, w19\n\t"
        "rev	w17, w17\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w20\n\t"
        "mov	v15.s[3], w19\n\t"
        "mov	v16.s[3], w17\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        /* Fold the running GHASH state into the oldest block (multiplied by
         * H^4 below).
         */
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /* GHASH accumulators: v28 = low-half products, v29 = high-half
         * products, v30 = cross (low x high) products.
         */
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   Reduce */
        /* Two folding multiplies by the high half of v27 collapse
         * v28/v29/v30 into the new 128-bit state, written to v26.
         */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /* Account for the 4 blocks handled by this pass. The loop test
         * itself is the cmp at the bottom.
         */
        "subs	w14, w14, #4\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        /* v18-v21 are free again: load the next 4 input blocks. */
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /* Final round: AESE with v11 (no MixColumns), then XOR with the last
         * round key in v12.
         */
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        /* Output = input XOR key stream. The results stay in v18-v21 for the
         * next GHASH.
         */
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        /* st1 leaves the flags untouched, so b.ge tests this cmp. */
        "cmp	w14, #4\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "b.ge	L_aes_gcm_encrypt_arm64_crypto_192_both_4_%=\n\t"
        "\n"
    /* Tail of the 4-block phase: GHASH the last 4 output blocks (v18-v21)
     * into the state v26 with no AES work interleaved, then dispatch on the
     * number of blocks still left in w14:
     *   w14 == 1 -> start_1
     *   w14 <  1 -> done
     *   otherwise fall through to start_2.
     */
    "L_aes_gcm_encrypt_arm64_crypto_192_end_4_%=: \n\t"
        /* Reverse the bits within each byte of the 4 blocks for GHASH. */
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        /* Fold the running state into the oldest block (multiplied by H^4). */
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /* Accumulators: v28 = low-half products, v29 = high-half products,
         * v30 = cross (low x high) products.
         */
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        /* Two folding multiplies by the high half of v27 collapse
         * v28/v29/v30 into the new 128-bit state, written to v26.
         */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        /* 1 block left -> start_1, none -> done, 2 or 3 -> fall through. */
        "cmp	w14, #1\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_192_start_1_%=\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_192_done_%=\n\t"
        "\n"
    /* AES-192 tail, two full blocks: build two counter blocks, run them
     * through the 12-round cipher using the round keys in v0-v12, XOR with
     * two input blocks, store the result and fold both ciphertext blocks
     * into the running GHASH value in v26. */
    "L_aes_gcm_encrypt_arm64_crypto_192_start_2_%=: \n\t"
        /* v13 is the counter-block template; only its last 32-bit lane is
         * replaced, with the byte-reversed values w15+1 and w15+2.
         * w15 itself is advanced by 2. */
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w15, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w20\n\t"
        "mov	v15.s[3], w16\n\t"
        /* Rounds with v0..v10 (aese + aesmc), interleaved across the two
         * blocks in v14 and v15. */
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /* Two blocks consumed: w14 is the count of full blocks left and is
         * tested by the cbz at the end of this section. */
        "subs	w14, w14, #2\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /* Input block loads are interleaved with the remaining rounds. */
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /* Last round: aese with v11 (no aesmc), then XOR the final round
         * key in v12. */
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        /* output = input XOR encrypted counter; written out as 32 bytes. */
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "st1	{v18.16b, v19.16b}, [%x[out]], #32\n\t"
        /* GHASH works on bit-reversed bytes: reverse the ciphertext in
         * place, then add the running value v26 to the first block so that
         * it is the operand multiplied by H^2 below. */
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /* v28 = low product, v29 = high product, v30 = sum of the two
         * cross products (operand halves swapped with ext #8). */
        /*   X  = C * H^1 */
        "pmull	v28.1q, v19.1d, v22.1d\n\t"
        "pmull2	v29.1q, v19.2d, v22.2d\n\t"
        "ext	v31.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v21.1d\n\t"
        "pmull2	v26.1q, v23.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /* The accumulated products are folded back to 128 bits with two
         * pmull2 steps against v27; the result becomes the new v26.
         * NOTE(review): v27 is set up outside this section - presumably
         * the GHASH reduction constant; confirm. */
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        /* No full blocks left: skip the single-block section below. */
        "cbz	w14, L_aes_gcm_encrypt_arm64_crypto_192_done_%=\n\t"
        "\n"
    /* AES-192 tail, one full block: encrypt one counter block with the
     * round keys in v0-v12, XOR with one input block, store it and fold
     * the ciphertext into the running GHASH value in v26. */
    "L_aes_gcm_encrypt_arm64_crypto_192_start_1_%=: \n\t"
        /* Counter block = template v13 with its last 32-bit lane replaced
         * by the byte-reversed, incremented counter w15. */
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        /* Rounds with v0..v10 use aese + aesmc. */
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /* Last round: aese with v11 (no aesmc), then XOR the final round
         * key in v12. */
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        /* output = input XOR encrypted counter. */
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "st1	{v18.16b}, [%x[out]], #16\n\t"
        /* Bit-reverse each ciphertext byte for GHASH and add the running
         * value v26; the sum is multiplied by H (v22). */
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /* Fold low (v28), high (v29) and cross (v30) products back to 128
         * bits using v27; the result becomes the new v26. */
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    /* AES-192: handle a trailing partial block of (sz & 15) bytes.
     * The bytes are staged through the 16-byte scratch area at x11:
     * copied in zero-padded, XORed with one encrypted counter block, copied
     * out (only the valid bytes), then the scratch is zero-padded again and
     * folded into GHASH.
     * NOTE(review): x11 is set up outside this section - presumably a
     * stack scratch buffer of at least 16 bytes; confirm. */
    "L_aes_gcm_encrypt_arm64_crypto_192_done_%=: \n\t"
        /* w14 = number of trailing bytes; nothing to do when it is zero. */
        "ands	w14, %w[sz], #15\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_192_partial_done_%=\n\t"
        /* Zero the scratch block so bytes past the input stay zero. */
        "eor	v16.16b, v16.16b, v16.16b\n\t"
        "mov	w19, w14\n\t"
        "st1	{v16.2d}, [x11]\n\t"
        /* Copy w14 bytes from in to scratch in 8/4/2/1-byte pieces;
         * x19 counts the bytes still to copy. */
        "cmp	x19, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_192_start_dw_%=\n\t"
        "ldr	x17, [%x[in]], #8\n\t"
        "sub	x19, x19, #8\n\t"
        "str	x17, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_192_start_dw_%=: \n\t"
        "cmp	x19, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_192_start_sw_%=\n\t"
        "ldr	w17, [%x[in]], #4\n\t"
        "sub	x19, x19, #4\n\t"
        "str	w17, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_192_start_sw_%=: \n\t"
        "cmp	x19, #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_192_start_byte_%=\n\t"
        "ldrh	w17, [%x[in]], #2\n\t"
        "sub	x19, x19, #2\n\t"
        "strh	w17, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_192_start_byte_%=: \n\t"
        "cbz	x19, L_aes_gcm_encrypt_arm64_crypto_192_end_bytes_%=\n\t"
        "ldrb	w17, [%x[in]], #1\n\t"
        "subs	x19, x19, #1\n\t"
        "strb	w17, [x11], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_192_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_192_end_bytes_%=: \n\t"
        /* x11 was advanced by w14 during the copy: rewind it and load the
         * zero-padded block. */
        "sub	x11, x11, x14\n\t"
        "ld1	{v16.2d}, [x11]\n\t"
        /* Next counter block: template v13 with the byte-reversed,
         * incremented w15 in the last 32-bit lane. */
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        /* Encrypt it with the round keys in v0-v12. */
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        /* XOR the padded input with the encrypted counter and write all 16
         * bytes back to scratch; only the first w14 are valid output. */
        "eor	v16.16b, v16.16b, v14.16b\n\t"
        "st1	{v16.2d}, [x11]\n\t"
        /* Copy the w14 valid bytes from scratch to out in 8/4/2/1-byte
         * pieces. */
        "mov	w19, w14\n\t"
        "cmp	x19, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_192_out_start_dw_%=\n\t"
        "ldr	x17, [x11], #8\n\t"
        "sub	x19, x19, #8\n\t"
        "str	x17, [%x[out]], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_192_out_start_dw_%=: \n\t"
        "cmp	x19, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_192_out_start_sw_%=\n\t"
        "ldr	w17, [x11], #4\n\t"
        "sub	x19, x19, #4\n\t"
        "str	w17, [%x[out]], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_192_out_start_sw_%=: \n\t"
        "cmp	x19, #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_192_out_start_byte_%=\n\t"
        "ldrh	w17, [x11], #2\n\t"
        "sub	x19, x19, #2\n\t"
        "strh	w17, [%x[out]], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_192_out_start_byte_%=: \n\t"
        "cbz	x19, L_aes_gcm_encrypt_arm64_crypto_192_out_end_bytes_%=\n\t"
        "ldrb	w17, [x11], #1\n\t"
        "subs	x19, x19, #1\n\t"
        "strb	w17, [%x[out]], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_192_out_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_192_out_end_bytes_%=: \n\t"
        /* x11 now points just past the valid bytes. Overwrite the
         * remaining 16 - w14 scratch bytes (which hold leftover encrypted
         * counter bytes) with zero so GHASH sees a zero-padded block.
         * w14 is 1..15 here, so the loop count is at least 1. */
        "mov	x17, #16\n\t"
        "sub	x17, x17, x14\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_192_start_zero_%=: \n\t"
        "subs	x17, x17, #1\n\t"
        "strb	wzr, [x11], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_192_start_zero_%=\n\t"
        /* x11 has advanced a full 16 bytes: rewind and load the padded
         * ciphertext block, bit-reverse its bytes and add the running
         * GHASH value v26. */
        "sub	x11, x11, #16\n\t"
        "ld1	{v14.2d}, [x11]\n\t"
        "rbit	v14.16b, v14.16b\n\t"
        "eor	v15.16b, v26.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v15.1d, v22.1d\n\t"
        "pmull2	v29.1q, v15.2d, v22.2d\n\t"
        "ext	v31.16b, v15.16b, v15.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    /* AES-192: finish GHASH with the lengths block, encrypt the block
     * stored at x12 and XOR the two to form the tag, then write tagSz
     * bytes of it to tag.
     * NOTE(review): the block at x12 is presumably the initial counter
     * block, and x8 presumably the AAD length in bytes - both are set up
     * outside this section; confirm. */
    "L_aes_gcm_encrypt_arm64_crypto_192_partial_done_%=: \n\t"
        "ld1	{v14.2d}, [x12]\n\t"
        /* Lengths block: x8 and sz are converted from bytes to bits
         * (<< 3) and fully bit-reversed to match the bit-reversed GHASH
         * representation. sz is modified in place. */
        "lsl	x8, x8, #3\n\t"
        "rbit	x8, x8\n\t"
        "mov	v28.d[0], x8\n\t"
        "lsl	%x[sz], %x[sz], #3\n\t"
        "rbit	%x[sz], %x[sz]\n\t"
        "mov	v28.d[1], %x[sz]\n\t"
        "eor	v26.16b, v26.16b, v28.16b\n\t"
        /* From here the final GHASH multiply by H (v22) and its reduction
         * are interleaved with the AES rounds on v14 (keys v0-v12). */
        "pmull	v28.1q, v26.1d, v22.1d\n\t"
        "pmull2	v29.1q, v26.2d, v22.2d\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v26.16b, v26.16b, #8\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /* Undo the per-byte bit reversal used for GHASH. */
        "rbit	v26.16b, v26.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        /* tag = GHASH result XOR encrypted block. */
        "eor	v26.16b, v26.16b, v14.16b\n\t"
        /* Full 16-byte tag: store directly. Any other tagSz goes through
         * the scratch area at x11. */
        "cmp	%w[tagSz], #16\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_192_tag_partial_%=\n\t"
        "st1	{v26.16b}, [%x[tag]]\n\t"
        "b	L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_192_tag_partial_%=: \n\t"
        /* Write the whole tag to scratch, then copy tagSz bytes to tag in
         * 8/4/2/1-byte pieces. tagSz, tag and x11 are modified as the copy
         * proceeds. */
        "st1	{v26.16b}, [x11]\n\t"
        "cmp	%w[tagSz], #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_192_tag_start_dw_%=\n\t"
        "ldr	x16, [x11], #8\n\t"
        "sub	%w[tagSz], %w[tagSz], #8\n\t"
        "str	x16, [%x[tag]], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_192_tag_start_dw_%=: \n\t"
        "cmp	%w[tagSz], #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_192_tag_start_sw_%=\n\t"
        "ldr	w16, [x11], #4\n\t"
        "sub	%w[tagSz], %w[tagSz], #4\n\t"
        "str	w16, [%x[tag]], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_192_tag_start_sw_%=: \n\t"
        "cmp	%w[tagSz], #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_192_tag_start_byte_%=\n\t"
        "ldrh	w16, [x11], #2\n\t"
        "sub	%w[tagSz], %w[tagSz], #2\n\t"
        "strh	w16, [%x[tag]], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_192_tag_start_byte_%=: \n\t"
        "cbz	%w[tagSz], L_aes_gcm_encrypt_arm64_crypto_192_tag_end_bytes_%=\n\t"
        "ldrb	w16, [x11], #1\n\t"
        "subs	%w[tagSz], %w[tagSz], #1\n\t"
        "strb	w16, [%x[tag]], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_192_tag_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_192_tag_end_bytes_%=: \n\t"
#endif /* !NO_AES_192 */
        "b	L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t"
        /* AES_GCM_256 */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_start_256_%=: \n\t"
#ifndef NO_AES_256
        "cmp	w14, #32\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_256_start_4_%=\n\t"
        "\n"
    /* AES-256, first group of eight blocks: encrypt eight counter blocks
     * and XOR them with eight input blocks. No GHASH is done in this
     * section; the eight ciphertext blocks are left in v18-v21 and v0-v3
     * after being stored.
     * The round keys are not resident in registers on this path: they are
     * loaded one at a time from the key schedule at x9, alternating between
     * v12 and v13, so that the next key is loaded while the current one is
     * being applied. */
    "L_aes_gcm_encrypt_arm64_crypto_256_start_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        /* Eight counter blocks in v14-v17 and v8-v11: copies of the
         * template v13 whose last 32-bit lane is replaced by the
         * byte-reversed values w15+1 .. w15+8. w15 is advanced by 8. */
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rev	w24, w24\n\t"
        "rev	w23, w23\n\t"
        "rev	w22, w22\n\t"
        "rev	w21, w21\n\t"
        "rev	w20, w20\n\t"
        "rev	w19, w19\n\t"
        "rev	w17, w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w24\n\t"
        "mov	v15.s[3], w23\n\t"
        "mov	v16.s[3], w22\n\t"
        "mov	v17.s[3], w21\n\t"
        "mov	v8.s[3], w20\n\t"
        "mov	v9.s[3], w19\n\t"
        "mov	v10.s[3], w17\n\t"
        "mov	v11.s[3], w16\n\t"
        /* The template in v13 is no longer needed from here on; v13 is
         * reused for round keys and reloaded from x12 at the end. */
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        /* Eight blocks consumed: w14 is the count of full blocks left and
         * is compared against 8 at the end of this section. */
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        /* The eight input block loads (into v18-v21, then v0-v3) are
         * interleaved with the next two rounds. */
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #176]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #192]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #208]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        /* Last round: aese with the key at offset 208 (no aesmc), then XOR
         * the final round key loaded from offset 224. */
        "ldr	q12, [x9, #224]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        /* output = input XOR encrypted counter, for all eight blocks. */
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "eor	v0.16b, v0.16b, v8.16b\n\t"
        "eor	v1.16b, v1.16b, v9.16b\n\t"
        "eor	v2.16b, v2.16b, v10.16b\n\t"
        "eor	v3.16b, v3.16b, v11.16b\n\t"
        /* Restore the counter-block template into v13 from x12. */
        "ld1	{v13.2d}, [x12]\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        /* Fewer than eight blocks left: leave via end_8; otherwise fall
         * through to the section that follows. */
        "cmp	w14, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_256_end_8_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_both_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w24, w24\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rev	w23, w23\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rev	w22, w22\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rev	w21, w21\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rev	w20, w20\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rev	w19, w19\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rev	w17, w17\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w24\n\t"
        "mov	v15.s[3], w23\n\t"
        "mov	v16.s[3], w22\n\t"
        "mov	v17.s[3], w21\n\t"
        "mov	v8.s[3], w20\n\t"
        "mov	v9.s[3], w19\n\t"
        "mov	v10.s[3], w17\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #176]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #192]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #208]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #224]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "eor	v0.16b, v0.16b, v8.16b\n\t"
        "eor	v1.16b, v1.16b, v9.16b\n\t"
        "eor	v2.16b, v2.16b, v10.16b\n\t"
        "eor	v3.16b, v3.16b, v11.16b\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.ge	L_aes_gcm_encrypt_arm64_crypto_256_both_8_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_end_8_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_start_4_%=: \n\t"
        "ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t"
        "ld1	{v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t"
        "ld1	{v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t"
        "ld1	{v12.2d}, [x9], #16\n\t"
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_256_done_%=\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_256_start_1_%=\n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_256_start_2_%=\n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "rev	w19, w19\n\t"
        "rev	w17, w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w20\n\t"
        "mov	v15.s[3], w19\n\t"
        "mov	v16.s[3], w17\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v29.2d, v30.2d}, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "aese	v15.16b, v29.16b\n\t"
        "eor	v15.16b, v15.16b, v30.16b\n\t"
        "aese	v16.16b, v29.16b\n\t"
        "eor	v16.16b, v16.16b, v30.16b\n\t"
        "aese	v17.16b, v29.16b\n\t"
        "eor	v17.16b, v17.16b, v30.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_256_end_4_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_both_4_%=: \n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w20, w20\n\t"
        "rev	w19, w19\n\t"
        "rev	w17, w17\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w20\n\t"
        "mov	v15.s[3], w19\n\t"
        "mov	v16.s[3], w17\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v29.2d, v30.2d}, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "aese	v15.16b, v29.16b\n\t"
        "eor	v15.16b, v15.16b, v30.16b\n\t"
        "aese	v16.16b, v29.16b\n\t"
        "eor	v16.16b, v16.16b, v30.16b\n\t"
        "aese	v17.16b, v29.16b\n\t"
        "eor	v17.16b, v17.16b, v30.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "b.ge	L_aes_gcm_encrypt_arm64_crypto_256_both_4_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_end_4_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "cmp	w14, #1\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_256_start_1_%=\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_256_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_start_2_%=: \n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w15, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w20\n\t"
        "mov	v15.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "subs	w14, w14, #2\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v29.2d, v30.2d}, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "aese	v15.16b, v29.16b\n\t"
        "eor	v15.16b, v15.16b, v30.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "st1	{v18.16b, v19.16b}, [%x[out]], #32\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v19.1d, v22.1d\n\t"
        "pmull2	v29.1q, v19.2d, v22.2d\n\t"
        "ext	v31.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v21.1d\n\t"
        "pmull2	v26.1q, v23.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "cbz	w14, L_aes_gcm_encrypt_arm64_crypto_256_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_start_1_%=: \n\t"
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ldr	q29, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ldr	q30, [x9, #16]\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "st1	{v18.16b}, [%x[out]], #16\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_done_%=: \n\t"
        "ands	w14, %w[sz], #15\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_256_partial_done_%=\n\t"
        "eor	v16.16b, v16.16b, v16.16b\n\t"
        "mov	w19, w14\n\t"
        "st1	{v16.2d}, [x11]\n\t"
        "cmp	x19, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_256_start_dw_%=\n\t"
        "ldr	x17, [%x[in]], #8\n\t"
        "sub	x19, x19, #8\n\t"
        "str	x17, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_start_dw_%=: \n\t"
        "cmp	x19, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_256_start_sw_%=\n\t"
        "ldr	w17, [%x[in]], #4\n\t"
        "sub	x19, x19, #4\n\t"
        "str	w17, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_start_sw_%=: \n\t"
        "cmp	x19, #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_256_start_byte_%=\n\t"
        "ldrh	w17, [%x[in]], #2\n\t"
        "sub	x19, x19, #2\n\t"
        "strh	w17, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_start_byte_%=: \n\t"
        "cbz	x19, L_aes_gcm_encrypt_arm64_crypto_256_end_bytes_%=\n\t"
        "ldrb	w17, [%x[in]], #1\n\t"
        "subs	x19, x19, #1\n\t"
        "strb	w17, [x11], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_256_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_end_bytes_%=: \n\t"
        "sub	x11, x11, x14\n\t"
        "ld1	{v16.2d}, [x11]\n\t"
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ldr	q29, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ldr	q30, [x9, #16]\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "eor	v16.16b, v16.16b, v14.16b\n\t"
        "st1	{v16.2d}, [x11]\n\t"
        "mov	w19, w14\n\t"
        "cmp	x19, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_256_out_start_dw_%=\n\t"
        "ldr	x17, [x11], #8\n\t"
        "sub	x19, x19, #8\n\t"
        "str	x17, [%x[out]], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_out_start_dw_%=: \n\t"
        "cmp	x19, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_256_out_start_sw_%=\n\t"
        "ldr	w17, [x11], #4\n\t"
        "sub	x19, x19, #4\n\t"
        "str	w17, [%x[out]], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_out_start_sw_%=: \n\t"
        "cmp	x19, #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_256_out_start_byte_%=\n\t"
        "ldrh	w17, [x11], #2\n\t"
        "sub	x19, x19, #2\n\t"
        "strh	w17, [%x[out]], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_out_start_byte_%=: \n\t"
        "cbz	x19, L_aes_gcm_encrypt_arm64_crypto_256_out_end_bytes_%=\n\t"
        "ldrb	w17, [x11], #1\n\t"
        "subs	x19, x19, #1\n\t"
        "strb	w17, [%x[out]], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_256_out_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_out_end_bytes_%=: \n\t"
        "mov	x17, #16\n\t"
        "sub	x17, x17, x14\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_start_zero_%=: \n\t"
        "subs	x17, x17, #1\n\t"
        "strb	wzr, [x11], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_256_start_zero_%=\n\t"
        "sub	x11, x11, #16\n\t"
        "ld1	{v14.2d}, [x11]\n\t"
        "rbit	v14.16b, v14.16b\n\t"
        "eor	v15.16b, v26.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v15.1d, v22.1d\n\t"
        "pmull2	v29.1q, v15.2d, v22.2d\n\t"
        "ext	v31.16b, v15.16b, v15.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_partial_done_%=: \n\t"
        "ld1	{v14.2d}, [x12]\n\t"
        "lsl	x8, x8, #3\n\t"
        "rbit	x8, x8\n\t"
        "mov	v28.d[0], x8\n\t"
        "lsl	%x[sz], %x[sz], #3\n\t"
        "rbit	%x[sz], %x[sz]\n\t"
        "mov	v28.d[1], %x[sz]\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v26.16b, v28.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v28.1q, v26.1d, v22.1d\n\t"
        "pmull2	v29.1q, v26.2d, v22.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v26.16b, v26.16b, #8\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "ldr	q11, [x9, #-32]\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "ldr	q12, [x9, #-16]\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "ldr	q29, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "rbit	v26.16b, v26.16b\n\t"
        "ldr	q30, [x9, #16]\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "eor	v26.16b, v26.16b, v14.16b\n\t"
        "cmp	%w[tagSz], #16\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_256_tag_partial_%=\n\t"
        "st1	{v26.16b}, [%x[tag]]\n\t"
        "b	L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_tag_partial_%=: \n\t"
        "st1	{v26.16b}, [x11]\n\t"
        "cmp	%w[tagSz], #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_256_tag_start_dw_%=\n\t"
        "ldr	x16, [x11], #8\n\t"
        "sub	%w[tagSz], %w[tagSz], #8\n\t"
        "str	x16, [%x[tag]], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_tag_start_dw_%=: \n\t"
        "cmp	%w[tagSz], #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_256_tag_start_sw_%=\n\t"
        "ldr	w16, [x11], #4\n\t"
        "sub	%w[tagSz], %w[tagSz], #4\n\t"
        "str	w16, [%x[tag]], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_tag_start_sw_%=: \n\t"
        "cmp	%w[tagSz], #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_256_tag_start_byte_%=\n\t"
        "ldrh	w16, [x11], #2\n\t"
        "sub	%w[tagSz], %w[tagSz], #2\n\t"
        "strh	w16, [%x[tag]], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_tag_start_byte_%=: \n\t"
        "cbz	%w[tagSz], L_aes_gcm_encrypt_arm64_crypto_256_tag_end_bytes_%=\n\t"
        "ldrb	w16, [x11], #1\n\t"
        "subs	%w[tagSz], %w[tagSz], #1\n\t"
        "strb	w16, [%x[tag]], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_256_tag_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_256_tag_end_bytes_%=: \n\t"
#endif /* !NO_AES_256 */
        "b	L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t"
        /* AES_GCM_128 */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_start_128_%=: \n\t"
#ifndef NO_AES_128
        /* Entry dispatch for the AES-128 path.
         * w14 counts the 16-byte blocks still to be processed: every pass of
         * the 8-block code below consumes eight blocks of input and executes
         * "subs w14, w14, #8".
         * Use the 8-block pipeline whenever at least 8 blocks remain. This
         * threshold must match the loop tests inside that pipeline
         * ("cmp w14, #8"), because the start_4 tail is entered only once
         * (it post-increments x9 while loading the round keys) and is also
         * the fall-through target of end_8, i.e. it is reached with fewer
         * than 8 blocks left. A larger threshold would send 8 or more blocks
         * to that tail.
         */
        "cmp	w14, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_128_start_4_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_start_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rev	w24, w24\n\t"
        "rev	w23, w23\n\t"
        "rev	w22, w22\n\t"
        "rev	w21, w21\n\t"
        "rev	w20, w20\n\t"
        "rev	w19, w19\n\t"
        "rev	w17, w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w24\n\t"
        "mov	v15.s[3], w23\n\t"
        "mov	v16.s[3], w22\n\t"
        "mov	v17.s[3], w21\n\t"
        "mov	v8.s[3], w20\n\t"
        "mov	v9.s[3], w19\n\t"
        "mov	v10.s[3], w17\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "eor	v0.16b, v0.16b, v8.16b\n\t"
        "eor	v1.16b, v1.16b, v9.16b\n\t"
        "eor	v2.16b, v2.16b, v10.16b\n\t"
        "eor	v3.16b, v3.16b, v11.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_128_end_8_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_both_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w24, w24\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rev	w23, w23\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rev	w22, w22\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rev	w21, w21\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rev	w20, w20\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rev	w19, w19\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rev	w17, w17\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w24\n\t"
        "mov	v15.s[3], w23\n\t"
        "mov	v16.s[3], w22\n\t"
        "mov	v17.s[3], w21\n\t"
        "mov	v8.s[3], w20\n\t"
        "mov	v9.s[3], w19\n\t"
        "mov	v10.s[3], w17\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "eor	v0.16b, v0.16b, v8.16b\n\t"
        "eor	v1.16b, v1.16b, v9.16b\n\t"
        "eor	v2.16b, v2.16b, v10.16b\n\t"
        "eor	v3.16b, v3.16b, v11.16b\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.ge	L_aes_gcm_encrypt_arm64_crypto_128_both_8_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_end_8_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_start_4_%=: \n\t"
        "ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t"
        "ld1	{v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t"
        "ld1	{v8.2d, v9.2d}, [x9], #32\n\t"
        "ld1	{v10.2d}, [x9]\n\t"
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_128_done_%=\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_128_start_1_%=\n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_128_start_2_%=\n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "rev	w19, w19\n\t"
        "rev	w17, w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w20\n\t"
        "mov	v15.s[3], w19\n\t"
        "mov	v16.s[3], w17\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "eor	v15.16b, v15.16b, v10.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "eor	v16.16b, v16.16b, v10.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "eor	v17.16b, v17.16b, v10.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_128_end_4_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_both_4_%=: \n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w20, w20\n\t"
        "rev	w19, w19\n\t"
        "rev	w17, w17\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w20\n\t"
        "mov	v15.s[3], w19\n\t"
        "mov	v16.s[3], w17\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "eor	v15.16b, v15.16b, v10.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "eor	v16.16b, v16.16b, v10.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "eor	v17.16b, v17.16b, v10.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "b.ge	L_aes_gcm_encrypt_arm64_crypto_128_both_4_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_end_4_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "cmp	w14, #1\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_128_start_1_%=\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_128_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_start_2_%=: \n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w15, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w20\n\t"
        "mov	v15.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "subs	w14, w14, #2\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "eor	v15.16b, v15.16b, v10.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "st1	{v18.16b, v19.16b}, [%x[out]], #32\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v19.1d, v22.1d\n\t"
        "pmull2	v29.1q, v19.2d, v22.2d\n\t"
        "ext	v31.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v21.1d\n\t"
        "pmull2	v26.1q, v23.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "cbz	w14, L_aes_gcm_encrypt_arm64_crypto_128_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_start_1_%=: \n\t"
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "st1	{v18.16b}, [%x[out]], #16\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_done_%=: \n\t"
        "ands	w14, %w[sz], #15\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_128_partial_done_%=\n\t"
        "eor	v16.16b, v16.16b, v16.16b\n\t"
        "mov	w19, w14\n\t"
        "st1	{v16.2d}, [x11]\n\t"
        "cmp	x19, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_128_start_dw_%=\n\t"
        "ldr	x17, [%x[in]], #8\n\t"
        "sub	x19, x19, #8\n\t"
        "str	x17, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_start_dw_%=: \n\t"
        "cmp	x19, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_128_start_sw_%=\n\t"
        "ldr	w17, [%x[in]], #4\n\t"
        "sub	x19, x19, #4\n\t"
        "str	w17, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_start_sw_%=: \n\t"
        "cmp	x19, #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_128_start_byte_%=\n\t"
        "ldrh	w17, [%x[in]], #2\n\t"
        "sub	x19, x19, #2\n\t"
        "strh	w17, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_start_byte_%=: \n\t"
        "cbz	x19, L_aes_gcm_encrypt_arm64_crypto_128_end_bytes_%=\n\t"
        "ldrb	w17, [%x[in]], #1\n\t"
        "subs	x19, x19, #1\n\t"
        "strb	w17, [x11], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_128_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_end_bytes_%=: \n\t"
        "sub	x11, x11, x14\n\t"
        "ld1	{v16.2d}, [x11]\n\t"
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "eor	v16.16b, v16.16b, v14.16b\n\t"
        "st1	{v16.2d}, [x11]\n\t"
        "mov	w19, w14\n\t"
        "cmp	x19, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_128_out_start_dw_%=\n\t"
        "ldr	x17, [x11], #8\n\t"
        "sub	x19, x19, #8\n\t"
        "str	x17, [%x[out]], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_out_start_dw_%=: \n\t"
        "cmp	x19, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_128_out_start_sw_%=\n\t"
        "ldr	w17, [x11], #4\n\t"
        "sub	x19, x19, #4\n\t"
        "str	w17, [%x[out]], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_out_start_sw_%=: \n\t"
        "cmp	x19, #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_128_out_start_byte_%=\n\t"
        "ldrh	w17, [x11], #2\n\t"
        "sub	x19, x19, #2\n\t"
        "strh	w17, [%x[out]], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_out_start_byte_%=: \n\t"
        "cbz	x19, L_aes_gcm_encrypt_arm64_crypto_128_out_end_bytes_%=\n\t"
        "ldrb	w17, [x11], #1\n\t"
        "subs	x19, x19, #1\n\t"
        "strb	w17, [%x[out]], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_128_out_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_out_end_bytes_%=: \n\t"
        "mov	x17, #16\n\t"
        "sub	x17, x17, x14\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_start_zero_%=: \n\t"
        "subs	x17, x17, #1\n\t"
        "strb	wzr, [x11], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_128_start_zero_%=\n\t"
        "sub	x11, x11, #16\n\t"
        "ld1	{v14.2d}, [x11]\n\t"
        "rbit	v14.16b, v14.16b\n\t"
        "eor	v15.16b, v26.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v15.1d, v22.1d\n\t"
        "pmull2	v29.1q, v15.2d, v22.2d\n\t"
        "ext	v31.16b, v15.16b, v15.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_partial_done_%=: \n\t"
        "ld1	{v14.2d}, [x12]\n\t"
        "lsl	x8, x8, #3\n\t"
        "rbit	x8, x8\n\t"
        "mov	v28.d[0], x8\n\t"
        "lsl	%x[sz], %x[sz], #3\n\t"
        "rbit	%x[sz], %x[sz]\n\t"
        "mov	v28.d[1], %x[sz]\n\t"
        "eor	v26.16b, v26.16b, v28.16b\n\t"
        "pmull	v28.1q, v26.1d, v22.1d\n\t"
        "pmull2	v29.1q, v26.2d, v22.2d\n\t"
        "ext	v31.16b, v26.16b, v26.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "rbit	v26.16b, v26.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "eor	v26.16b, v26.16b, v14.16b\n\t"
        "cmp	%w[tagSz], #16\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_128_tag_partial_%=\n\t"
        "st1	{v26.16b}, [%x[tag]]\n\t"
        "b	L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_tag_partial_%=: \n\t"
        "st1	{v26.16b}, [x11]\n\t"
        "cmp	%w[tagSz], #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_128_tag_start_dw_%=\n\t"
        "ldr	x16, [x11], #8\n\t"
        "sub	%w[tagSz], %w[tagSz], #8\n\t"
        "str	x16, [%x[tag]], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_tag_start_dw_%=: \n\t"
        "cmp	%w[tagSz], #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_128_tag_start_sw_%=\n\t"
        "ldr	w16, [x11], #4\n\t"
        "sub	%w[tagSz], %w[tagSz], #4\n\t"
        "str	w16, [%x[tag]], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_tag_start_sw_%=: \n\t"
        "cmp	%w[tagSz], #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_128_tag_start_byte_%=\n\t"
        "ldrh	w16, [x11], #2\n\t"
        "sub	%w[tagSz], %w[tagSz], #2\n\t"
        "strh	w16, [%x[tag]], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_tag_start_byte_%=: \n\t"
        "cbz	%w[tagSz], L_aes_gcm_encrypt_arm64_crypto_128_tag_end_bytes_%=\n\t"
        "ldrb	w16, [x11], #1\n\t"
        "subs	%w[tagSz], %w[tagSz], #1\n\t"
        "strb	w16, [%x[tag]], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_128_tag_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_128_tag_end_bytes_%=: \n\t"
#endif /* !NO_AES_128 */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_done_%=: \n\t"
        "ldp	x29, x30, [sp], #0x50\n\t"
        : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz),
          [tag] "+r" (tag), [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz),
          [key] "+r" (key), [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp),
          [reg] "+r" (reg), [nr] "+r" (nr)
        : [in] "r" (in), [nonce] "r" (nonce), [aad] "r" (aad)
        : "memory", "cc", "x14", "x15", "x16", "x17", "x19", "x20", "x21",
            "x22", "x23", "x24", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
            "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
            "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
            "v27", "v28", "v29", "v30", "v31"
    );
}

#ifdef HAVE_AES_DECRYPT
int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz,
    const byte* nonce, word32 nonceSz, const byte* tag, word32 tagSz,
    const byte* aad, word32 aadSz, byte* key, byte* gcm_h, byte* tmp, byte* reg,
    int nr)
{
    __asm__ __volatile__ (
        "stp	x29, x30, [sp, #-80]!\n\t"
        "add	x29, sp, #0\n\t"
        "str	%w[nr], [sp, #72]\n\t"
        "str	%x[reg], [sp, #64]\n\t"
        "str	%x[tmp], [sp, #56]\n\t"
        "str	%x[gcm_h], [sp, #48]\n\t"
        "str	%x[key], [sp, #40]\n\t"
        "str	%w[aadSz], [sp, #32]\n\t"
        "movi	v27.16b, #0x87\n\t"
        "eor	v26.16b, v26.16b, v26.16b\n\t"
        "ushr	v27.2d, v27.2d, #56\n\t"
        "ld1	{v22.2d}, [x10]\n\t"
        "cmp	w8, #0x40\n\t"
        "csetm	x16, lt\n\t"
        "cmp	%w[sz], #32\n\t"
        "csetm	x17, lt\n\t"
        "ands	x16, x16, x17\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_h_done_%=\n\t"
        /* Square H => H^2 */
        "pmull2	v31.1q, v22.2d, v22.2d\n\t"
        "pmull	v30.1q, v22.1d, v22.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v23.16b, v30.16b, v31.16b\n\t"
        "cmp	w8, #0x100\n\t"
        "csetm	x16, lt\n\t"
        "cmp	%w[sz], #0x40\n\t"
        "csetm	x17, lt\n\t"
        "ands	x16, x16, x17\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_h_done_%=\n\t"
        /* Multiply H and H^2  => H^3 */
        "pmull	v28.1q, v22.1d, v23.1d\n\t"
        "pmull2	v29.1q, v22.2d, v23.2d\n\t"
        "ext	v31.16b, v22.16b, v22.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v23.1d\n\t"
        "pmull2	v31.1q, v31.2d, v23.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v24.16b, v28.16b, v30.16b\n\t"
        /* Square H^2 => H^4 */
        "pmull2	v31.1q, v23.2d, v23.2d\n\t"
        "pmull	v30.1q, v23.1d, v23.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v25.16b, v30.16b, v31.16b\n\t"
        /* Done */
        "cmp	w8, #0x400\n\t"
        "csetm	x16, lt\n\t"
        "cmp	%w[sz], #0x200\n\t"
        "csetm	x17, lt\n\t"
        "ands	x16, x16, x17\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_h_done_%=\n\t"
        /* Multiply H and H^4  => H^5 */
        "pmull	v28.1q, v22.1d, v25.1d\n\t"
        "pmull2	v29.1q, v22.2d, v25.2d\n\t"
        "ext	v31.16b, v22.16b, v22.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v25.1d\n\t"
        "pmull2	v31.1q, v31.2d, v25.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v4.16b, v28.16b, v30.16b\n\t"
        /* Square H^3 => H^6 */
        "pmull2	v31.1q, v24.2d, v24.2d\n\t"
        "pmull	v30.1q, v24.1d, v24.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v5.16b, v30.16b, v31.16b\n\t"
        /* Multiply H and H^6  => H^7 */
        "pmull	v28.1q, v22.1d, v5.1d\n\t"
        "pmull2	v29.1q, v22.2d, v5.2d\n\t"
        "ext	v31.16b, v22.16b, v22.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v5.1d\n\t"
        "pmull2	v31.1q, v31.2d, v5.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v6.16b, v28.16b, v30.16b\n\t"
        /* Square H^4 => H^8 */
        "pmull2	v31.1q, v25.2d, v25.2d\n\t"
        "pmull	v30.1q, v25.1d, v25.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v7.16b, v30.16b, v31.16b\n\t"
        /* Done */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_h_done_%=: \n\t"
        "lsr	w14, w8, #4\n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_aad_start_1_%=\n\t"
        "cmp	w14, #16\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_aad_start_2_%=\n\t"
        "cmp	w14, #0x40\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_aad_start_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_aad_start_8_%=: \n\t"
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t"
        "ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aad]], #0x40\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "sub	w14, w14, #8\n\t"
        "cmp	w14, #8\n\t"
        "b.ge	L_aes_gcm_decrypt_arm64_crypto_aad_start_8_%=\n\t"
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_aad_done_%=\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_aad_start_1_%=\n\t"
        "cmp	w14, #16\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_aad_start_2_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_aad_start_4_%=: \n\t"
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "sub	w14, w14, #4\n\t"
        "cmp	w14, #4\n\t"
        "b.ge	L_aes_gcm_decrypt_arm64_crypto_aad_start_4_%=\n\t"
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_aad_done_%=\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_aad_start_1_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_aad_start_2_%=: \n\t"
        "ld1	{v18.16b, v19.16b}, [%x[aad]], #32\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v19.1d, v22.1d\n\t"
        "pmull2	v29.1q, v19.2d, v22.2d\n\t"
        "ext	v31.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v21.1d\n\t"
        "pmull2	v26.1q, v23.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "sub	w14, w14, #2\n\t"
        "cmp	w14, #1\n\t"
        "b.gt	L_aes_gcm_decrypt_arm64_crypto_aad_start_2_%=\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_aad_done_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_aad_start_1_%=: \n\t"
        "cbz	w14, L_aes_gcm_decrypt_arm64_crypto_aad_done_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_aad_both_1_%=: \n\t"
        "ld1	{v18.16b}, [%x[aad]], #16\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "subs	w14, w14, #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_aad_both_1_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_aad_done_%=: \n\t"
        "and	w14, w8, #15\n\t"
        "cbz	w14, L_aes_gcm_decrypt_arm64_crypto_aad_partial_done_%=\n\t"
        "eor	v28.16b, v28.16b, v28.16b\n\t"
        "mov	w20, w14\n\t"
        "st1	{v28.2d}, [x11]\n\t"
        "cmp	w20, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_aad_start_dw_%=\n\t"
        "ldr	x19, [%x[aad]], #8\n\t"
        "sub	w20, w20, #8\n\t"
        "str	x19, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_aad_start_dw_%=: \n\t"
        "cmp	w20, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_aad_start_sw_%=\n\t"
        "ldr	w19, [%x[aad]], #4\n\t"
        "sub	w20, w20, #4\n\t"
        "str	w19, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_aad_start_sw_%=: \n\t"
        "cmp	w20, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_aad_start_byte_%=\n\t"
        "ldrh	w19, [%x[aad]], #2\n\t"
        "sub	w20, w20, #2\n\t"
        "strh	w19, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_aad_start_byte_%=: \n\t"
        "cbz	w20, L_aes_gcm_decrypt_arm64_crypto_aad_end_bytes_%=\n\t"
        "ldrb	w19, [%x[aad]], #1\n\t"
        "subs	w20, w20, #1\n\t"
        "strb	w19, [x11], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_aad_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_aad_end_bytes_%=: \n\t"
        "sub	x11, x11, x14\n\t"
        "ld1	{v18.2d}, [x11]\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_aad_partial_done_%=: \n\t"
        /* Load Nonce */
        "cmp	%w[nonceSz], #12\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_ghash_nonce_%=\n\t"
        "ldr	x16, [%x[nonce]]\n\t"
        "movi	v13.4s, #1, lsl 24\n\t"
        "ldr	w17, [%x[nonce], #8]\n\t"
        "mov	v13.d[0], x16\n\t"
        "mov	v13.s[2], w17\n\t"
        "mov	w15, #1\n\t"
        "b	L_aes_gcm_decrypt_arm64_crypto_done_nonce_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_ghash_nonce_%=: \n\t"
        "eor	v13.16b, v13.16b, v13.16b\n\t"
        "lsr	w14, %w[nonceSz], #4\n\t"
        "cbz	w14, L_aes_gcm_decrypt_arm64_crypto_nonce_done_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_nonce_start_1_%=: \n\t"
        "ld1	{v18.16b}, [%x[nonce]], #16\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v13.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v13.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "subs	w14, w14, #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_nonce_start_1_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_nonce_done_%=: \n\t"
        "and	w24, %w[nonceSz], #15\n\t"
        "cbz	x24, L_aes_gcm_decrypt_arm64_crypto_nonce_partial_done_%=\n\t"
        "eor	v28.16b, v28.16b, v28.16b\n\t"
        "mov	w20, w24\n\t"
        "st1	{v28.2d}, [x11]\n\t"
        "cmp	w20, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_nonce_start_dw_%=\n\t"
        "ldr	x19, [%x[nonce]], #8\n\t"
        "sub	w20, w20, #8\n\t"
        "str	x19, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_nonce_start_dw_%=: \n\t"
        "cmp	w20, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_nonce_start_sw_%=\n\t"
        "ldr	w19, [%x[nonce]], #4\n\t"
        "sub	w20, w20, #4\n\t"
        "str	w19, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_nonce_start_sw_%=: \n\t"
        "cmp	w20, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_nonce_start_byte_%=\n\t"
        "ldrh	w19, [%x[nonce]], #2\n\t"
        "sub	w20, w20, #2\n\t"
        "strh	w19, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_nonce_start_byte_%=: \n\t"
        "cbz	w20, L_aes_gcm_decrypt_arm64_crypto_nonce_end_bytes_%=\n\t"
        "ldrb	w19, [%x[nonce]], #1\n\t"
        "subs	w20, w20, #1\n\t"
        "strb	w19, [x11], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_nonce_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_nonce_end_bytes_%=: \n\t"
        "sub	x11, x11, x24\n\t"
        "ld1	{v18.2d}, [x11]\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v13.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v13.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_nonce_partial_done_%=: \n\t"
        "eor	x14, x14, x14\n\t"
        "lsl	x24, %x[nonceSz], #3\n\t"
        "mov	v28.d[0], x14\n\t"
        "mov	v28.d[1], x24\n\t"
        "rev64	v28.16b, v28.16b\n\t"
        "rbit	v28.16b, v28.16b\n\t"
        "eor	v13.16b, v13.16b, v28.16b\n\t"
        "pmull	v28.1q, v13.1d, v22.1d\n\t"
        "pmull2	v29.1q, v13.2d, v22.2d\n\t"
        "ext	v31.16b, v13.16b, v13.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v13.16b, v28.16b, v30.16b\n\t"
        "rbit	v13.16b, v13.16b\n\t"
        "mov	w15, v13.s[3]\n\t"
        "rev	w15, w15\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_done_nonce_%=: \n\t"
        "st1	{v13.2d}, [x12]\n\t"
        "lsr	w14, %w[sz], #4\n\t"
        "cmp	w13, #12\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_start_128_%=\n\t"
        "b.gt	L_aes_gcm_decrypt_arm64_crypto_start_256_%=\n\t"
        /* AES_GCM_192 */
#ifndef NO_AES_192
        "cmp	w14, #32\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_192_start_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_start_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rev	w24, w24\n\t"
        "mov	v14.s[3], w24\n\t"
        "rev	w23, w23\n\t"
        "mov	v15.s[3], w23\n\t"
        "rev	w22, w22\n\t"
        "mov	v16.s[3], w22\n\t"
        "rev	w21, w21\n\t"
        "mov	v17.s[3], w21\n\t"
        "rev	w20, w20\n\t"
        "mov	v8.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "mov	v9.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "mov	v10.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #176]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #192]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "eor	v8.16b, v8.16b, v0.16b\n\t"
        "eor	v9.16b, v9.16b, v1.16b\n\t"
        "eor	v10.16b, v10.16b, v2.16b\n\t"
        "eor	v11.16b, v11.16b, v3.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "st1	{v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_192_end_8_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_both_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w24, w24\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "mov	v14.s[3], w24\n\t"
        "rev	w23, w23\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "mov	v15.s[3], w23\n\t"
        "rev	w22, w22\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "mov	v16.s[3], w22\n\t"
        "rev	w21, w21\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "mov	v17.s[3], w21\n\t"
        "rev	w20, w20\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "mov	v8.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "mov	v9.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "mov	v10.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #176]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #192]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "eor	v8.16b, v8.16b, v0.16b\n\t"
        "eor	v9.16b, v9.16b, v1.16b\n\t"
        "eor	v10.16b, v10.16b, v2.16b\n\t"
        "eor	v11.16b, v11.16b, v3.16b\n\t"
        "st1	{v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.ge	L_aes_gcm_decrypt_arm64_crypto_192_both_8_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_end_8_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_start_4_%=: \n\t"
        "ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t"
        "ld1	{v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t"
        "ld1	{v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t"
        "ld1	{v12.2d}, [x9]\n\t"
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_192_done_%=\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_192_start_1_%=\n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_192_start_2_%=\n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "mov	v14.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "mov	v15.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "mov	v16.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_192_end_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_both_4_%=: \n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w20, w20\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "mov	v14.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "mov	v15.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "mov	v16.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "b.ge	L_aes_gcm_decrypt_arm64_crypto_192_both_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_end_4_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "cmp	w14, #1\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_192_start_1_%=\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_192_done_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_start_2_%=: \n\t"
        /* Two-block step.
         * Two counter blocks are made from v13 with lane 3 replaced by the
         * byte-reversed values w15+1 and w15+2 (w15 is advanced by 2).
         * Both are encrypted with the values held in v0-v12, XORed with
         * 32 bytes read from in and written to out.  The two input blocks
         * are then folded into the GHASH value kept in v26. */
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w15, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "mov	v14.s[3], w20\n\t"
        "rev	w16, w15\n\t"
        "mov	v15.s[3], w16\n\t"
        /* Rounds with v0-v6 on both counter blocks */
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /* w14 is reduced by the two blocks handled here; it is tested by
         * the cbz at the end of this step. */
        "subs	w14, w14, #2\n\t"
        /* Rounds with v7-v10; the two input blocks are loaded in between */
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /* Last round: aese with v11 (no aesmc), then XOR with v12 */
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        /* Encrypted counters XOR input blocks -> 32 bytes of output */
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "st1	{v14.16b, v15.16b}, [%x[out]], #32\n\t"
        /* GHASH over the two input blocks: bits of every byte are reversed
         * first, and the running value v26 is XORed into the first block
         * (result in v21). */
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v19.1d, v22.1d\n\t"
        "pmull2	v29.1q, v19.2d, v22.2d\n\t"
        "ext	v31.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v21.1d\n\t"
        "pmull2	v26.1q, v23.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        /* v28 = low product, v29 = high product, v30 = sum of the cross
         * products; folded with the constant in v27 into v26. */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        /* Nothing left in w14: skip the single-block step that follows. */
        "cbz	w14, L_aes_gcm_decrypt_arm64_crypto_192_done_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_start_1_%=: \n\t"
        /* Single-block step.
         * One 16-byte block is read from in.  The counter block is v13 with
         * lane 3 replaced by the byte-reversed value of w15+1.  Its
         * encryption (v0-v12) is interleaved with the GHASH update of v26
         * by the input block. */
        "ld1	{v15.16b}, [%x[in]], #16\n\t"
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        /* Bit-reversed copy of the input block is what GHASH consumes */
        "rbit	v15.16b, v15.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /* v16 = running hash XOR input block */
        "eor	v16.16b, v26.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v16.1d, v22.1d\n\t"
        "pmull2	v29.1q, v16.2d, v22.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v16.16b, v16.16b, #8\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        /* Last round: aese with v11 (no aesmc), then XOR with v12 */
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        /* Undo the bit reversal to get the original input bytes back, XOR
         * them with the encrypted counter and write 16 bytes to out. */
        "rbit	v15.16b, v15.16b\n\t"
        "eor	v14.16b, v14.16b, v15.16b\n\t"
        "st1	{v14.16b}, [%x[out]], #16\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_done_%=: \n\t"
        /* Handle a trailing partial block.  w14 = sz & 15; when it is zero
         * there is no tail and control goes straight to the tag stage. */
        "ands	w14, %w[sz], #15\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_192_partial_done_%=\n\t"
        /* Write 16 zero bytes at x11, then copy the w14 tail bytes from in
         * on top of them, so the block at x11 is the tail zero-padded to 16
         * bytes.  x19 counts the bytes still to copy.
         * NOTE(review): x11 is set up outside this section - presumably a
         * scratch buffer; confirm. */
        "eor	v15.16b, v15.16b, v15.16b\n\t"
        "mov	w19, w14\n\t"
        "st1	{v15.2d}, [x11]\n\t"
        /* Copy in 8-, 4-, 2- and 1-byte pieces as the remaining count allows */
        "cmp	x19, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_192_start_dw_%=\n\t"
        "ldr	x17, [%x[in]], #8\n\t"
        "sub	x19, x19, #8\n\t"
        "str	x17, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_start_dw_%=: \n\t"
        "cmp	x19, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_192_start_sw_%=\n\t"
        "ldr	w17, [%x[in]], #4\n\t"
        "sub	x19, x19, #4\n\t"
        "str	w17, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_start_sw_%=: \n\t"
        "cmp	x19, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_192_start_byte_%=\n\t"
        "ldrh	w17, [%x[in]], #2\n\t"
        "sub	x19, x19, #2\n\t"
        "strh	w17, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_start_byte_%=: \n\t"
        /* At most one byte is left at this point (count was below 16 and
         * the 8/4/2 steps above have run). */
        "cbz	x19, L_aes_gcm_decrypt_arm64_crypto_192_end_bytes_%=\n\t"
        "ldrb	w17, [%x[in]], #1\n\t"
        "subs	x19, x19, #1\n\t"
        "strb	w17, [x11], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_192_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_end_bytes_%=: \n\t"
        /* Process the zero-padded tail block that was assembled at x11.
         * x11 was advanced by w14 (= sz & 15) bytes while copying, so step
         * it back to the start of the block before loading it. */
        "sub	x11, x11, x14\n\t"
        "ld1	{v15.2d}, [x11]\n\t"
        /* Next counter block: v13 with lane 3 = byte-reversed (w15 + 1) */
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        /* Bit-reversed copy of the padded block is what GHASH consumes */
        "rbit	v15.16b, v15.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /* v16 = running hash XOR padded block */
        "eor	v16.16b, v26.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v16.1d, v22.1d\n\t"
        "pmull2	v29.1q, v16.2d, v22.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v16.16b, v16.16b, #8\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        /* Last round: aese with v11 (no aesmc), then XOR with v12 */
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        /* Undo the bit reversal, XOR with the encrypted counter and park the
         * 16-byte result at x11; only the first w14 bytes are copied to out. */
        "rbit	v15.16b, v15.16b\n\t"
        "eor	v14.16b, v14.16b, v15.16b\n\t"
        "st1	{v14.2d}, [x11]\n\t"
        /* Copy out in 8-, 4-, 2- and 1-byte pieces.  w14 is used as the
         * remaining-byte counter and ends up as zero. */
        "cmp	w14, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_192_out_start_dw_%=\n\t"
        "ldr	x17, [x11], #8\n\t"
        "sub	w14, w14, #8\n\t"
        "str	x17, [%x[out]], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_out_start_dw_%=: \n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_192_out_start_sw_%=\n\t"
        "ldr	w17, [x11], #4\n\t"
        "sub	w14, w14, #4\n\t"
        "str	w17, [%x[out]], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_out_start_sw_%=: \n\t"
        "cmp	w14, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte_%=\n\t"
        "ldrh	w17, [x11], #2\n\t"
        "sub	w14, w14, #2\n\t"
        "strh	w17, [%x[out]], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte_%=: \n\t"
        "cbz	w14, L_aes_gcm_decrypt_arm64_crypto_192_out_end_bytes_%=\n\t"
        "ldrb	w17, [x11], #1\n\t"
        "subs	w14, w14, #1\n\t"
        "strb	w17, [%x[out]], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_out_end_bytes_%=: \n\t"
        /* The copy-out above left x11 advanced by (sz & 15) bytes and used
         * up w14.  Recompute the tail length (w17 is free here) and step x11
         * back to the start of the block, so that the 16-byte scratch stores
         * made later through x11 cover the same 16 bytes as the ones above
         * instead of starting (sz & 15) bytes further on. */
        "and	w17, %w[sz], #15\n\t"
        "sub	x11, x11, x17\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_partial_done_%=: \n\t"
        /* Final GHASH step and tag computation.
         * A block is loaded from [x12] and encrypted with v0-v12 while the
         * length block is hashed; the two results are XORed into v26.
         * NOTE(review): [x12] presumably holds the initial counter block and
         * x8 the AAD length in bytes - both are set up outside this section;
         * confirm. */
        "ld1	{v14.2d}, [x12]\n\t"
        /* Length block: x8 * 8 and sz * 8, each bit-reversed over 64 bits,
         * go into the low and high halves of v28.  sz is overwritten here. */
        "lsl	x8, x8, #3\n\t"
        "rbit	x8, x8\n\t"
        "mov	v28.d[0], x8\n\t"
        "lsl	%x[sz], %x[sz], #3\n\t"
        "rbit	%x[sz], %x[sz]\n\t"
        "mov	v28.d[1], %x[sz]\n\t"
        /* Fold the length block into the running hash and multiply by v22 */
        "eor	v26.16b, v26.16b, v28.16b\n\t"
        "pmull	v28.1q, v26.1d, v22.1d\n\t"
        "pmull2	v29.1q, v26.2d, v22.2d\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v26.16b, v26.16b, #8\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /* Hash back to normal bit order within each byte */
        "rbit	v26.16b, v26.16b\n\t"
        /* Last round: aese with v11 (no aesmc), then XOR with v12 */
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        /* v26 = hash XOR encrypted block: the tag computed from the data */
        "eor	v26.16b, v26.16b, v14.16b\n\t"
        /* A tag shorter than 16 bytes is fetched byte-wise by the code that
         * follows; otherwise 16 bytes are loaded directly into v28. */
        "cmp	%w[tagSz], #16\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_192_part_tag_%=\n\t"
        "ld1	{v28.16b}, [%x[tag]]\n\t"
        "b	L_aes_gcm_decrypt_arm64_crypto_192_tag_loaded_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_part_tag_%=: \n\t"
        /* Short tag (tagSz < 16): build a zero-padded copy of the supplied
         * tag in the 16 bytes at x11 and load it into v28.  x17 counts the
         * tag bytes still to copy; the tag pointer is advanced as it is
         * read. */
        "eor	v28.16b, v28.16b, v28.16b\n\t"
        "mov	x17, %x[tagSz]\n\t"
        "st1	{v28.2d}, [x11]\n\t"
        /* Copy in 8-, 4-, 2- and 1-byte pieces as the remaining count allows */
        "cmp	x17, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_192_tag_start_dw_%=\n\t"
        "ldr	x16, [%x[tag]], #8\n\t"
        "sub	x17, x17, #8\n\t"
        "str	x16, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_tag_start_dw_%=: \n\t"
        "cmp	x17, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_192_tag_start_sw_%=\n\t"
        "ldr	w16, [%x[tag]], #4\n\t"
        "sub	x17, x17, #4\n\t"
        "str	w16, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_tag_start_sw_%=: \n\t"
        "cmp	x17, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_192_tag_start_byte_%=\n\t"
        "ldrh	w16, [%x[tag]], #2\n\t"
        "sub	x17, x17, #2\n\t"
        "strh	w16, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_tag_start_byte_%=: \n\t"
        "cbz	x17, L_aes_gcm_decrypt_arm64_crypto_192_tag_end_bytes_%=\n\t"
        "ldrb	w16, [%x[tag]], #1\n\t"
        "subs	x17, x17, #1\n\t"
        "strb	w16, [x11], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_192_tag_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_tag_end_bytes_%=: \n\t"
        /* Step x11 back over the copied bytes and load the padded tag into
         * v28.  The same 16 bytes are then reused for the computed tag (v26):
         * it is stored, its bytes from offset tagSz up to 15 are cleared, and
         * the truncated value is loaded back into v26, so both values are
         * zero beyond tagSz bytes. */
        "sub	x11, x11, %x[tagSz]\n\t"
        "ld1	{v28.2d}, [x11]\n\t"
        "mov	x17, #16\n\t"
        "st1	{v26.2d}, [x11]\n\t"
        /* x17 = 16 - tagSz bytes to clear, starting at offset tagSz */
        "sub	x17, x17, %x[tagSz]\n\t"
        "add	x11, x11, %x[tagSz]\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_calc_tag_byte_%=: \n\t"
        "strb	wzr, [x11], #1\n\t"
        "subs	x17, x17, #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_192_calc_tag_byte_%=\n\t"
        /* x11 is now 16 bytes past the start of the block; step back and
         * reload the truncated computed tag. */
        "subs	x11, x11, #16\n\t"
        "ld1	{v26.2d}, [x11]\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_192_tag_loaded_%=: \n\t"
        /* Compare the supplied tag (v28) with the computed tag (v26) without
         * branching on the data: XOR them and OR the two 64-bit halves of
         * the difference together. */
        "eor	v28.16b, v28.16b, v26.16b\n\t"
        "mov	x16, v28.d[0]\n\t"
        "mov	x17, v28.d[1]\n\t"
        /* NOTE(review): -180 is presumably AES_GCM_AUTH_E from
         * error-crypt.h - confirm. */
        "mov	w19, #-180\n\t"
        "orr	x16, x16, x17\n\t"
        "cmp	x16, #0\n\t"
        /* The in register is reused for the result: all ones when the tags
         * differ, else zero, then masked with x19.  Its low 32 bits end up
         * as -180 on a mismatch and 0 on a match. */
        "csetm	%x[in], ne\n\t"
        "and	%x[in], %x[in], x19\n\t"
#endif /* !NO_AES_192 */
        "b	L_aes_gcm_decrypt_arm64_crypto_done_%=\n\t"
        /* AES_GCM_256 */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_start_256_%=: \n\t"
#ifndef NO_AES_256
        "cmp	w14, #32\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_256_start_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_start_8_%=: \n\t"
        /* First eight-block step.
         * Eight counter blocks (v14-v17, v8-v11) are made from v13 with
         * lane 3 replaced by the byte-reversed values w15+1 .. w15+8, and
         * w15 is advanced by 8.  They are encrypted with the fifteen 16-byte
         * values at [x9] .. [x9, #224], which are loaded alternately into
         * v12 and v13 one step ahead of their use.  The result is XORed
         * with 128 bytes read from in and written to out.  No GHASH is done
         * here; the eight input blocks are left in v18-v21 and v0-v3.
         * NOTE(review): x9 is set up outside this section - presumably the
         * expanded key schedule; confirm. */
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rev	w24, w24\n\t"
        "mov	v14.s[3], w24\n\t"
        "rev	w23, w23\n\t"
        "mov	v15.s[3], w23\n\t"
        "rev	w22, w22\n\t"
        "mov	v16.s[3], w22\n\t"
        "rev	w21, w21\n\t"
        "mov	v17.s[3], w21\n\t"
        "rev	w20, w20\n\t"
        "mov	v8.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "mov	v9.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "mov	v10.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v11.s[3], w16\n\t"
        /* v13 is reused for key material from here on; it is reloaded from
         * [x12] near the end of this step. */
        "ldr	q13, [x9, #16]\n\t"
        /* Round with [x9] */
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        /* Round with [x9, #16] */
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        /* Round with [x9, #32] */
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        /* Round with [x9, #48] */
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        /* Round with [x9, #64] */
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        /* Round with [x9, #80] */
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        /* Round with [x9, #96] */
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        /* w14 is reduced by the eight blocks handled in this step */
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        /* Round with [x9, #112] */
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        /* Round with [x9, #128]; input blocks 1-4 are loaded into v18-v21
         * in between */
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        /* Round with [x9, #144]; input blocks 5-8 are loaded into v0-v3
         * in between */
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #176]\n\t"
        /* Round with [x9, #160] */
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #192]\n\t"
        /* Round with [x9, #176] */
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #208]\n\t"
        /* Round with [x9, #192] */
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #224]\n\t"
        /* Last round: aese with [x9, #208] (no aesmc), then XOR with
         * [x9, #224] */
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        /* Encrypted counters XOR input blocks -> 128 bytes of output */
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "eor	v8.16b, v8.16b, v0.16b\n\t"
        "eor	v9.16b, v9.16b, v1.16b\n\t"
        "eor	v10.16b, v10.16b, v2.16b\n\t"
        "eor	v11.16b, v11.16b, v3.16b\n\t"
        /* Reload v13 from [x12] now that the key loads are finished */
        "ld1	{v13.2d}, [x12]\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "st1	{v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t"
        /* Fewer than eight left in w14: leave the eight-block path;
         * otherwise fall through to the next label. */
        "cmp	w14, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_256_end_8_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_both_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w24, w24\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "mov	v14.s[3], w24\n\t"
        "rev	w23, w23\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "mov	v15.s[3], w23\n\t"
        "rev	w22, w22\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "mov	v16.s[3], w22\n\t"
        "rev	w21, w21\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "mov	v17.s[3], w21\n\t"
        "rev	w20, w20\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "mov	v8.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "mov	v9.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "mov	v10.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #176]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #192]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #208]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #224]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "eor	v8.16b, v8.16b, v0.16b\n\t"
        "eor	v9.16b, v9.16b, v1.16b\n\t"
        "eor	v10.16b, v10.16b, v2.16b\n\t"
        "eor	v11.16b, v11.16b, v3.16b\n\t"
        "st1	{v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.ge	L_aes_gcm_decrypt_arm64_crypto_256_both_8_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_end_8_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_start_4_%=: \n\t"
        "ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t"
        "ld1	{v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t"
        "ld1	{v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t"
        "ld1	{v12.2d}, [x9], #16\n\t"
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_256_done_%=\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_256_start_1_%=\n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_256_start_2_%=\n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "mov	v14.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "mov	v15.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "mov	v16.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v29.2d, v30.2d}, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "aese	v15.16b, v29.16b\n\t"
        "eor	v15.16b, v15.16b, v30.16b\n\t"
        "aese	v16.16b, v29.16b\n\t"
        "eor	v16.16b, v16.16b, v30.16b\n\t"
        "aese	v17.16b, v29.16b\n\t"
        "eor	v17.16b, v17.16b, v30.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_256_end_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_both_4_%=: \n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w20, w20\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "mov	v14.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "mov	v15.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "mov	v16.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v29.2d, v30.2d}, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "aese	v15.16b, v29.16b\n\t"
        "eor	v15.16b, v15.16b, v30.16b\n\t"
        "aese	v16.16b, v29.16b\n\t"
        "eor	v16.16b, v16.16b, v30.16b\n\t"
        "aese	v17.16b, v29.16b\n\t"
        "eor	v17.16b, v17.16b, v30.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "b.ge	L_aes_gcm_decrypt_arm64_crypto_256_both_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_end_4_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "cmp	w14, #1\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_256_start_1_%=\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_256_done_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_start_2_%=: \n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w15, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "mov	v14.s[3], w20\n\t"
        "rev	w16, w15\n\t"
        "mov	v15.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "subs	w14, w14, #2\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v29.2d, v30.2d}, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "aese	v15.16b, v29.16b\n\t"
        "eor	v15.16b, v15.16b, v30.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "st1	{v14.16b, v15.16b}, [%x[out]], #32\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v19.1d, v22.1d\n\t"
        "pmull2	v29.1q, v19.2d, v22.2d\n\t"
        "ext	v31.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v21.1d\n\t"
        "pmull2	v26.1q, v23.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "cbz	w14, L_aes_gcm_decrypt_arm64_crypto_256_done_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_start_1_%=: \n\t"
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ldr	q29, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ldr	q30, [x9, #16]\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "st1	{v14.16b}, [%x[out]], #16\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_done_%=: \n\t"
        "ands	w14, %w[sz], #15\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_256_partial_done_%=\n\t"
        "eor	v15.16b, v15.16b, v15.16b\n\t"
        "mov	w19, w14\n\t"
        "st1	{v15.2d}, [x11]\n\t"
        "cmp	x19, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_256_start_dw_%=\n\t"
        "ldr	x17, [%x[in]], #8\n\t"
        "sub	x19, x19, #8\n\t"
        "str	x17, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_start_dw_%=: \n\t"
        "cmp	x19, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_256_start_sw_%=\n\t"
        "ldr	w17, [%x[in]], #4\n\t"
        "sub	x19, x19, #4\n\t"
        "str	w17, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_start_sw_%=: \n\t"
        "cmp	x19, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_256_start_byte_%=\n\t"
        "ldrh	w17, [%x[in]], #2\n\t"
        "sub	x19, x19, #2\n\t"
        "strh	w17, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_start_byte_%=: \n\t"
        "cbz	x19, L_aes_gcm_decrypt_arm64_crypto_256_end_bytes_%=\n\t"
        "ldrb	w17, [%x[in]], #1\n\t"
        "subs	x19, x19, #1\n\t"
        "strb	w17, [x11], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_256_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_end_bytes_%=: \n\t"
        "sub	x11, x11, x14\n\t"
        "ld1	{v15.2d}, [x11]\n\t"
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rbit	v15.16b, v15.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v16.16b, v26.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v16.1d, v22.1d\n\t"
        "pmull2	v29.1q, v16.2d, v22.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v16.16b, v16.16b, #8\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ldr	q29, [x9]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ldr	q30, [x9, #16]\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "rbit	v15.16b, v15.16b\n\t"
        "eor	v14.16b, v14.16b, v15.16b\n\t"
        "st1	{v14.2d}, [x11]\n\t"
        "cmp	w14, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_256_out_start_dw_%=\n\t"
        "ldr	x17, [x11], #8\n\t"
        "sub	w14, w14, #8\n\t"
        "str	x17, [%x[out]], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_out_start_dw_%=: \n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_256_out_start_sw_%=\n\t"
        "ldr	w17, [x11], #4\n\t"
        "sub	w14, w14, #4\n\t"
        "str	w17, [%x[out]], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_out_start_sw_%=: \n\t"
        "cmp	w14, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte_%=\n\t"
        "ldrh	w17, [x11], #2\n\t"
        "sub	w14, w14, #2\n\t"
        "strh	w17, [%x[out]], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte_%=: \n\t"
        "cbz	w14, L_aes_gcm_decrypt_arm64_crypto_256_out_end_bytes_%=\n\t"
        "ldrb	w17, [x11], #1\n\t"
        "subs	w14, w14, #1\n\t"
        "strb	w17, [%x[out]], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_out_end_bytes_%=: \n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_partial_done_%=: \n\t"
        "ld1	{v14.2d}, [x12]\n\t"
        "lsl	x8, x8, #3\n\t"
        "rbit	x8, x8\n\t"
        "mov	v28.d[0], x8\n\t"
        "lsl	%x[sz], %x[sz], #3\n\t"
        "rbit	%x[sz], %x[sz]\n\t"
        "mov	v28.d[1], %x[sz]\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v26.16b, v28.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v28.1q, v26.1d, v22.1d\n\t"
        "pmull2	v29.1q, v26.2d, v22.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v26.16b, v26.16b, #8\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "ldr	q11, [x9, #-32]\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "ldr	q12, [x9, #-16]\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "ldr	q29, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "rbit	v26.16b, v26.16b\n\t"
        "ldr	q30, [x9, #16]\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "eor	v26.16b, v26.16b, v14.16b\n\t"
        "cmp	%w[tagSz], #16\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_256_part_tag_%=\n\t"
        "ld1	{v28.16b}, [%x[tag]]\n\t"
        "b	L_aes_gcm_decrypt_arm64_crypto_256_tag_loaded_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_part_tag_%=: \n\t"
        "eor	v28.16b, v28.16b, v28.16b\n\t"
        "mov	x17, %x[tagSz]\n\t"
        "st1	{v28.2d}, [x11]\n\t"
        "cmp	x17, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_256_tag_start_dw_%=\n\t"
        "ldr	x16, [%x[tag]], #8\n\t"
        "sub	x17, x17, #8\n\t"
        "str	x16, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_tag_start_dw_%=: \n\t"
        "cmp	x17, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_256_tag_start_sw_%=\n\t"
        "ldr	w16, [%x[tag]], #4\n\t"
        "sub	x17, x17, #4\n\t"
        "str	w16, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_tag_start_sw_%=: \n\t"
        "cmp	x17, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_256_tag_start_byte_%=\n\t"
        "ldrh	w16, [%x[tag]], #2\n\t"
        "sub	x17, x17, #2\n\t"
        "strh	w16, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_tag_start_byte_%=: \n\t"
        "cbz	x17, L_aes_gcm_decrypt_arm64_crypto_256_tag_end_bytes_%=\n\t"
        "ldrb	w16, [%x[tag]], #1\n\t"
        "subs	x17, x17, #1\n\t"
        "strb	w16, [x11], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_256_tag_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_tag_end_bytes_%=: \n\t"
        "sub	x11, x11, %x[tagSz]\n\t"
        "ld1	{v28.2d}, [x11]\n\t"
        "mov	x17, #16\n\t"
        "st1	{v26.2d}, [x11]\n\t"
        "sub	x17, x17, %x[tagSz]\n\t"
        "add	x11, x11, %x[tagSz]\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_calc_tag_byte_%=: \n\t"
        "strb	wzr, [x11], #1\n\t"
        "subs	x17, x17, #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_256_calc_tag_byte_%=\n\t"
        "subs	x11, x11, #16\n\t"
        "ld1	{v26.2d}, [x11]\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_256_tag_loaded_%=: \n\t"
        "eor	v28.16b, v28.16b, v26.16b\n\t"
        "mov	x16, v28.d[0]\n\t"
        "mov	x17, v28.d[1]\n\t"
        "mov	w19, #-180\n\t"
        "orr	x16, x16, x17\n\t"
        "cmp	x16, #0\n\t"
        "csetm	%x[in], ne\n\t"
        "and	%x[in], %x[in], x19\n\t"
#endif /* !NO_AES_256 */
        "b	L_aes_gcm_decrypt_arm64_crypto_done_%=\n\t"
        /* AES_GCM_128 */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_start_128_%=: \n\t"
#ifndef NO_AES_128
        "cmp	w14, #32\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_128_start_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_start_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rev	w24, w24\n\t"
        "mov	v14.s[3], w24\n\t"
        "rev	w23, w23\n\t"
        "mov	v15.s[3], w23\n\t"
        "rev	w22, w22\n\t"
        "mov	v16.s[3], w22\n\t"
        "rev	w21, w21\n\t"
        "mov	v17.s[3], w21\n\t"
        "rev	w20, w20\n\t"
        "mov	v8.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "mov	v9.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "mov	v10.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "eor	v8.16b, v8.16b, v0.16b\n\t"
        "eor	v9.16b, v9.16b, v1.16b\n\t"
        "eor	v10.16b, v10.16b, v2.16b\n\t"
        "eor	v11.16b, v11.16b, v3.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "st1	{v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_128_end_8_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_both_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w24, w24\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "mov	v14.s[3], w24\n\t"
        "rev	w23, w23\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "mov	v15.s[3], w23\n\t"
        "rev	w22, w22\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "mov	v16.s[3], w22\n\t"
        "rev	w21, w21\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "mov	v17.s[3], w21\n\t"
        "rev	w20, w20\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "mov	v8.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "mov	v9.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "mov	v10.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "eor	v8.16b, v8.16b, v0.16b\n\t"
        "eor	v9.16b, v9.16b, v1.16b\n\t"
        "eor	v10.16b, v10.16b, v2.16b\n\t"
        "eor	v11.16b, v11.16b, v3.16b\n\t"
        "st1	{v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.ge	L_aes_gcm_decrypt_arm64_crypto_128_both_8_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_end_8_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_start_4_%=: \n\t"
        "ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t"
        "ld1	{v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t"
        "ld1	{v8.2d, v9.2d}, [x9], #32\n\t"
        "ld1	{v10.2d}, [x9]\n\t"
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_128_done_%=\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_128_start_1_%=\n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_128_start_2_%=\n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "mov	v14.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "mov	v15.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "mov	v16.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "eor	v15.16b, v15.16b, v10.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "eor	v16.16b, v16.16b, v10.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "eor	v17.16b, v17.16b, v10.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_128_end_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_both_4_%=: \n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w20, w20\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "mov	v14.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "mov	v15.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "mov	v16.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "eor	v15.16b, v15.16b, v10.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "eor	v16.16b, v16.16b, v10.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "eor	v17.16b, v17.16b, v10.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "b.ge	L_aes_gcm_decrypt_arm64_crypto_128_both_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_end_4_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "cmp	w14, #1\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_128_start_1_%=\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_128_done_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_start_2_%=: \n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w15, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "mov	v14.s[3], w20\n\t"
        "rev	w16, w15\n\t"
        "mov	v15.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "subs	w14, w14, #2\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "eor	v15.16b, v15.16b, v10.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "st1	{v14.16b, v15.16b}, [%x[out]], #32\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v19.1d, v22.1d\n\t"
        "pmull2	v29.1q, v19.2d, v22.2d\n\t"
        "ext	v31.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v21.1d\n\t"
        "pmull2	v26.1q, v23.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "cbz	w14, L_aes_gcm_decrypt_arm64_crypto_128_done_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_start_1_%=: \n\t"
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "st1	{v14.16b}, [%x[out]], #16\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_done_%=: \n\t"
        "ands	w14, %w[sz], #15\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_128_partial_done_%=\n\t"
        "eor	v15.16b, v15.16b, v15.16b\n\t"
        "mov	w19, w14\n\t"
        "st1	{v15.2d}, [x11]\n\t"
        "cmp	x19, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_128_start_dw_%=\n\t"
        "ldr	x17, [%x[in]], #8\n\t"
        "sub	x19, x19, #8\n\t"
        "str	x17, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_start_dw_%=: \n\t"
        "cmp	x19, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_128_start_sw_%=\n\t"
        "ldr	w17, [%x[in]], #4\n\t"
        "sub	x19, x19, #4\n\t"
        "str	w17, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_start_sw_%=: \n\t"
        "cmp	x19, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_128_start_byte_%=\n\t"
        "ldrh	w17, [%x[in]], #2\n\t"
        "sub	x19, x19, #2\n\t"
        "strh	w17, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_start_byte_%=: \n\t"
        "cbz	x19, L_aes_gcm_decrypt_arm64_crypto_128_end_bytes_%=\n\t"
        "ldrb	w17, [%x[in]], #1\n\t"
        "subs	x19, x19, #1\n\t"
        "strb	w17, [x11], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_128_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_end_bytes_%=: \n\t"
        "sub	x11, x11, x14\n\t"
        "ld1	{v15.2d}, [x11]\n\t"
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rbit	v15.16b, v15.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v16.16b, v26.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v16.1d, v22.1d\n\t"
        "pmull2	v29.1q, v16.2d, v22.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v16.16b, v16.16b, #8\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "rbit	v15.16b, v15.16b\n\t"
        "eor	v14.16b, v14.16b, v15.16b\n\t"
        "st1	{v14.2d}, [x11]\n\t"
        "cmp	w14, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_128_out_start_dw_%=\n\t"
        "ldr	x17, [x11], #8\n\t"
        "sub	w14, w14, #8\n\t"
        "str	x17, [%x[out]], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_out_start_dw_%=: \n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_128_out_start_sw_%=\n\t"
        "ldr	w17, [x11], #4\n\t"
        "sub	w14, w14, #4\n\t"
        "str	w17, [%x[out]], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_out_start_sw_%=: \n\t"
        "cmp	w14, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte_%=\n\t"
        "ldrh	w17, [x11], #2\n\t"
        "sub	w14, w14, #2\n\t"
        "strh	w17, [%x[out]], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte_%=: \n\t"
        "cbz	w14, L_aes_gcm_decrypt_arm64_crypto_128_out_end_bytes_%=\n\t"
        "ldrb	w17, [x11], #1\n\t"
        "subs	w14, w14, #1\n\t"
        "strb	w17, [%x[out]], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_out_end_bytes_%=: \n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_partial_done_%=: \n\t"
        "ld1	{v14.2d}, [x12]\n\t"
        "lsl	x8, x8, #3\n\t"
        "rbit	x8, x8\n\t"
        "mov	v28.d[0], x8\n\t"
        "lsl	%x[sz], %x[sz], #3\n\t"
        "rbit	%x[sz], %x[sz]\n\t"
        "mov	v28.d[1], %x[sz]\n\t"
        "eor	v26.16b, v26.16b, v28.16b\n\t"
        "pmull	v28.1q, v26.1d, v22.1d\n\t"
        "pmull2	v29.1q, v26.2d, v22.2d\n\t"
        "ext	v31.16b, v26.16b, v26.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "rbit	v26.16b, v26.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "eor	v26.16b, v26.16b, v14.16b\n\t"
        "cmp	%w[tagSz], #16\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_128_part_tag_%=\n\t"
        "ld1	{v28.16b}, [%x[tag]]\n\t"
        "b	L_aes_gcm_decrypt_arm64_crypto_128_tag_loaded_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_part_tag_%=: \n\t"
        "eor	v28.16b, v28.16b, v28.16b\n\t"
        "mov	x17, %x[tagSz]\n\t"
        "st1	{v28.2d}, [x11]\n\t"
        "cmp	x17, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_128_tag_start_dw_%=\n\t"
        "ldr	x16, [%x[tag]], #8\n\t"
        "sub	x17, x17, #8\n\t"
        "str	x16, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_tag_start_dw_%=: \n\t"
        "cmp	x17, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_128_tag_start_sw_%=\n\t"
        "ldr	w16, [%x[tag]], #4\n\t"
        "sub	x17, x17, #4\n\t"
        "str	w16, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_tag_start_sw_%=: \n\t"
        "cmp	x17, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_128_tag_start_byte_%=\n\t"
        "ldrh	w16, [%x[tag]], #2\n\t"
        "sub	x17, x17, #2\n\t"
        "strh	w16, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_tag_start_byte_%=: \n\t"
        "cbz	x17, L_aes_gcm_decrypt_arm64_crypto_128_tag_end_bytes_%=\n\t"
        "ldrb	w16, [%x[tag]], #1\n\t"
        "subs	x17, x17, #1\n\t"
        "strb	w16, [x11], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_128_tag_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_tag_end_bytes_%=: \n\t"
        "sub	x11, x11, %x[tagSz]\n\t"
        "ld1	{v28.2d}, [x11]\n\t"
        "mov	x17, #16\n\t"
        "st1	{v26.2d}, [x11]\n\t"
        "sub	x17, x17, %x[tagSz]\n\t"
        "add	x11, x11, %x[tagSz]\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_calc_tag_byte_%=: \n\t"
        "strb	wzr, [x11], #1\n\t"
        "subs	x17, x17, #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_128_calc_tag_byte_%=\n\t"
        "subs	x11, x11, #16\n\t"
        "ld1	{v26.2d}, [x11]\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_128_tag_loaded_%=: \n\t"
        "eor	v28.16b, v28.16b, v26.16b\n\t"
        "mov	x16, v28.d[0]\n\t"
        "mov	x17, v28.d[1]\n\t"
        "mov	w19, #-180\n\t"
        "orr	x16, x16, x17\n\t"
        "cmp	x16, #0\n\t"
        "csetm	%x[in], ne\n\t"
        "and	%x[in], %x[in], x19\n\t"
#endif /* !NO_AES_128 */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_done_%=: \n\t"
        "ldp	x29, x30, [sp], #0x50\n\t"
        : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz),
          [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), [key] "+r" (key),
          [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp), [reg] "+r" (reg),
          [nr] "+r" (nr)
        : [in] "r" (in), [nonce] "r" (nonce), [tag] "r" (tag), [aad] "r" (aad)
        : "memory", "cc", "x14", "x15", "x16", "x17", "x19", "x20", "x21",
            "x22", "x23", "x24", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
            "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
            "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
            "v27", "v28", "v29", "v30", "v31"
    );
    return (word32)(size_t)in;
}

#endif /* HAVE_AES_DECRYPT */
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz,
    const byte* nonce, word32 nonceSz, byte* tag, word32 tagSz, const byte* aad,
    word32 aadSz, byte* key, byte* gcm_h, byte* tmp, byte* reg, int nr)
{
    __asm__ __volatile__ (
        "stp	x29, x30, [sp, #-80]!\n\t"
        "add	x29, sp, #0\n\t"
        "str	%w[nr], [sp, #72]\n\t"
        "str	%x[reg], [sp, #64]\n\t"
        "str	%x[tmp], [sp, #56]\n\t"
        "str	%x[gcm_h], [sp, #48]\n\t"
        "str	%x[key], [sp, #40]\n\t"
        "str	%w[aadSz], [sp, #32]\n\t"
        "movi	v27.16b, #0x87\n\t"
        "eor	v26.16b, v26.16b, v26.16b\n\t"
        "ushr	v27.2d, v27.2d, #56\n\t"
        "ld1	{v22.2d}, [x10]\n\t"
        "cmp	w8, #0x40\n\t"
        "csetm	x16, lt\n\t"
        "cmp	%w[sz], #32\n\t"
        "csetm	x17, lt\n\t"
        "ands	x16, x16, x17\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_h_done_%=\n\t"
        /* Square H => H^2 */
        "pmull2	v31.1q, v22.2d, v22.2d\n\t"
        "pmull	v30.1q, v22.1d, v22.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v23.16b, v30.16b, v31.16b\n\t"
        "cmp	w8, #0x100\n\t"
        "csetm	x16, lt\n\t"
        "cmp	%w[sz], #0x40\n\t"
        "csetm	x17, lt\n\t"
        "ands	x16, x16, x17\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_h_done_%=\n\t"
        /* Multiply H and H^2  => H^3 */
        "pmull	v28.1q, v22.1d, v23.1d\n\t"
        "pmull2	v29.1q, v22.2d, v23.2d\n\t"
        "ext	v31.16b, v22.16b, v22.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v23.1d\n\t"
        "pmull2	v31.1q, v31.2d, v23.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v24.16b, v28.16b, v30.16b\n\t"
        /* Square H^2 => H^4 */
        "pmull2	v31.1q, v23.2d, v23.2d\n\t"
        "pmull	v30.1q, v23.1d, v23.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v25.16b, v30.16b, v31.16b\n\t"
        /* Done */
        "cmp	w8, #0x400\n\t"
        "csetm	x16, lt\n\t"
        "cmp	%w[sz], #0x200\n\t"
        "csetm	x17, lt\n\t"
        "ands	x16, x16, x17\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_h_done_%=\n\t"
        /* Multiply H and H^4  => H^5 */
        "pmull	v28.1q, v22.1d, v25.1d\n\t"
        "pmull2	v29.1q, v22.2d, v25.2d\n\t"
        "ext	v31.16b, v22.16b, v22.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v25.1d\n\t"
        "pmull2	v31.1q, v31.2d, v25.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v4.16b, v28.16b, v30.16b\n\t"
        /* Square H^3 => H^6 */
        "pmull2	v31.1q, v24.2d, v24.2d\n\t"
        "pmull	v30.1q, v24.1d, v24.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v5.16b, v30.16b, v31.16b\n\t"
        /* Multiply H and H^6  => H^7 */
        "pmull	v28.1q, v22.1d, v5.1d\n\t"
        "pmull2	v29.1q, v22.2d, v5.2d\n\t"
        "ext	v31.16b, v22.16b, v22.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v5.1d\n\t"
        "pmull2	v31.1q, v31.2d, v5.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v6.16b, v28.16b, v30.16b\n\t"
        /* Square H^4 => H^8 */
        "pmull2	v31.1q, v25.2d, v25.2d\n\t"
        "pmull	v30.1q, v25.1d, v25.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v7.16b, v30.16b, v31.16b\n\t"
        /* Done */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_h_done_%=: \n\t"
        "lsr	w14, w8, #4\n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_1_%=\n\t"
        "cmp	w14, #16\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_2_%=\n\t"
        "cmp	w14, #0x40\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_4_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_8_%=: \n\t"
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t"
        "ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aad]], #0x40\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "sub	w14, w14, #8\n\t"
        "cmp	w14, #8\n\t"
        "b.ge	L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_8_%=\n\t"
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_aad_done_%=\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_1_%=\n\t"
        "cmp	w14, #16\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_2_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_4_%=: \n\t"
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "sub	w14, w14, #4\n\t"
        "cmp	w14, #4\n\t"
        "b.ge	L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_4_%=\n\t"
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_aad_done_%=\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_1_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_2_%=: \n\t"
        "ld1	{v18.16b, v19.16b}, [%x[aad]], #32\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v19.1d, v22.1d\n\t"
        "pmull2	v29.1q, v19.2d, v22.2d\n\t"
        "ext	v31.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v21.1d\n\t"
        "pmull2	v26.1q, v23.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "sub	w14, w14, #2\n\t"
        "cmp	w14, #1\n\t"
        "b.gt	L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_2_%=\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_aad_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_1_%=: \n\t"
        "cbz	w14, L_aes_gcm_encrypt_arm64_crypto_eor3_aad_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_both_1_%=: \n\t"
        "ld1	{v18.16b}, [%x[aad]], #16\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "subs	w14, w14, #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_aad_both_1_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_done_%=: \n\t"
        "and	w14, w8, #15\n\t"
        "cbz	w14, L_aes_gcm_encrypt_arm64_crypto_eor3_aad_partial_done_%=\n\t"
        "eor	v28.16b, v28.16b, v28.16b\n\t"
        "mov	w20, w14\n\t"
        "st1	{v28.2d}, [x11]\n\t"
        "cmp	w20, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_dw_%=\n\t"
        "ldr	x19, [%x[aad]], #8\n\t"
        "sub	w20, w20, #8\n\t"
        "str	x19, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_dw_%=: \n\t"
        "cmp	w20, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_sw_%=\n\t"
        "ldr	w19, [%x[aad]], #4\n\t"
        "sub	w20, w20, #4\n\t"
        "str	w19, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_sw_%=: \n\t"
        "cmp	w20, #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t"
        "ldrh	w19, [%x[aad]], #2\n\t"
        "sub	w20, w20, #2\n\t"
        "strh	w19, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_byte_%=: \n\t"
        "cbz	w20, L_aes_gcm_encrypt_arm64_crypto_eor3_aad_end_bytes_%=\n\t"
        "ldrb	w19, [%x[aad]], #1\n\t"
        "subs	w20, w20, #1\n\t"
        "strb	w19, [x11], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_end_bytes_%=: \n\t"
        "sub	x11, x11, x14\n\t"
        "ld1	{v18.2d}, [x11]\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_partial_done_%=: \n\t"
        /* Load Nonce */
        "cmp	%w[nonceSz], #12\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_ghash_nonce_%=\n\t"
        "ldr	x16, [%x[nonce]]\n\t"
        "movi	v13.4s, #1, lsl 24\n\t"
        "ldr	w17, [%x[nonce], #8]\n\t"
        "mov	v13.d[0], x16\n\t"
        "mov	v13.s[2], w17\n\t"
        "mov	w15, #1\n\t"
        "b	L_aes_gcm_encrypt_arm64_crypto_eor3_done_nonce_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_ghash_nonce_%=: \n\t"
        "eor	v13.16b, v13.16b, v13.16b\n\t"
        "lsr	w14, %w[nonceSz], #4\n\t"
        "cbz	w14, L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_1_%=: \n\t"
        "ld1	{v18.16b}, [%x[nonce]], #16\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v13.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v13.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "subs	w14, w14, #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_1_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_done_%=: \n\t"
        "and	w24, %w[nonceSz], #15\n\t"
        "cbz	x24, L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_partial_done_%=\n\t"
        "eor	v28.16b, v28.16b, v28.16b\n\t"
        "mov	w20, w24\n\t"
        "st1	{v28.2d}, [x11]\n\t"
        "cmp	w20, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_dw_%=\n\t"
        "ldr	x19, [%x[nonce]], #8\n\t"
        "sub	w20, w20, #8\n\t"
        "str	x19, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_dw_%=: \n\t"
        "cmp	w20, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_sw_%=\n\t"
        "ldr	w19, [%x[nonce]], #4\n\t"
        "sub	w20, w20, #4\n\t"
        "str	w19, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_sw_%=: \n\t"
        "cmp	w20, #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t"
        "ldrh	w19, [%x[nonce]], #2\n\t"
        "sub	w20, w20, #2\n\t"
        "strh	w19, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_byte_%=: \n\t"
        "cbz	w20, L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_end_bytes_%=\n\t"
        "ldrb	w19, [%x[nonce]], #1\n\t"
        "subs	w20, w20, #1\n\t"
        "strb	w19, [x11], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_end_bytes_%=: \n\t"
        "sub	x11, x11, x24\n\t"
        "ld1	{v18.2d}, [x11]\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v13.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v13.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_partial_done_%=: \n\t"
        "eor	x14, x14, x14\n\t"
        "lsl	x24, %x[nonceSz], #3\n\t"
        "mov	v28.d[0], x14\n\t"
        "mov	v28.d[1], x24\n\t"
        "rev64	v28.16b, v28.16b\n\t"
        "rbit	v28.16b, v28.16b\n\t"
        "eor	v13.16b, v13.16b, v28.16b\n\t"
        "pmull	v28.1q, v13.1d, v22.1d\n\t"
        "pmull2	v29.1q, v13.2d, v22.2d\n\t"
        "ext	v31.16b, v13.16b, v13.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v13.16b, v28.16b, v30.16b\n\t"
        "rbit	v13.16b, v13.16b\n\t"
        "mov	w15, v13.s[3]\n\t"
        "rev	w15, w15\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_done_nonce_%=: \n\t"
        "st1	{v13.2d}, [x12]\n\t"
        "lsr	w14, %w[sz], #4\n\t"
        "cmp	w13, #12\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_start_128_%=\n\t"
        "b.gt	L_aes_gcm_encrypt_arm64_crypto_eor3_start_256_%=\n\t"
        /* AES_GCM_192 */
#ifndef NO_AES_192
        "cmp	w14, #32\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_4_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rev	w24, w24\n\t"
        "rev	w23, w23\n\t"
        "rev	w22, w22\n\t"
        "rev	w21, w21\n\t"
        "rev	w20, w20\n\t"
        "rev	w19, w19\n\t"
        "rev	w17, w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w24\n\t"
        "mov	v15.s[3], w23\n\t"
        "mov	v16.s[3], w22\n\t"
        "mov	v17.s[3], w21\n\t"
        "mov	v8.s[3], w20\n\t"
        "mov	v9.s[3], w19\n\t"
        "mov	v10.s[3], w17\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #176]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #192]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "eor	v0.16b, v0.16b, v8.16b\n\t"
        "eor	v1.16b, v1.16b, v9.16b\n\t"
        "eor	v2.16b, v2.16b, v10.16b\n\t"
        "eor	v3.16b, v3.16b, v11.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_8_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_both_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w24, w24\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rev	w23, w23\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rev	w22, w22\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rev	w21, w21\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rev	w20, w20\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rev	w19, w19\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rev	w17, w17\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w24\n\t"
        "mov	v15.s[3], w23\n\t"
        "mov	v16.s[3], w22\n\t"
        "mov	v17.s[3], w21\n\t"
        "mov	v8.s[3], w20\n\t"
        "mov	v9.s[3], w19\n\t"
        "mov	v10.s[3], w17\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /* Done GHASH */
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #176]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #192]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "eor	v0.16b, v0.16b, v8.16b\n\t"
        "eor	v1.16b, v1.16b, v9.16b\n\t"
        "eor	v2.16b, v2.16b, v10.16b\n\t"
        "eor	v3.16b, v3.16b, v11.16b\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.ge	L_aes_gcm_encrypt_arm64_crypto_eor3_192_both_8_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_8_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_4_%=: \n\t"
        "ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t"
        "ld1	{v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t"
        "ld1	{v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t"
        "ld1	{v12.2d}, [x9]\n\t"
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_192_done_%=\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_1_%=\n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_2_%=\n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "rev	w19, w19\n\t"
        "rev	w17, w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w20\n\t"
        "mov	v15.s[3], w19\n\t"
        "mov	v16.s[3], w17\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_4_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_both_4_%=: \n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w20, w20\n\t"
        "rev	w19, w19\n\t"
        "rev	w17, w17\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w20\n\t"
        "mov	v15.s[3], w19\n\t"
        "mov	v16.s[3], w17\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /* Done GHASH */
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "b.ge	L_aes_gcm_encrypt_arm64_crypto_eor3_192_both_4_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_4_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "cmp	w14, #1\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_1_%=\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_192_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_2_%=: \n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w15, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w20\n\t"
        "mov	v15.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "subs	w14, w14, #2\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "st1	{v18.16b, v19.16b}, [%x[out]], #32\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v19.1d, v22.1d\n\t"
        "pmull2	v29.1q, v19.2d, v22.2d\n\t"
        "ext	v31.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v21.1d\n\t"
        "pmull2	v26.1q, v23.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "cbz	w14, L_aes_gcm_encrypt_arm64_crypto_eor3_192_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_1_%=: \n\t"
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "st1	{v18.16b}, [%x[out]], #16\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_done_%=: \n\t"
        "ands	w14, %w[sz], #15\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_eor3_192_partial_done_%=\n\t"
        "eor	v16.16b, v16.16b, v16.16b\n\t"
        "mov	w19, w14\n\t"
        "st1	{v16.2d}, [x11]\n\t"
        "cmp	x19, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_dw_%=\n\t"
        "ldr	x17, [%x[in]], #8\n\t"
        "sub	x19, x19, #8\n\t"
        "str	x17, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_dw_%=: \n\t"
        "cmp	x19, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_sw_%=\n\t"
        "ldr	w17, [%x[in]], #4\n\t"
        "sub	x19, x19, #4\n\t"
        "str	w17, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_sw_%=: \n\t"
        "cmp	x19, #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_byte_%=\n\t"
        "ldrh	w17, [%x[in]], #2\n\t"
        "sub	x19, x19, #2\n\t"
        "strh	w17, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_byte_%=: \n\t"
        "cbz	x19, L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_bytes_%=\n\t"
        "ldrb	w17, [%x[in]], #1\n\t"
        "subs	x19, x19, #1\n\t"
        "strb	w17, [x11], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_bytes_%=: \n\t"
        "sub	x11, x11, x14\n\t"
        "ld1	{v16.2d}, [x11]\n\t"
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "eor	v16.16b, v16.16b, v14.16b\n\t"
        "st1	{v16.2d}, [x11]\n\t"
        "mov	w19, w14\n\t"
        "cmp	x19, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_dw_%=\n\t"
        "ldr	x17, [x11], #8\n\t"
        "sub	x19, x19, #8\n\t"
        "str	x17, [%x[out]], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_dw_%=: \n\t"
        "cmp	x19, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_sw_%=\n\t"
        "ldr	w17, [x11], #4\n\t"
        "sub	x19, x19, #4\n\t"
        "str	w17, [%x[out]], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_sw_%=: \n\t"
        "cmp	x19, #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t"
        "ldrh	w17, [x11], #2\n\t"
        "sub	x19, x19, #2\n\t"
        "strh	w17, [%x[out]], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_byte_%=: \n\t"
        "cbz	x19, L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_end_bytes_%=\n\t"
        "ldrb	w17, [x11], #1\n\t"
        "subs	x19, x19, #1\n\t"
        "strb	w17, [%x[out]], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_end_bytes_%=: \n\t"
        "mov	x17, #16\n\t"
        "sub	x17, x17, x14\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_zero_%=: \n\t"
        "subs	x17, x17, #1\n\t"
        "strb	wzr, [x11], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_zero_%=\n\t"
        "sub	x11, x11, #16\n\t"
        "ld1	{v14.2d}, [x11]\n\t"
        "rbit	v14.16b, v14.16b\n\t"
        "eor	v15.16b, v26.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v15.1d, v22.1d\n\t"
        "pmull2	v29.1q, v15.2d, v22.2d\n\t"
        "ext	v31.16b, v15.16b, v15.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_partial_done_%=: \n\t"
        "ld1	{v14.2d}, [x12]\n\t"
        "lsl	x8, x8, #3\n\t"
        "rbit	x8, x8\n\t"
        "mov	v28.d[0], x8\n\t"
        "lsl	%x[sz], %x[sz], #3\n\t"
        "rbit	%x[sz], %x[sz]\n\t"
        "mov	v28.d[1], %x[sz]\n\t"
        "eor	v26.16b, v26.16b, v28.16b\n\t"
        "pmull	v28.1q, v26.1d, v22.1d\n\t"
        "pmull2	v29.1q, v26.2d, v22.2d\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v26.16b, v26.16b, #8\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "rbit	v26.16b, v26.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "eor	v26.16b, v26.16b, v14.16b\n\t"
        "cmp	%w[tagSz], #16\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_partial_%=\n\t"
        "st1	{v26.16b}, [%x[tag]]\n\t"
        "b	L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_partial_%=: \n\t"
        "st1	{v26.16b}, [x11]\n\t"
        "cmp	%w[tagSz], #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_dw_%=\n\t"
        "ldr	x16, [x11], #8\n\t"
        "sub	%w[tagSz], %w[tagSz], #8\n\t"
        "str	x16, [%x[tag]], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_dw_%=: \n\t"
        "cmp	%w[tagSz], #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_sw_%=\n\t"
        "ldr	w16, [x11], #4\n\t"
        "sub	%w[tagSz], %w[tagSz], #4\n\t"
        "str	w16, [%x[tag]], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_sw_%=: \n\t"
        "cmp	%w[tagSz], #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t"
        "ldrh	w16, [x11], #2\n\t"
        "sub	%w[tagSz], %w[tagSz], #2\n\t"
        "strh	w16, [%x[tag]], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_byte_%=: \n\t"
        "cbz	%w[tagSz], L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_end_bytes_%=\n\t"
        "ldrb	w16, [x11], #1\n\t"
        "subs	%w[tagSz], %w[tagSz], #1\n\t"
        "strb	w16, [%x[tag]], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_end_bytes_%=: \n\t"
#endif /* !NO_AES_192 */
        "b	L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t"
        /* AES_GCM_256 */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_start_256_%=: \n\t"
#ifndef NO_AES_256
        "cmp	w14, #32\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_4_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rev	w24, w24\n\t"
        "rev	w23, w23\n\t"
        "rev	w22, w22\n\t"
        "rev	w21, w21\n\t"
        "rev	w20, w20\n\t"
        "rev	w19, w19\n\t"
        "rev	w17, w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w24\n\t"
        "mov	v15.s[3], w23\n\t"
        "mov	v16.s[3], w22\n\t"
        "mov	v17.s[3], w21\n\t"
        "mov	v8.s[3], w20\n\t"
        "mov	v9.s[3], w19\n\t"
        "mov	v10.s[3], w17\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #176]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #192]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #208]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #224]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "eor	v0.16b, v0.16b, v8.16b\n\t"
        "eor	v1.16b, v1.16b, v9.16b\n\t"
        "eor	v2.16b, v2.16b, v10.16b\n\t"
        "eor	v3.16b, v3.16b, v11.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_8_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_both_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w24, w24\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rev	w23, w23\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rev	w22, w22\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rev	w21, w21\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rev	w20, w20\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rev	w19, w19\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rev	w17, w17\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w24\n\t"
        "mov	v15.s[3], w23\n\t"
        "mov	v16.s[3], w22\n\t"
        "mov	v17.s[3], w21\n\t"
        "mov	v8.s[3], w20\n\t"
        "mov	v9.s[3], w19\n\t"
        "mov	v10.s[3], w17\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /* Done GHASH */
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #176]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #192]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #208]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #224]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "eor	v0.16b, v0.16b, v8.16b\n\t"
        "eor	v1.16b, v1.16b, v9.16b\n\t"
        "eor	v2.16b, v2.16b, v10.16b\n\t"
        "eor	v3.16b, v3.16b, v11.16b\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.ge	L_aes_gcm_encrypt_arm64_crypto_eor3_256_both_8_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_8_%=: \n\t"
        /* GHASH-only pass over the eight blocks held in v18-v21 and v0-v3;
         * no AES rounds are interleaved here. On the fall-through from the
         * loop above these are the ciphertext blocks that were just stored
         * to out. The per-block products are accumulated in three registers:
         * v28 (low x low lanes), v29 (high x high lanes) and v30 (the two
         * cross products), and reduced once at the end into v26. */
        /* Reverse the bit order inside every byte of each block. */
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        /* Fold the running GHASH value (v26) into the first block, which is
         * the one multiplied by H^8 below. */
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        /* Swap the 64-bit halves of the block to form the cross products. */
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        /* From here on v26 is free to be used as a temporary: its old value
         * has already been folded into v18. */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        /* v31 = upper half of v28 : lower half of v29. The upper 64-bit lane
         * of v27 is the multiplier for both folding steps; v27 is set up
         * outside this section. */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        /* New running GHASH value. */
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_4_%=: \n\t"
        /* Load thirteen 16-byte round keys from x9 into v0-v12. The
         * post-indexed loads advance x9 by 208 bytes in total, so [x9] then
         * addresses the two remaining keys that are loaded into v29/v30
         * further down. Note that this overwrites v4-v7, which the
         * eight-block GHASH code above used as H^5..H^8. */
        "ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t"
        "ld1	{v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t"
        "ld1	{v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t"
        "ld1	{v12.2d}, [x9], #16\n\t"
        /* Dispatch on w14, which is decremented by 4 below as four 16-byte
         * blocks are consumed: less than 1 -> done, exactly 1 -> one-block
         * path, 2 or 3 -> two-block path, otherwise fall through to the
         * four-block path. */
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_256_done_%=\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_1_%=\n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_2_%=\n\t"
        /* Build four counter blocks in v14-v17: each is a copy of v13 with
         * its last 32-bit lane replaced by the byte-reversed counter values
         * w15+1 .. w15+4. w15 itself is advanced by 4. */
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "rev	w19, w19\n\t"
        "rev	w17, w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w20\n\t"
        "mov	v15.s[3], w19\n\t"
        "mov	v16.s[3], w17\n\t"
        "mov	v17.s[3], w16\n\t"
        /* AES rounds with keys v0..v12 applied to all four counter blocks. */
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /* Four blocks are being consumed by this pass. */
        "subs	w14, w14, #4\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /* The four input blocks are loaded into v18-v21 in between the
         * remaining AES rounds. */
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /* Last two round keys: v29 feeds the final aese (no aesmc after
         * it), v30 is XORed in afterwards. */
        "ld1	{v29.2d, v30.2d}, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "aese	v15.16b, v29.16b\n\t"
        "eor	v15.16b, v15.16b, v30.16b\n\t"
        "aese	v16.16b, v29.16b\n\t"
        "eor	v16.16b, v16.16b, v30.16b\n\t"
        "aese	v17.16b, v29.16b\n\t"
        "eor	v17.16b, v17.16b, v30.16b\n\t"
        /* XOR the encrypted counter blocks into the input blocks. */
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        /* The flags set here are consumed by the b.lt after the store; the
         * store itself does not modify them. */
        "cmp	w14, #4\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        /* Fewer than four blocks left: skip the loop and GHASH these four. */
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_4_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_both_4_%=: \n\t"
        /* Four-block loop body. AES rounds for the next four counter blocks
         * (v14-v17, round keys v0-v12 then v29/v30) are interleaved with the
         * GHASH of the four blocks currently in v18-v21, i.e. the output of
         * the previous pass. v18-v21 are only reloaded with fresh input
         * after the GHASH has finished reading them. */
        /* Counter blocks: copies of v13 with the last 32-bit lane replaced
         * by the byte-reversed values w15+1 .. w15+4; w15 advances by 4. */
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        /* The rbit instructions reverse the bits within each byte of the
         * blocks about to be hashed. */
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w20, w20\n\t"
        "rev	w19, w19\n\t"
        "rev	w17, w17\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w20\n\t"
        "mov	v15.s[3], w19\n\t"
        "mov	v16.s[3], w17\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        /* Fold the running GHASH value (v26) into the first block, which is
         * the one multiplied by H^4 below. */
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X  = C * H^1 */
        /* v28 collects the low-lane products, v29 the high-lane products
         * and v30 the cross products. */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   Reduce */
        /* Two folding multiplications by the upper 64-bit lane of v27 (set
         * up outside this section) leave the new GHASH value in v26. */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /* Four more blocks are being consumed by this iteration. */
        "subs	w14, w14, #4\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /* Done GHASH */
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        /* v18-v21 are free again: load the next four input blocks. */
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /* v29/v30 were used as GHASH temporaries above, so the last two
         * round keys are reloaded on every iteration. */
        "ld1	{v29.2d, v30.2d}, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "aese	v15.16b, v29.16b\n\t"
        "eor	v15.16b, v15.16b, v30.16b\n\t"
        "aese	v16.16b, v29.16b\n\t"
        "eor	v16.16b, v16.16b, v30.16b\n\t"
        "aese	v17.16b, v29.16b\n\t"
        "eor	v17.16b, v17.16b, v30.16b\n\t"
        /* XOR the encrypted counter blocks into the input blocks. */
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        /* Flags from this compare survive the store and drive the b.ge. */
        "cmp	w14, #4\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "b.ge	L_aes_gcm_encrypt_arm64_crypto_eor3_256_both_4_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_4_%=: \n\t"
        /* GHASH-only pass over the four blocks in v18-v21, which both paths
         * into this label have just stored to out. The blocks are
         * bit-reversed per byte, the running value v26 is folded into v18,
         * and the products with v22-v25 (H^1..H^4 per the comments below)
         * are summed in v28/v29/v30 and reduced once into v26. */
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        /* Tail dispatch on w14: exactly 1 -> one-block path, less than 1 ->
         * done, anything else falls through to the two-block path. */
        "cmp	w14, #1\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_1_%=\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_256_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_2_%=: \n\t"
        /* Two-block path: encrypt counter blocks w15+1 and w15+2 (v14, v15),
         * XOR them into two input blocks, store the result and GHASH it.
         * w15 advances by 2 and w14 is decremented by 2. */
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w15, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "rev	w16, w15\n\t"
        /* Byte-reversed counters go into the last 32-bit lane. */
        "mov	v14.s[3], w20\n\t"
        "mov	v15.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /* The result in w14 is tested by the cbz at the end of this
         * section; nothing in between writes w14. */
        "subs	w14, w14, #2\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /* Last two round keys: v29 for the final aese, v30 XORed in after. */
        "ld1	{v29.2d, v30.2d}, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "aese	v15.16b, v29.16b\n\t"
        "eor	v15.16b, v15.16b, v30.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "st1	{v18.16b, v19.16b}, [%x[out]], #32\n\t"
        /* GHASH the two blocks just stored: v21 = running value ^ first
         * block is multiplied by H^2 (v23), the second block (v19) by H^1
         * (v22). */
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v19.1d, v22.1d\n\t"
        "pmull2	v29.1q, v19.2d, v22.2d\n\t"
        "ext	v31.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v21.1d\n\t"
        "pmull2	v26.1q, v23.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        /* Nothing left -> done; otherwise fall through to the one-block
         * path. */
        "cbz	w14, L_aes_gcm_encrypt_arm64_crypto_eor3_256_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_1_%=: \n\t"
        /* One-block path: advance the counter w15 by one, encrypt the
         * resulting counter block (v14), XOR it into one input block, store
         * the result and GHASH it. w14 is not updated here; execution
         * continues straight into the done label. */
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rev	w16, w15\n\t"
        /* Byte-reversed counter goes into the last 32-bit lane. */
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /* Last two round keys are read from [x9] and [x9, #16]. */
        "ldr	q29, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ldr	q30, [x9, #16]\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "st1	{v18.16b}, [%x[out]], #16\n\t"
        /* GHASH: (running value ^ bit-reversed block) * H^1, then reduce. */
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_done_%=: \n\t"
        /* All whole 16-byte blocks are finished. w14 = sz & 15 is the number
         * of trailing bytes; when it is zero the partial-block handling is
         * skipped entirely. */
        "ands	w14, %w[sz], #15\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_eor3_256_partial_done_%=\n\t"
        /* Clear the 16-byte scratch block at x11, then copy the w14 trailing
         * input bytes into it in chunks of at most 8, 4, 2 and 1 bytes.
         * x19 counts the bytes still to copy. */
        "eor	v16.16b, v16.16b, v16.16b\n\t"
        "mov	w19, w14\n\t"
        "st1	{v16.2d}, [x11]\n\t"
        "cmp	x19, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_dw_%=\n\t"
        "ldr	x17, [%x[in]], #8\n\t"
        "sub	x19, x19, #8\n\t"
        "str	x17, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_dw_%=: \n\t"
        /* 4-byte chunk, if at least four bytes remain. */
        "cmp	x19, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_sw_%=\n\t"
        "ldr	w17, [%x[in]], #4\n\t"
        "sub	x19, x19, #4\n\t"
        "str	w17, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_sw_%=: \n\t"
        /* 2-byte chunk, if at least two bytes remain. */
        "cmp	x19, #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_byte_%=\n\t"
        "ldrh	w17, [%x[in]], #2\n\t"
        "sub	x19, x19, #2\n\t"
        "strh	w17, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_byte_%=: \n\t"
        /* Whatever is left is copied one byte at a time. */
        "cbz	x19, L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_bytes_%=\n\t"
        "ldrb	w17, [%x[in]], #1\n\t"
        "subs	x19, x19, #1\n\t"
        "strb	w17, [x11], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_bytes_%=: \n\t"
        /* x11 was advanced by the w14 bytes copied in; step it back to the
         * start of the scratch block and load the zero-padded partial input
         * block into v16. (x14 has zero upper bits because it was last
         * written through w14.) */
        "sub	x11, x11, x14\n\t"
        "ld1	{v16.2d}, [x11]\n\t"
        /* Encrypt one more counter block (w15 + 1) into v14. */
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /* Last two round keys are read from [x9] and [x9, #16]. */
        "ldr	q29, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ldr	q30, [x9, #16]\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        /* XOR the encrypted counter block into the padded input and write
         * all 16 bytes back to the scratch block. Bytes past the first w14
         * now hold raw keystream, because the padding was zero. */
        "eor	v16.16b, v16.16b, v14.16b\n\t"
        "st1	{v16.2d}, [x11]\n\t"
        /* Copy only the first w14 bytes of the result to out, again in
         * chunks of at most 8, 4, 2 and 1 bytes; x19 counts what is left. */
        "mov	w19, w14\n\t"
        "cmp	x19, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_dw_%=\n\t"
        "ldr	x17, [x11], #8\n\t"
        "sub	x19, x19, #8\n\t"
        "str	x17, [%x[out]], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_dw_%=: \n\t"
        "cmp	x19, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_sw_%=\n\t"
        "ldr	w17, [x11], #4\n\t"
        "sub	x19, x19, #4\n\t"
        "str	w17, [%x[out]], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_sw_%=: \n\t"
        "cmp	x19, #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t"
        "ldrh	w17, [x11], #2\n\t"
        "sub	x19, x19, #2\n\t"
        "strh	w17, [%x[out]], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_byte_%=: \n\t"
        "cbz	x19, L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_end_bytes_%=\n\t"
        "ldrb	w17, [x11], #1\n\t"
        "subs	x19, x19, #1\n\t"
        "strb	w17, [%x[out]], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_end_bytes_%=: \n\t"
        "mov	x17, #16\n\t"
        "sub	x17, x17, x14\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_zero_%=: \n\t"
        "subs	x17, x17, #1\n\t"
        "strb	wzr, [x11], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_zero_%=\n\t"
        "sub	x11, x11, #16\n\t"
        "ld1	{v14.2d}, [x11]\n\t"
        "rbit	v14.16b, v14.16b\n\t"
        "eor	v15.16b, v26.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v15.1d, v22.1d\n\t"
        "pmull2	v29.1q, v15.2d, v22.2d\n\t"
        "ext	v31.16b, v15.16b, v15.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_partial_done_%=: \n\t"
        "ld1	{v14.2d}, [x12]\n\t"
        "lsl	x8, x8, #3\n\t"
        "rbit	x8, x8\n\t"
        "mov	v28.d[0], x8\n\t"
        "lsl	%x[sz], %x[sz], #3\n\t"
        "rbit	%x[sz], %x[sz]\n\t"
        "mov	v28.d[1], %x[sz]\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v26.16b, v28.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v28.1q, v26.1d, v22.1d\n\t"
        "pmull2	v29.1q, v26.2d, v22.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v26.16b, v26.16b, #8\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "ldr	q11, [x9, #-32]\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "ldr	q12, [x9, #-16]\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ldr	q29, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "rbit	v26.16b, v26.16b\n\t"
        "ldr	q30, [x9, #16]\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "eor	v26.16b, v26.16b, v14.16b\n\t"
        "cmp	%w[tagSz], #16\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_partial_%=\n\t"
        "st1	{v26.16b}, [%x[tag]]\n\t"
        "b	L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_partial_%=: \n\t"
        "st1	{v26.16b}, [x11]\n\t"
        "cmp	%w[tagSz], #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_dw_%=\n\t"
        "ldr	x16, [x11], #8\n\t"
        "sub	%w[tagSz], %w[tagSz], #8\n\t"
        "str	x16, [%x[tag]], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_dw_%=: \n\t"
        "cmp	%w[tagSz], #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_sw_%=\n\t"
        "ldr	w16, [x11], #4\n\t"
        "sub	%w[tagSz], %w[tagSz], #4\n\t"
        "str	w16, [%x[tag]], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_sw_%=: \n\t"
        "cmp	%w[tagSz], #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t"
        "ldrh	w16, [x11], #2\n\t"
        "sub	%w[tagSz], %w[tagSz], #2\n\t"
        "strh	w16, [%x[tag]], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_byte_%=: \n\t"
        "cbz	%w[tagSz], L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_end_bytes_%=\n\t"
        "ldrb	w16, [x11], #1\n\t"
        "subs	%w[tagSz], %w[tagSz], #1\n\t"
        "strb	w16, [%x[tag]], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_end_bytes_%=: \n\t"
#endif /* !NO_AES_256 */
        "b	L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t"
        /* AES_GCM_128 */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_start_128_%=: \n\t"
#ifndef NO_AES_128
        "cmp	w14, #32\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_4_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rev	w24, w24\n\t"
        "rev	w23, w23\n\t"
        "rev	w22, w22\n\t"
        "rev	w21, w21\n\t"
        "rev	w20, w20\n\t"
        "rev	w19, w19\n\t"
        "rev	w17, w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w24\n\t"
        "mov	v15.s[3], w23\n\t"
        "mov	v16.s[3], w22\n\t"
        "mov	v17.s[3], w21\n\t"
        "mov	v8.s[3], w20\n\t"
        "mov	v9.s[3], w19\n\t"
        "mov	v10.s[3], w17\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "eor	v0.16b, v0.16b, v8.16b\n\t"
        "eor	v1.16b, v1.16b, v9.16b\n\t"
        "eor	v2.16b, v2.16b, v10.16b\n\t"
        "eor	v3.16b, v3.16b, v11.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_8_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_both_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w24, w24\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rev	w23, w23\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rev	w22, w22\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rev	w21, w21\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rev	w20, w20\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rev	w19, w19\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rev	w17, w17\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w24\n\t"
        "mov	v15.s[3], w23\n\t"
        "mov	v16.s[3], w22\n\t"
        "mov	v17.s[3], w21\n\t"
        "mov	v8.s[3], w20\n\t"
        "mov	v9.s[3], w19\n\t"
        "mov	v10.s[3], w17\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /* Done GHASH */
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "eor	v0.16b, v0.16b, v8.16b\n\t"
        "eor	v1.16b, v1.16b, v9.16b\n\t"
        "eor	v2.16b, v2.16b, v10.16b\n\t"
        "eor	v3.16b, v3.16b, v11.16b\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.ge	L_aes_gcm_encrypt_arm64_crypto_eor3_128_both_8_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_8_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_4_%=: \n\t"
        "ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t"
        "ld1	{v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t"
        "ld1	{v8.2d, v9.2d}, [x9], #32\n\t"
        "ld1	{v10.2d}, [x9]\n\t"
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_128_done_%=\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_1_%=\n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_2_%=\n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "rev	w19, w19\n\t"
        "rev	w17, w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w20\n\t"
        "mov	v15.s[3], w19\n\t"
        "mov	v16.s[3], w17\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "eor	v15.16b, v15.16b, v10.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "eor	v16.16b, v16.16b, v10.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "eor	v17.16b, v17.16b, v10.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_4_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_both_4_%=: \n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w20, w20\n\t"
        "rev	w19, w19\n\t"
        "rev	w17, w17\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w20\n\t"
        "mov	v15.s[3], w19\n\t"
        "mov	v16.s[3], w17\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /* Done GHASH */
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "eor	v15.16b, v15.16b, v10.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "eor	v16.16b, v16.16b, v10.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "eor	v17.16b, v17.16b, v10.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "b.ge	L_aes_gcm_encrypt_arm64_crypto_eor3_128_both_4_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_4_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "cmp	w14, #1\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_1_%=\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_128_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_2_%=: \n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w15, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w20\n\t"
        "mov	v15.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "subs	w14, w14, #2\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "eor	v15.16b, v15.16b, v10.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "st1	{v18.16b, v19.16b}, [%x[out]], #32\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v19.1d, v22.1d\n\t"
        "pmull2	v29.1q, v19.2d, v22.2d\n\t"
        "ext	v31.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v21.1d\n\t"
        "pmull2	v26.1q, v23.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "cbz	w14, L_aes_gcm_encrypt_arm64_crypto_eor3_128_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_1_%=: \n\t"
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "st1	{v18.16b}, [%x[out]], #16\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_done_%=: \n\t"
        "ands	w14, %w[sz], #15\n\t"
        "b.eq	L_aes_gcm_encrypt_arm64_crypto_eor3_128_partial_done_%=\n\t"
        "eor	v16.16b, v16.16b, v16.16b\n\t"
        "mov	w19, w14\n\t"
        "st1	{v16.2d}, [x11]\n\t"
        "cmp	x19, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_dw_%=\n\t"
        "ldr	x17, [%x[in]], #8\n\t"
        "sub	x19, x19, #8\n\t"
        "str	x17, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_dw_%=: \n\t"
        "cmp	x19, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_sw_%=\n\t"
        "ldr	w17, [%x[in]], #4\n\t"
        "sub	x19, x19, #4\n\t"
        "str	w17, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_sw_%=: \n\t"
        "cmp	x19, #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_byte_%=\n\t"
        "ldrh	w17, [%x[in]], #2\n\t"
        "sub	x19, x19, #2\n\t"
        "strh	w17, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_byte_%=: \n\t"
        "cbz	x19, L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_bytes_%=\n\t"
        "ldrb	w17, [%x[in]], #1\n\t"
        "subs	x19, x19, #1\n\t"
        "strb	w17, [x11], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_bytes_%=: \n\t"
        "sub	x11, x11, x14\n\t"
        "ld1	{v16.2d}, [x11]\n\t"
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "eor	v16.16b, v16.16b, v14.16b\n\t"
        "st1	{v16.2d}, [x11]\n\t"
        "mov	w19, w14\n\t"
        "cmp	x19, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_dw_%=\n\t"
        "ldr	x17, [x11], #8\n\t"
        "sub	x19, x19, #8\n\t"
        "str	x17, [%x[out]], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_dw_%=: \n\t"
        "cmp	x19, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_sw_%=\n\t"
        "ldr	w17, [x11], #4\n\t"
        "sub	x19, x19, #4\n\t"
        "str	w17, [%x[out]], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_sw_%=: \n\t"
        "cmp	x19, #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t"
        "ldrh	w17, [x11], #2\n\t"
        "sub	x19, x19, #2\n\t"
        "strh	w17, [%x[out]], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_byte_%=: \n\t"
        "cbz	x19, L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_end_bytes_%=\n\t"
        "ldrb	w17, [x11], #1\n\t"
        "subs	x19, x19, #1\n\t"
        "strb	w17, [%x[out]], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_end_bytes_%=: \n\t"
        "mov	x17, #16\n\t"
        "sub	x17, x17, x14\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_zero_%=: \n\t"
        "subs	x17, x17, #1\n\t"
        "strb	wzr, [x11], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_zero_%=\n\t"
        "sub	x11, x11, #16\n\t"
        "ld1	{v14.2d}, [x11]\n\t"
        "rbit	v14.16b, v14.16b\n\t"
        "eor	v15.16b, v26.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v15.1d, v22.1d\n\t"
        "pmull2	v29.1q, v15.2d, v22.2d\n\t"
        "ext	v31.16b, v15.16b, v15.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_partial_done_%=: \n\t"
        "ld1	{v14.2d}, [x12]\n\t"
        "lsl	x8, x8, #3\n\t"
        "rbit	x8, x8\n\t"
        "mov	v28.d[0], x8\n\t"
        "lsl	%x[sz], %x[sz], #3\n\t"
        "rbit	%x[sz], %x[sz]\n\t"
        "mov	v28.d[1], %x[sz]\n\t"
        "eor	v26.16b, v26.16b, v28.16b\n\t"
        "pmull	v28.1q, v26.1d, v22.1d\n\t"
        "pmull2	v29.1q, v26.2d, v22.2d\n\t"
        "ext	v31.16b, v26.16b, v26.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "rbit	v26.16b, v26.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "eor	v26.16b, v26.16b, v14.16b\n\t"
        "cmp	%w[tagSz], #16\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_partial_%=\n\t"
        "st1	{v26.16b}, [%x[tag]]\n\t"
        "b	L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_partial_%=: \n\t"
        "st1	{v26.16b}, [x11]\n\t"
        "cmp	%w[tagSz], #8\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_dw_%=\n\t"
        "ldr	x16, [x11], #8\n\t"
        "sub	%w[tagSz], %w[tagSz], #8\n\t"
        "str	x16, [%x[tag]], #8\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_dw_%=: \n\t"
        "cmp	%w[tagSz], #4\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_sw_%=\n\t"
        "ldr	w16, [x11], #4\n\t"
        "sub	%w[tagSz], %w[tagSz], #4\n\t"
        "str	w16, [%x[tag]], #4\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_sw_%=: \n\t"
        "cmp	%w[tagSz], #2\n\t"
        "b.lt	L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t"
        "ldrh	w16, [x11], #2\n\t"
        "sub	%w[tagSz], %w[tagSz], #2\n\t"
        "strh	w16, [%x[tag]], #2\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_byte_%=: \n\t"
        "cbz	%w[tagSz], L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_end_bytes_%=\n\t"
        "ldrb	w16, [x11], #1\n\t"
        "subs	%w[tagSz], %w[tagSz], #1\n\t"
        "strb	w16, [%x[tag]], #1\n\t"
        "b.ne	L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_end_bytes_%=: \n\t"
#endif /* !NO_AES_128 */
        "\n"
    "L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=: \n\t"
        "ldp	x29, x30, [sp], #0x50\n\t"
        : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz),
          [tag] "+r" (tag), [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz),
          [key] "+r" (key), [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp),
          [reg] "+r" (reg), [nr] "+r" (nr)
        : [in] "r" (in), [nonce] "r" (nonce), [aad] "r" (aad)
        : "memory", "cc", "x14", "x15", "x16", "x17", "x19", "x20", "x21",
            "x22", "x23", "x24", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
            "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
            "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
            "v27", "v28", "v29", "v30", "v31"
    );
}

#ifdef HAVE_AES_DECRYPT
int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz,
    const byte* nonce, word32 nonceSz, const byte* tag, word32 tagSz,
    const byte* aad, word32 aadSz, byte* key, byte* gcm_h, byte* tmp, byte* reg,
    int nr)
{
    __asm__ __volatile__ (
        "stp	x29, x30, [sp, #-80]!\n\t"
        "add	x29, sp, #0\n\t"
        "str	%w[nr], [sp, #72]\n\t"
        "str	%x[reg], [sp, #64]\n\t"
        "str	%x[tmp], [sp, #56]\n\t"
        "str	%x[gcm_h], [sp, #48]\n\t"
        "str	%x[key], [sp, #40]\n\t"
        "str	%w[aadSz], [sp, #32]\n\t"
        "movi	v27.16b, #0x87\n\t"
        "eor	v26.16b, v26.16b, v26.16b\n\t"
        "ushr	v27.2d, v27.2d, #56\n\t"
        "ld1	{v22.2d}, [x10]\n\t"
        "cmp	w8, #0x40\n\t"
        "csetm	x16, lt\n\t"
        "cmp	%w[sz], #32\n\t"
        "csetm	x17, lt\n\t"
        "ands	x16, x16, x17\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_h_done_%=\n\t"
        /* Square H => H^2 */
        "pmull2	v31.1q, v22.2d, v22.2d\n\t"
        "pmull	v30.1q, v22.1d, v22.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v23.16b, v30.16b, v31.16b\n\t"
        "cmp	w8, #0x100\n\t"
        "csetm	x16, lt\n\t"
        "cmp	%w[sz], #0x40\n\t"
        "csetm	x17, lt\n\t"
        "ands	x16, x16, x17\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_h_done_%=\n\t"
        /* Multiply H and H^2  => H^3 */
        "pmull	v28.1q, v22.1d, v23.1d\n\t"
        "pmull2	v29.1q, v22.2d, v23.2d\n\t"
        "ext	v31.16b, v22.16b, v22.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v23.1d\n\t"
        "pmull2	v31.1q, v31.2d, v23.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v24.16b, v28.16b, v30.16b\n\t"
        /* Square H^2 => H^4 */
        "pmull2	v31.1q, v23.2d, v23.2d\n\t"
        "pmull	v30.1q, v23.1d, v23.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v25.16b, v30.16b, v31.16b\n\t"
        /* Done */
        "cmp	w8, #0x400\n\t"
        "csetm	x16, lt\n\t"
        "cmp	%w[sz], #0x200\n\t"
        "csetm	x17, lt\n\t"
        "ands	x16, x16, x17\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_h_done_%=\n\t"
        /* Multiply H and H^4  => H^5 */
        "pmull	v28.1q, v22.1d, v25.1d\n\t"
        "pmull2	v29.1q, v22.2d, v25.2d\n\t"
        "ext	v31.16b, v22.16b, v22.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v25.1d\n\t"
        "pmull2	v31.1q, v31.2d, v25.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v4.16b, v28.16b, v30.16b\n\t"
        /* Square H^3 => H^6 */
        "pmull2	v31.1q, v24.2d, v24.2d\n\t"
        "pmull	v30.1q, v24.1d, v24.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v5.16b, v30.16b, v31.16b\n\t"
        /* Multiply H and H^6  => H^7 */
        "pmull	v28.1q, v22.1d, v5.1d\n\t"
        "pmull2	v29.1q, v22.2d, v5.2d\n\t"
        "ext	v31.16b, v22.16b, v22.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v5.1d\n\t"
        "pmull2	v31.1q, v31.2d, v5.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v6.16b, v28.16b, v30.16b\n\t"
        /* Square H^4 => H^8 */
        "pmull2	v31.1q, v25.2d, v25.2d\n\t"
        "pmull	v30.1q, v25.1d, v25.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v7.16b, v30.16b, v31.16b\n\t"
        /* Done */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_h_done_%=: \n\t"
        "lsr	w14, w8, #4\n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_1_%=\n\t"
        "cmp	w14, #16\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_2_%=\n\t"
        "cmp	w14, #0x40\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_8_%=: \n\t"
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t"
        "ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aad]], #0x40\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "sub	w14, w14, #8\n\t"
        "cmp	w14, #8\n\t"
        "b.ge	L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_8_%=\n\t"
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_aad_done_%=\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_1_%=\n\t"
        "cmp	w14, #16\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_2_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_4_%=: \n\t"
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "sub	w14, w14, #4\n\t"
        "cmp	w14, #4\n\t"
        "b.ge	L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_4_%=\n\t"
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_aad_done_%=\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_1_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_2_%=: \n\t"
        "ld1	{v18.16b, v19.16b}, [%x[aad]], #32\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v19.1d, v22.1d\n\t"
        "pmull2	v29.1q, v19.2d, v22.2d\n\t"
        "ext	v31.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v21.1d\n\t"
        "pmull2	v26.1q, v23.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "sub	w14, w14, #2\n\t"
        "cmp	w14, #1\n\t"
        "b.gt	L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_2_%=\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_aad_done_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_1_%=: \n\t"
        "cbz	w14, L_aes_gcm_decrypt_arm64_crypto_eor3_aad_done_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_both_1_%=: \n\t"
        "ld1	{v18.16b}, [%x[aad]], #16\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "subs	w14, w14, #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_aad_both_1_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_done_%=: \n\t"
        "and	w14, w8, #15\n\t"
        "cbz	w14, L_aes_gcm_decrypt_arm64_crypto_eor3_aad_partial_done_%=\n\t"
        "eor	v28.16b, v28.16b, v28.16b\n\t"
        "mov	w20, w14\n\t"
        "st1	{v28.2d}, [x11]\n\t"
        "cmp	w20, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_dw_%=\n\t"
        "ldr	x19, [%x[aad]], #8\n\t"
        "sub	w20, w20, #8\n\t"
        "str	x19, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_dw_%=: \n\t"
        "cmp	w20, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_sw_%=\n\t"
        "ldr	w19, [%x[aad]], #4\n\t"
        "sub	w20, w20, #4\n\t"
        "str	w19, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_sw_%=: \n\t"
        "cmp	w20, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t"
        "ldrh	w19, [%x[aad]], #2\n\t"
        "sub	w20, w20, #2\n\t"
        "strh	w19, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_byte_%=: \n\t"
        "cbz	w20, L_aes_gcm_decrypt_arm64_crypto_eor3_aad_end_bytes_%=\n\t"
        "ldrb	w19, [%x[aad]], #1\n\t"
        "subs	w20, w20, #1\n\t"
        "strb	w19, [x11], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_end_bytes_%=: \n\t"
        "sub	x11, x11, x14\n\t"
        "ld1	{v18.2d}, [x11]\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_partial_done_%=: \n\t"
        /* Load Nonce */
        "cmp	%w[nonceSz], #12\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_ghash_nonce_%=\n\t"
        "ldr	x16, [%x[nonce]]\n\t"
        "movi	v13.4s, #1, lsl 24\n\t"
        "ldr	w17, [%x[nonce], #8]\n\t"
        "mov	v13.d[0], x16\n\t"
        "mov	v13.s[2], w17\n\t"
        "mov	w15, #1\n\t"
        "b	L_aes_gcm_decrypt_arm64_crypto_eor3_done_nonce_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_ghash_nonce_%=: \n\t"
        "eor	v13.16b, v13.16b, v13.16b\n\t"
        "lsr	w14, %w[nonceSz], #4\n\t"
        "cbz	w14, L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_done_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_1_%=: \n\t"
        "ld1	{v18.16b}, [%x[nonce]], #16\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v13.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v13.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "subs	w14, w14, #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_1_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_done_%=: \n\t"
        "and	w24, %w[nonceSz], #15\n\t"
        "cbz	x24, L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_partial_done_%=\n\t"
        "eor	v28.16b, v28.16b, v28.16b\n\t"
        "mov	w20, w24\n\t"
        "st1	{v28.2d}, [x11]\n\t"
        "cmp	w20, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_dw_%=\n\t"
        "ldr	x19, [%x[nonce]], #8\n\t"
        "sub	w20, w20, #8\n\t"
        "str	x19, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_dw_%=: \n\t"
        "cmp	w20, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_sw_%=\n\t"
        "ldr	w19, [%x[nonce]], #4\n\t"
        "sub	w20, w20, #4\n\t"
        "str	w19, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_sw_%=: \n\t"
        "cmp	w20, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t"
        "ldrh	w19, [%x[nonce]], #2\n\t"
        "sub	w20, w20, #2\n\t"
        "strh	w19, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_byte_%=: \n\t"
        "cbz	w20, L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_end_bytes_%=\n\t"
        "ldrb	w19, [%x[nonce]], #1\n\t"
        "subs	w20, w20, #1\n\t"
        "strb	w19, [x11], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_end_bytes_%=: \n\t"
        "sub	x11, x11, x24\n\t"
        "ld1	{v18.2d}, [x11]\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v13.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v13.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_partial_done_%=: \n\t"
        "eor	x14, x14, x14\n\t"
        "lsl	x24, %x[nonceSz], #3\n\t"
        "mov	v28.d[0], x14\n\t"
        "mov	v28.d[1], x24\n\t"
        "rev64	v28.16b, v28.16b\n\t"
        "rbit	v28.16b, v28.16b\n\t"
        "eor	v13.16b, v13.16b, v28.16b\n\t"
        "pmull	v28.1q, v13.1d, v22.1d\n\t"
        "pmull2	v29.1q, v13.2d, v22.2d\n\t"
        "ext	v31.16b, v13.16b, v13.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v13.16b, v28.16b, v30.16b\n\t"
        "rbit	v13.16b, v13.16b\n\t"
        "mov	w15, v13.s[3]\n\t"
        "rev	w15, w15\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_done_nonce_%=: \n\t"
        "st1	{v13.2d}, [x12]\n\t"
        "lsr	w14, %w[sz], #4\n\t"
        "cmp	w13, #12\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_start_128_%=\n\t"
        "b.gt	L_aes_gcm_decrypt_arm64_crypto_eor3_start_256_%=\n\t"
        /* AES_GCM_192 */
#ifndef NO_AES_192
        "cmp	w14, #32\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rev	w24, w24\n\t"
        "mov	v14.s[3], w24\n\t"
        "rev	w23, w23\n\t"
        "mov	v15.s[3], w23\n\t"
        "rev	w22, w22\n\t"
        "mov	v16.s[3], w22\n\t"
        "rev	w21, w21\n\t"
        "mov	v17.s[3], w21\n\t"
        "rev	w20, w20\n\t"
        "mov	v8.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "mov	v9.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "mov	v10.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #176]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #192]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "eor	v8.16b, v8.16b, v0.16b\n\t"
        "eor	v9.16b, v9.16b, v1.16b\n\t"
        "eor	v10.16b, v10.16b, v2.16b\n\t"
        "eor	v11.16b, v11.16b, v3.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "st1	{v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_8_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_both_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w24, w24\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "mov	v14.s[3], w24\n\t"
        "rev	w23, w23\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "mov	v15.s[3], w23\n\t"
        "rev	w22, w22\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "mov	v16.s[3], w22\n\t"
        "rev	w21, w21\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "mov	v17.s[3], w21\n\t"
        "rev	w20, w20\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "mov	v8.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "mov	v9.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "mov	v10.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /* Done GHASH */
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #176]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #192]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "eor	v8.16b, v8.16b, v0.16b\n\t"
        "eor	v9.16b, v9.16b, v1.16b\n\t"
        "eor	v10.16b, v10.16b, v2.16b\n\t"
        "eor	v11.16b, v11.16b, v3.16b\n\t"
        "st1	{v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.ge	L_aes_gcm_decrypt_arm64_crypto_eor3_192_both_8_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_8_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_4_%=: \n\t"
        "ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t"
        "ld1	{v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t"
        "ld1	{v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t"
        "ld1	{v12.2d}, [x9]\n\t"
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_192_done_%=\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_1_%=\n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_2_%=\n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "mov	v14.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "mov	v15.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "mov	v16.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_both_4_%=: \n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w20, w20\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "mov	v14.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "mov	v15.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "mov	v16.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /* Done GHASH */
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "b.ge	L_aes_gcm_decrypt_arm64_crypto_eor3_192_both_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_4_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "cmp	w14, #1\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_1_%=\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_192_done_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_2_%=: \n\t"
        /* AES-192 path: process two 16-byte blocks.
         *
         * Two counter blocks are built from the template in v13 by
         * replacing 32-bit lane 3 with the byte-reversed values w15+1 and
         * w15+2 (w15 is advanced by 2). Each is run through 12 AESE rounds
         * with the keys held in v0..v11 and then XORed with v12. The result
         * is XORed with two blocks read from 'in' and written to 'out'.
         * The two blocks read from 'in' are then folded into the GHASH
         * accumulator v26.
         */
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w15, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        /* Counters are inserted byte-reversed into the last word. */
        "rev	w20, w20\n\t"
        "mov	v14.s[3], w20\n\t"
        "rev	w16, w15\n\t"
        "mov	v15.s[3], w16\n\t"
        /* AES rounds on both counter blocks, keys v0..v11. */
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /* Two fewer blocks remain. The flags are not used; w14 is tested
         * with cbz at the end of this section. */
        "subs	w14, w14, #2\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /* Input blocks are loaded in between the AES rounds. */
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /* Last round has no MixColumns; v12 is XORed in afterwards. */
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        /* XOR the encrypted counters with the input and store 32 bytes. */
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "st1	{v14.16b, v15.16b}, [%x[out]], #32\n\t"
        /* GHASH works on the input blocks with the bits of every byte
         * reversed. The accumulator v26 is XORed into the first block. */
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v19.1d, v22.1d\n\t"
        "pmull2	v29.1q, v19.2d, v22.2d\n\t"
        "ext	v31.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v21.1d\n\t"
        "pmull2	v26.1q, v23.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        /* v28 = low product, v29 = high product, v30 = cross terms.
         * v27 is presumably the GHASH reduction constant - it is set up
         * outside this section; confirm there. */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        /* Nothing left: skip the single-block step that follows. */
        "cbz	w14, L_aes_gcm_decrypt_arm64_crypto_eor3_192_done_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_1_%=: \n\t"
        /* AES-192 path: process one 16-byte block.
         *
         * The block read from 'in' is folded into the GHASH accumulator v26
         * (multiplied by v22) while the next counter block (w15+1 placed in
         * lane 3 of a copy of v13) is encrypted with keys v0..v11 and v12.
         * The AES and GHASH instructions are interleaved. Execution falls
         * through to the next label; this section does not loop.
         */
        "ld1	{v15.16b}, [%x[in]], #16\n\t"
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        /* Reverse the bits of every byte for GHASH. */
        "rbit	v15.16b, v15.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /* v16 = accumulator XOR input block. */
        "eor	v16.16b, v26.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v16.1d, v22.1d\n\t"
        "pmull2	v29.1q, v16.2d, v22.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v16.16b, v16.16b, #8\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /* New accumulator value. */
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Last round has no MixColumns; v12 is XORed in afterwards. */
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        /* Done GHASH */
        /* A second rbit restores the input block to its original bit order
         * before it is XORed with the encrypted counter. */
        "rbit	v15.16b, v15.16b\n\t"
        "eor	v14.16b, v14.16b, v15.16b\n\t"
        "st1	{v14.16b}, [%x[out]], #16\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_done_%=: \n\t"
        /* Whole blocks are finished. If sz is not a multiple of 16, copy the
         * trailing (sz & 15) bytes from 'in' into the 16 bytes of memory at
         * x11, which are zeroed first so the block is zero padded.
         * w14 keeps the trailing byte count, x19 counts the bytes still to
         * copy and x11 advances as bytes are stored.
         */
        "ands	w14, %w[sz], #15\n\t"
        /* No trailing bytes: go straight to the tag calculation. */
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_eor3_192_partial_done_%=\n\t"
        /* v15 = 0, written to [x11] to clear the 16 bytes. */
        "eor	v15.16b, v15.16b, v15.16b\n\t"
        "mov	w19, w14\n\t"
        "st1	{v15.2d}, [x11]\n\t"
        /* Copy 8 bytes if at least 8 remain. */
        "cmp	x19, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_dw_%=\n\t"
        "ldr	x17, [%x[in]], #8\n\t"
        "sub	x19, x19, #8\n\t"
        "str	x17, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_dw_%=: \n\t"
        /* Copy 4 bytes if at least 4 remain. */
        "cmp	x19, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_sw_%=\n\t"
        "ldr	w17, [%x[in]], #4\n\t"
        "sub	x19, x19, #4\n\t"
        "str	w17, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_sw_%=: \n\t"
        /* Copy 2 bytes if at least 2 remain. */
        "cmp	x19, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_byte_%=\n\t"
        "ldrh	w17, [%x[in]], #2\n\t"
        "sub	x19, x19, #2\n\t"
        "strh	w17, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_byte_%=: \n\t"
        /* Copy the last byte, if any. At most one byte can remain after the
         * 8/4/2 steps above, so this loop body runs at most once. */
        "cbz	x19, L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_bytes_%=\n\t"
        "ldrb	w17, [%x[in]], #1\n\t"
        "subs	x19, x19, #1\n\t"
        "strb	w17, [x11], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_bytes_%=: \n\t"
        /* Process the zero padded trailing block staged at x11.
         *
         * The padded block is folded into the GHASH accumulator v26 and
         * XORed with the next encrypted counter block (keys v0..v11, v12).
         * The full 16-byte result is written back to [x11] and only the
         * first w14 (= sz & 15) bytes are then copied to 'out'.
         */
        /* Step x11 back to the start of the staged block. */
        "sub	x11, x11, x14\n\t"
        "ld1	{v15.2d}, [x11]\n\t"
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        /* Reverse the bits of every byte for GHASH. */
        "rbit	v15.16b, v15.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /* v16 = accumulator XOR padded block. */
        "eor	v16.16b, v26.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v16.1d, v22.1d\n\t"
        "pmull2	v29.1q, v16.2d, v22.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v16.16b, v16.16b, #8\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /* New accumulator value. */
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Last round has no MixColumns; v12 is XORed in afterwards. */
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        /* Done GHASH */
        /* Restore the original bit order, XOR with the encrypted counter
         * and write all 16 result bytes over the staged block. */
        "rbit	v15.16b, v15.16b\n\t"
        "eor	v14.16b, v14.16b, v15.16b\n\t"
        "st1	{v14.2d}, [x11]\n\t"
        /* Copy w14 result bytes from [x11] to 'out' in 8/4/2/1 byte steps.
         * x11 is advanced by the copy and is not rewound afterwards. */
        "cmp	w14, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_dw_%=\n\t"
        "ldr	x17, [x11], #8\n\t"
        "sub	w14, w14, #8\n\t"
        "str	x17, [%x[out]], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_dw_%=: \n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_sw_%=\n\t"
        "ldr	w17, [x11], #4\n\t"
        "sub	w14, w14, #4\n\t"
        "str	w17, [%x[out]], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_sw_%=: \n\t"
        "cmp	w14, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t"
        "ldrh	w17, [x11], #2\n\t"
        "sub	w14, w14, #2\n\t"
        "strh	w17, [%x[out]], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte_%=: \n\t"
        /* At most one byte can remain after the 8/4/2 steps above. */
        "cbz	w14, L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_end_bytes_%=\n\t"
        "ldrb	w17, [x11], #1\n\t"
        "subs	w14, w14, #1\n\t"
        "strb	w17, [%x[out]], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_end_bytes_%=: \n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_partial_done_%=: \n\t"
        /* Compute the authentication tag into v26.
         *
         * A final block made of two bit-reversed 64-bit values (x8 * 8 and
         * sz * 8) is XORed into the GHASH accumulator v26, which is then
         * multiplied by v22 and reduced. In parallel the block loaded from
         * [x12] is encrypted with keys v0..v11 and v12. The tag is the
         * GHASH result (bit order restored with rbit) XOR that encrypted
         * block.
         * NOTE(review): x8 is presumably the additional-data length in bytes
         * and [x12] the initial counter block - both are set up outside this
         * section; confirm there.
         */
        "ld1	{v14.2d}, [x12]\n\t"
        /* Lengths are converted from bytes to bits (shift left by 3). */
        "lsl	x8, x8, #3\n\t"
        "rbit	x8, x8\n\t"
        "mov	v28.d[0], x8\n\t"
        /* The sz operand is overwritten here. The 64-bit view of sz is used;
         * presumably its upper 32 bits are zero - TODO confirm against the
         * operand declaration. */
        "lsl	%x[sz], %x[sz], #3\n\t"
        "rbit	%x[sz], %x[sz]\n\t"
        "mov	v28.d[1], %x[sz]\n\t"
        "eor	v26.16b, v26.16b, v28.16b\n\t"
        /* Multiply the accumulator by v22, interleaved with the AES rounds. */
        "pmull	v28.1q, v26.1d, v22.1d\n\t"
        "pmull2	v29.1q, v26.2d, v22.2d\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v26.16b, v26.16b, #8\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /* Undo the per-byte bit reversal used during GHASH. */
        "rbit	v26.16b, v26.16b\n\t"
        /* Last round has no MixColumns; v12 is XORed in afterwards. */
        "aese	v14.16b, v11.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        /* v26 = calculated tag. */
        "eor	v26.16b, v26.16b, v14.16b\n\t"
        /* A tag of at least 16 bytes is loaded directly into v28; a shorter
         * one is assembled by the code that follows. */
        "cmp	%w[tagSz], #16\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_192_part_tag_%=\n\t"
        "ld1	{v28.16b}, [%x[tag]]\n\t"
        "b	L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_loaded_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_part_tag_%=: \n\t"
        /* Tag shorter than 16 bytes.
         *
         * The tagSz bytes at 'tag' are copied into zeroed memory at x11 and
         * loaded into v28. The calculated tag in v26 is then written to the
         * same memory, its bytes from offset tagSz up to 16 are cleared, and
         * it is loaded back into v26, so that both vectors are zero beyond
         * tagSz bytes before they are compared.
         *
         * NOTE(review): when a trailing partial block was processed, x11 is
         * still advanced by (sz & 15) from the output copy, so the 16-byte
         * stores and loads below start at that offset. Confirm the buffer
         * behind x11 is large enough for that (up to 31 bytes from where x11
         * originally pointed).
         * NOTE(review): the 64-bit view of tagSz is used below; presumably
         * the upper 32 bits of that register are zero - TODO confirm against
         * the operand declaration.
         */
        /* v28 = 0, written to [x11] to clear 16 bytes. */
        "eor	v28.16b, v28.16b, v28.16b\n\t"
        "mov	x17, %x[tagSz]\n\t"
        "st1	{v28.2d}, [x11]\n\t"
        /* Copy tagSz bytes from 'tag' to [x11] in 8/4/2/1 byte steps;
         * x17 counts the bytes still to copy. */
        "cmp	x17, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_dw_%=\n\t"
        "ldr	x16, [%x[tag]], #8\n\t"
        "sub	x17, x17, #8\n\t"
        "str	x16, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_dw_%=: \n\t"
        "cmp	x17, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_sw_%=\n\t"
        "ldr	w16, [%x[tag]], #4\n\t"
        "sub	x17, x17, #4\n\t"
        "str	w16, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_sw_%=: \n\t"
        "cmp	x17, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t"
        "ldrh	w16, [%x[tag]], #2\n\t"
        "sub	x17, x17, #2\n\t"
        "strh	w16, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_byte_%=: \n\t"
        "cbz	x17, L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_end_bytes_%=\n\t"
        "ldrb	w16, [%x[tag]], #1\n\t"
        "subs	x17, x17, #1\n\t"
        "strb	w16, [x11], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_end_bytes_%=: \n\t"
        /* Rewind x11 and load the zero padded supplied tag into v28. */
        "sub	x11, x11, %x[tagSz]\n\t"
        "ld1	{v28.2d}, [x11]\n\t"
        /* Write the calculated tag to memory; x17 = 16 - tagSz is the number
         * of trailing bytes to clear, starting at offset tagSz. */
        "mov	x17, #16\n\t"
        "st1	{v26.2d}, [x11]\n\t"
        "sub	x17, x17, %x[tagSz]\n\t"
        "add	x11, x11, %x[tagSz]\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_calc_tag_byte_%=: \n\t"
        /* Clear one byte per iteration. This path is only entered with
         * tagSz below 16, so x17 is non-zero on entry. */
        "strb	wzr, [x11], #1\n\t"
        "subs	x17, x17, #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_192_calc_tag_byte_%=\n\t"
        /* x11 is now 16 past the start; step back and reload the truncated
         * calculated tag. The flags set by this subs are overwritten by the
         * cmp in the compare code before anything reads them. */
        "subs	x11, x11, #16\n\t"
        "ld1	{v26.2d}, [x11]\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_loaded_%=: \n\t"
        /* Compare the supplied tag (v28) with the calculated tag (v26)
         * without branching: XOR the two, OR both 64-bit halves together and
         * test for zero. The result is left in the register of the 'in'
         * operand: 0 when the tags are equal, otherwise a value whose low
         * 32 bits are -180 (presumably AES_GCM_AUTH_E from error-crypt.h -
         * confirm).
         */
        "eor	v28.16b, v28.16b, v26.16b\n\t"
        "mov	x16, v28.d[0]\n\t"
        "mov	x17, v28.d[1]\n\t"
        /* Writing w19 clears the upper 32 bits of x19. */
        "mov	w19, #-180\n\t"
        "orr	x16, x16, x17\n\t"
        "cmp	x16, #0\n\t"
        /* All ones on mismatch, zero on match; then masked with x19. */
        "csetm	%x[in], ne\n\t"
        "and	%x[in], %x[in], x19\n\t"
#endif /* !NO_AES_192 */
        "b	L_aes_gcm_decrypt_arm64_crypto_eor3_done_%=\n\t"
        /* AES_GCM_256 */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_start_256_%=: \n\t"
#ifndef NO_AES_256
        /* Entry to the AES-256 path. The eight-blocks-at-a-time code that
         * follows is only used when w14 is at least 32 (signed compare);
         * otherwise control goes to the four-block code. w14 is decremented
         * by 8 for every eight blocks processed below. */
        "cmp	w14, #32\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_8_%=: \n\t"
        /* AES-256 path: first group of eight blocks, no GHASH.
         *
         * Eight counter blocks (v14..v17, v8..v11) are built from the
         * template in v13 with counters w15+1 .. w15+8 and run through 14
         * AESE rounds plus a final XOR. The 15 round keys are read from
         * [x9 + 16 * i] and alternate between v12 and v13, so v13 is
         * overwritten and reloaded from [x12] at the end. The eight input
         * blocks are read into v18..v21 and v0..v3; they are left there
         * unmodified so the next section can GHASH them while it encrypts
         * the following eight counters.
         */
        /* Round key 0. */
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        /* Counters are inserted byte-reversed into the last word. */
        "rev	w24, w24\n\t"
        "mov	v14.s[3], w24\n\t"
        "rev	w23, w23\n\t"
        "mov	v15.s[3], w23\n\t"
        "rev	w22, w22\n\t"
        "mov	v16.s[3], w22\n\t"
        "rev	w21, w21\n\t"
        "mov	v17.s[3], w21\n\t"
        "rev	w20, w20\n\t"
        "mov	v8.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "mov	v9.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "mov	v10.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v11.s[3], w16\n\t"
        /* Round key 1 replaces the counter template in v13. */
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        /* Round key 2. */
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        /* Round key 3. */
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        /* Round key 4. */
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        /* Round key 5. */
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        /* Round key 6. */
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        /* Round key 7. */
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        /* Eight fewer blocks remain. The flags are not used; w14 is
         * compared again at the end of this section. */
        "subs	w14, w14, #8\n\t"
        /* Round key 8. */
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        /* Round key 9. Input blocks 1..4 are loaded into v18..v21 in between
         * the AES instructions of this round. */
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        /* Round key 10. Input blocks 5..8 are loaded into v0..v3 in between
         * the AES instructions of this round. */
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        /* Round key 11. */
        "ldr	q13, [x9, #176]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        /* Round key 12. */
        "ldr	q12, [x9, #192]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        /* Round key 13. */
        "ldr	q13, [x9, #208]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        /* Round key 14. The last AESE (key 13) has no MixColumns and is
         * followed by an XOR with key 14. */
        "ldr	q12, [x9, #224]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        /* XOR the encrypted counters with the eight input blocks. */
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "eor	v8.16b, v8.16b, v0.16b\n\t"
        "eor	v9.16b, v9.16b, v1.16b\n\t"
        "eor	v10.16b, v10.16b, v2.16b\n\t"
        "eor	v11.16b, v11.16b, v3.16b\n\t"
        /* Reload v13 from [x12]; it is used as the counter template again
         * by the code that follows. */
        "ld1	{v13.2d}, [x12]\n\t"
        /* Store the 128 result bytes. */
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "st1	{v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t"
        /* Fewer than eight blocks left: skip the combined AES + GHASH loop
         * and go to the code that finishes this group. */
        "cmp	w14, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_8_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_both_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w24, w24\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "mov	v14.s[3], w24\n\t"
        "rev	w23, w23\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "mov	v15.s[3], w23\n\t"
        "rev	w22, w22\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "mov	v16.s[3], w22\n\t"
        "rev	w21, w21\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "mov	v17.s[3], w21\n\t"
        "rev	w20, w20\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "mov	v8.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "mov	v9.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "mov	v10.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /* Done GHASH */
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #176]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #192]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #208]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #224]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "eor	v8.16b, v8.16b, v0.16b\n\t"
        "eor	v9.16b, v9.16b, v1.16b\n\t"
        "eor	v10.16b, v10.16b, v2.16b\n\t"
        "eor	v11.16b, v11.16b, v3.16b\n\t"
        "st1	{v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.ge	L_aes_gcm_decrypt_arm64_crypto_eor3_256_both_8_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_8_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_4_%=: \n\t"
        "ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t"
        "ld1	{v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t"
        "ld1	{v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t"
        "ld1	{v12.2d}, [x9], #16\n\t"
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_256_done_%=\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_1_%=\n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_2_%=\n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "mov	v14.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "mov	v15.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "mov	v16.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v29.2d, v30.2d}, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "aese	v15.16b, v29.16b\n\t"
        "eor	v15.16b, v15.16b, v30.16b\n\t"
        "aese	v16.16b, v29.16b\n\t"
        "eor	v16.16b, v16.16b, v30.16b\n\t"
        "aese	v17.16b, v29.16b\n\t"
        "eor	v17.16b, v17.16b, v30.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_both_4_%=: \n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w20, w20\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "mov	v14.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "mov	v15.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "mov	v16.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /* Done GHASH */
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v10.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v10.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v11.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v11.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v29.2d, v30.2d}, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "aese	v15.16b, v29.16b\n\t"
        "eor	v15.16b, v15.16b, v30.16b\n\t"
        "aese	v16.16b, v29.16b\n\t"
        "eor	v16.16b, v16.16b, v30.16b\n\t"
        "aese	v17.16b, v29.16b\n\t"
        "eor	v17.16b, v17.16b, v30.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "b.ge	L_aes_gcm_decrypt_arm64_crypto_eor3_256_both_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_4_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "cmp	w14, #1\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_1_%=\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_256_done_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_2_%=: \n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w15, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "mov	v14.s[3], w20\n\t"
        "rev	w16, w15\n\t"
        "mov	v15.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "subs	w14, w14, #2\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v10.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v11.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v29.2d, v30.2d}, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "aese	v15.16b, v29.16b\n\t"
        "eor	v15.16b, v15.16b, v30.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "st1	{v14.16b, v15.16b}, [%x[out]], #32\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v19.1d, v22.1d\n\t"
        "pmull2	v29.1q, v19.2d, v22.2d\n\t"
        "ext	v31.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v21.1d\n\t"
        "pmull2	v26.1q, v23.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "cbz	w14, L_aes_gcm_decrypt_arm64_crypto_eor3_256_done_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_1_%=: \n\t"
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ldr	q29, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ldr	q30, [x9, #16]\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "st1	{v14.16b}, [%x[out]], #16\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_done_%=: \n\t"
        "ands	w14, %w[sz], #15\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_eor3_256_partial_done_%=\n\t"
        "eor	v15.16b, v15.16b, v15.16b\n\t"
        "mov	w19, w14\n\t"
        "st1	{v15.2d}, [x11]\n\t"
        "cmp	x19, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_dw_%=\n\t"
        "ldr	x17, [%x[in]], #8\n\t"
        "sub	x19, x19, #8\n\t"
        "str	x17, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_dw_%=: \n\t"
        "cmp	x19, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_sw_%=\n\t"
        "ldr	w17, [%x[in]], #4\n\t"
        "sub	x19, x19, #4\n\t"
        "str	w17, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_sw_%=: \n\t"
        "cmp	x19, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_byte_%=\n\t"
        "ldrh	w17, [%x[in]], #2\n\t"
        "sub	x19, x19, #2\n\t"
        "strh	w17, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_byte_%=: \n\t"
        "cbz	x19, L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_bytes_%=\n\t"
        "ldrb	w17, [%x[in]], #1\n\t"
        "subs	x19, x19, #1\n\t"
        "strb	w17, [x11], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_bytes_%=: \n\t"
        "sub	x11, x11, x14\n\t"
        "ld1	{v15.2d}, [x11]\n\t"
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rbit	v15.16b, v15.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v16.16b, v26.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v16.1d, v22.1d\n\t"
        "pmull2	v29.1q, v16.2d, v22.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v16.16b, v16.16b, #8\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ldr	q29, [x9]\n\t"
        /* Done GHASH */
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ldr	q30, [x9, #16]\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "rbit	v15.16b, v15.16b\n\t"
        "eor	v14.16b, v14.16b, v15.16b\n\t"
        "st1	{v14.2d}, [x11]\n\t"
        "cmp	w14, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_dw_%=\n\t"
        "ldr	x17, [x11], #8\n\t"
        "sub	w14, w14, #8\n\t"
        "str	x17, [%x[out]], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_dw_%=: \n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_sw_%=\n\t"
        "ldr	w17, [x11], #4\n\t"
        "sub	w14, w14, #4\n\t"
        "str	w17, [%x[out]], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_sw_%=: \n\t"
        "cmp	w14, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t"
        "ldrh	w17, [x11], #2\n\t"
        "sub	w14, w14, #2\n\t"
        "strh	w17, [%x[out]], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte_%=: \n\t"
        "cbz	w14, L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_end_bytes_%=\n\t"
        "ldrb	w17, [x11], #1\n\t"
        "subs	w14, w14, #1\n\t"
        "strb	w17, [%x[out]], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_end_bytes_%=: \n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_partial_done_%=: \n\t"
        "ld1	{v14.2d}, [x12]\n\t"
        "lsl	x8, x8, #3\n\t"
        "rbit	x8, x8\n\t"
        "mov	v28.d[0], x8\n\t"
        "lsl	%x[sz], %x[sz], #3\n\t"
        "rbit	%x[sz], %x[sz]\n\t"
        "mov	v28.d[1], %x[sz]\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v26.16b, v28.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v28.1q, v26.1d, v22.1d\n\t"
        "pmull2	v29.1q, v26.2d, v22.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v26.16b, v26.16b, #8\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "ldr	q11, [x9, #-32]\n\t"
        "aese	v14.16b, v10.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "ldr	q12, [x9, #-16]\n\t"
        "aese	v14.16b, v11.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ldr	q29, [x9]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "rbit	v26.16b, v26.16b\n\t"
        "ldr	q30, [x9, #16]\n\t"
        "aese	v14.16b, v29.16b\n\t"
        "eor	v14.16b, v14.16b, v30.16b\n\t"
        "eor	v26.16b, v26.16b, v14.16b\n\t"
        "cmp	%w[tagSz], #16\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_256_part_tag_%=\n\t"
        "ld1	{v28.16b}, [%x[tag]]\n\t"
        "b	L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_loaded_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_part_tag_%=: \n\t"
        "eor	v28.16b, v28.16b, v28.16b\n\t"
        "mov	x17, %x[tagSz]\n\t"
        "st1	{v28.2d}, [x11]\n\t"
        "cmp	x17, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_dw_%=\n\t"
        "ldr	x16, [%x[tag]], #8\n\t"
        "sub	x17, x17, #8\n\t"
        "str	x16, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_dw_%=: \n\t"
        "cmp	x17, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_sw_%=\n\t"
        "ldr	w16, [%x[tag]], #4\n\t"
        "sub	x17, x17, #4\n\t"
        "str	w16, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_sw_%=: \n\t"
        "cmp	x17, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t"
        "ldrh	w16, [%x[tag]], #2\n\t"
        "sub	x17, x17, #2\n\t"
        "strh	w16, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_byte_%=: \n\t"
        "cbz	x17, L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_end_bytes_%=\n\t"
        "ldrb	w16, [%x[tag]], #1\n\t"
        "subs	x17, x17, #1\n\t"
        "strb	w16, [x11], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_end_bytes_%=: \n\t"
        "sub	x11, x11, %x[tagSz]\n\t"
        "ld1	{v28.2d}, [x11]\n\t"
        "mov	x17, #16\n\t"
        "st1	{v26.2d}, [x11]\n\t"
        "sub	x17, x17, %x[tagSz]\n\t"
        "add	x11, x11, %x[tagSz]\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_calc_tag_byte_%=: \n\t"
        "strb	wzr, [x11], #1\n\t"
        "subs	x17, x17, #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_256_calc_tag_byte_%=\n\t"
        "subs	x11, x11, #16\n\t"
        "ld1	{v26.2d}, [x11]\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_loaded_%=: \n\t"
        "eor	v28.16b, v28.16b, v26.16b\n\t"
        "mov	x16, v28.d[0]\n\t"
        "mov	x17, v28.d[1]\n\t"
        "mov	w19, #-180\n\t"
        "orr	x16, x16, x17\n\t"
        "cmp	x16, #0\n\t"
        "csetm	%x[in], ne\n\t"
        "and	%x[in], %x[in], x19\n\t"
#endif /* !NO_AES_256 */
        "b	L_aes_gcm_decrypt_arm64_crypto_eor3_done_%=\n\t"
        /* AES_GCM_128 */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_start_128_%=: \n\t"
#ifndef NO_AES_128
        "cmp	w14, #32\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rev	w24, w24\n\t"
        "mov	v14.s[3], w24\n\t"
        "rev	w23, w23\n\t"
        "mov	v15.s[3], w23\n\t"
        "rev	w22, w22\n\t"
        "mov	v16.s[3], w22\n\t"
        "rev	w21, w21\n\t"
        "mov	v17.s[3], w21\n\t"
        "rev	w20, w20\n\t"
        "mov	v8.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "mov	v9.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "mov	v10.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "eor	v8.16b, v8.16b, v0.16b\n\t"
        "eor	v9.16b, v9.16b, v1.16b\n\t"
        "eor	v10.16b, v10.16b, v2.16b\n\t"
        "eor	v11.16b, v11.16b, v3.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "st1	{v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_8_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_both_8_%=: \n\t"
        "ldr	q12, [x9]\n\t"
        "add	w24, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w23, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w22, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w21, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w20, w15, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w19, w15, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w17, w15, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w15, w15, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w24, w24\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "mov	v14.s[3], w24\n\t"
        "rev	w23, w23\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "mov	v15.s[3], w23\n\t"
        "rev	w22, w22\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "mov	v16.s[3], w22\n\t"
        "rev	w21, w21\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "mov	v17.s[3], w21\n\t"
        "rev	w20, w20\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "mov	v8.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "mov	v9.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "mov	v10.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "mov	v11.s[3], w16\n\t"
        "ldr	q13, [x9, #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w14, w14, #8\n\t"
        "ldr	q12, [x9, #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /* Done GHASH */
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [x9, #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [x9, #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "ld1	{v13.2d}, [x12]\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "eor	v8.16b, v8.16b, v0.16b\n\t"
        "eor	v9.16b, v9.16b, v1.16b\n\t"
        "eor	v10.16b, v10.16b, v2.16b\n\t"
        "eor	v11.16b, v11.16b, v3.16b\n\t"
        "st1	{v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t"
        "cmp	w14, #8\n\t"
        "b.ge	L_aes_gcm_decrypt_arm64_crypto_eor3_128_both_8_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_8_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_4_%=: \n\t"
        "ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t"
        "ld1	{v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t"
        "ld1	{v8.2d, v9.2d}, [x9], #32\n\t"
        "ld1	{v10.2d}, [x9]\n\t"
        "cmp	w14, #1\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_128_done_%=\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_1_%=\n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_2_%=\n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "mov	v14.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "mov	v15.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "mov	v16.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "eor	v15.16b, v15.16b, v10.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "eor	v16.16b, v16.16b, v10.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "eor	v17.16b, v17.16b, v10.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_both_4_%=: \n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w19, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w17, w15, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w15, w15, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w20, w20\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "mov	v14.s[3], w20\n\t"
        "rev	w19, w19\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "mov	v15.s[3], w19\n\t"
        "rev	w17, w17\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "mov	v16.s[3], w17\n\t"
        "rev	w16, w15\n\t"
        "mov	v17.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v2.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v2.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v16.16b, v3.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v17.16b, v3.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "aese	v16.16b, v4.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v4.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v5.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v5.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v16.16b, v6.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v17.16b, v6.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "subs	w14, w14, #4\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /* Done GHASH */
        "aese	v16.16b, v7.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v7.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[in]], #0x40\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v8.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v8.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "eor	v15.16b, v15.16b, v10.16b\n\t"
        "aese	v16.16b, v9.16b\n\t"
        "eor	v16.16b, v16.16b, v10.16b\n\t"
        "aese	v17.16b, v9.16b\n\t"
        "eor	v17.16b, v17.16b, v10.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "eor	v16.16b, v16.16b, v20.16b\n\t"
        "eor	v17.16b, v17.16b, v21.16b\n\t"
        "cmp	w14, #4\n\t"
        "st1	{v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t"
        "b.ge	L_aes_gcm_decrypt_arm64_crypto_eor3_128_both_4_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_4_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v20.1d\n\t"
        "pmull2	v26.1q, v23.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v19.1d\n\t"
        "pmull2	v26.1q, v24.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v18.1d\n\t"
        "pmull2	v26.1q, v25.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "cmp	w14, #1\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_1_%=\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_128_done_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_2_%=: \n\t"
        "add	w20, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w15, w15, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "rev	w20, w20\n\t"
        "mov	v14.s[3], w20\n\t"
        "rev	w16, w15\n\t"
        "mov	v15.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v3.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v4.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v5.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v6.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "subs	w14, w14, #2\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v7.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v8.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v9.16b\n\t"
        "eor	v15.16b, v15.16b, v10.16b\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "eor	v15.16b, v15.16b, v19.16b\n\t"
        "st1	{v14.16b, v15.16b}, [%x[out]], #32\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v19.1d, v22.1d\n\t"
        "pmull2	v29.1q, v19.2d, v22.2d\n\t"
        "ext	v31.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v21.1d\n\t"
        "pmull2	v26.1q, v23.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor3	v30.16b, v30.16b, v26.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "cbz	w14, L_aes_gcm_decrypt_arm64_crypto_eor3_128_done_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_1_%=: \n\t"
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "eor	v14.16b, v14.16b, v18.16b\n\t"
        "st1	{v14.16b}, [%x[out]], #16\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "eor	v21.16b, v26.16b, v18.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v21.1d, v22.1d\n\t"
        "pmull2	v29.1q, v21.2d, v22.2d\n\t"
        "ext	v31.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_done_%=: \n\t"
        "ands	w14, %w[sz], #15\n\t"
        "b.eq	L_aes_gcm_decrypt_arm64_crypto_eor3_128_partial_done_%=\n\t"
        "eor	v15.16b, v15.16b, v15.16b\n\t"
        "mov	w19, w14\n\t"
        "st1	{v15.2d}, [x11]\n\t"
        "cmp	x19, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_dw_%=\n\t"
        "ldr	x17, [%x[in]], #8\n\t"
        "sub	x19, x19, #8\n\t"
        "str	x17, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_dw_%=: \n\t"
        "cmp	x19, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_sw_%=\n\t"
        "ldr	w17, [%x[in]], #4\n\t"
        "sub	x19, x19, #4\n\t"
        "str	w17, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_sw_%=: \n\t"
        "cmp	x19, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_byte_%=\n\t"
        "ldrh	w17, [%x[in]], #2\n\t"
        "sub	x19, x19, #2\n\t"
        "strh	w17, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_byte_%=: \n\t"
        "cbz	x19, L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_bytes_%=\n\t"
        "ldrb	w17, [%x[in]], #1\n\t"
        "subs	x19, x19, #1\n\t"
        "strb	w17, [x11], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_bytes_%=: \n\t"
        "sub	x11, x11, x14\n\t"
        "ld1	{v15.2d}, [x11]\n\t"
        "add	w15, w15, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "rbit	v15.16b, v15.16b\n\t"
        "rev	w16, w15\n\t"
        "mov	v14.s[3], w16\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v16.16b, v26.16b, v15.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v16.1d, v22.1d\n\t"
        "pmull2	v29.1q, v16.2d, v22.2d\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v31.16b, v16.16b, v16.16b, #8\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "rbit	v15.16b, v15.16b\n\t"
        "eor	v14.16b, v14.16b, v15.16b\n\t"
        "st1	{v14.2d}, [x11]\n\t"
        "cmp	w14, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_dw_%=\n\t"
        "ldr	x17, [x11], #8\n\t"
        "sub	w14, w14, #8\n\t"
        "str	x17, [%x[out]], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_dw_%=: \n\t"
        "cmp	w14, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_sw_%=\n\t"
        "ldr	w17, [x11], #4\n\t"
        "sub	w14, w14, #4\n\t"
        "str	w17, [%x[out]], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_sw_%=: \n\t"
        "cmp	w14, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t"
        "ldrh	w17, [x11], #2\n\t"
        "sub	w14, w14, #2\n\t"
        "strh	w17, [%x[out]], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte_%=: \n\t"
        "cbz	w14, L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_end_bytes_%=\n\t"
        "ldrb	w17, [x11], #1\n\t"
        "subs	w14, w14, #1\n\t"
        "strb	w17, [%x[out]], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_end_bytes_%=: \n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_partial_done_%=: \n\t"
        "ld1	{v14.2d}, [x12]\n\t"
        "lsl	x8, x8, #3\n\t"
        "rbit	x8, x8\n\t"
        "mov	v28.d[0], x8\n\t"
        "lsl	%x[sz], %x[sz], #3\n\t"
        "rbit	%x[sz], %x[sz]\n\t"
        "mov	v28.d[1], %x[sz]\n\t"
        "eor	v26.16b, v26.16b, v28.16b\n\t"
        "pmull	v28.1q, v26.1d, v22.1d\n\t"
        "pmull2	v29.1q, v26.2d, v22.2d\n\t"
        "ext	v31.16b, v26.16b, v26.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v14.16b, v3.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor3	v31.16b, v31.16b, v29.16b, v30.16b\n\t"
        "aese	v14.16b, v4.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v14.16b, v5.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v14.16b, v6.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        "aese	v14.16b, v7.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v14.16b, v8.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "rbit	v26.16b, v26.16b\n\t"
        "aese	v14.16b, v9.16b\n\t"
        "eor	v14.16b, v14.16b, v10.16b\n\t"
        "eor	v26.16b, v26.16b, v14.16b\n\t"
        "cmp	%w[tagSz], #16\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_128_part_tag_%=\n\t"
        "ld1	{v28.16b}, [%x[tag]]\n\t"
        "b	L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_loaded_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_part_tag_%=: \n\t"
        "eor	v28.16b, v28.16b, v28.16b\n\t"
        "mov	x17, %x[tagSz]\n\t"
        "st1	{v28.2d}, [x11]\n\t"
        "cmp	x17, #8\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_dw_%=\n\t"
        "ldr	x16, [%x[tag]], #8\n\t"
        "sub	x17, x17, #8\n\t"
        "str	x16, [x11], #8\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_dw_%=: \n\t"
        "cmp	x17, #4\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_sw_%=\n\t"
        "ldr	w16, [%x[tag]], #4\n\t"
        "sub	x17, x17, #4\n\t"
        "str	w16, [x11], #4\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_sw_%=: \n\t"
        "cmp	x17, #2\n\t"
        "b.lt	L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t"
        "ldrh	w16, [%x[tag]], #2\n\t"
        "sub	x17, x17, #2\n\t"
        "strh	w16, [x11], #2\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_byte_%=: \n\t"
        "cbz	x17, L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_end_bytes_%=\n\t"
        "ldrb	w16, [%x[tag]], #1\n\t"
        "subs	x17, x17, #1\n\t"
        "strb	w16, [x11], #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_end_bytes_%=: \n\t"
        "sub	x11, x11, %x[tagSz]\n\t"
        "ld1	{v28.2d}, [x11]\n\t"
        "mov	x17, #16\n\t"
        "st1	{v26.2d}, [x11]\n\t"
        "sub	x17, x17, %x[tagSz]\n\t"
        "add	x11, x11, %x[tagSz]\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_calc_tag_byte_%=: \n\t"
        "strb	wzr, [x11], #1\n\t"
        "subs	x17, x17, #1\n\t"
        "b.ne	L_aes_gcm_decrypt_arm64_crypto_eor3_128_calc_tag_byte_%=\n\t"
        "subs	x11, x11, #16\n\t"
        "ld1	{v26.2d}, [x11]\n\t"
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_loaded_%=: \n\t"
        "eor	v28.16b, v28.16b, v26.16b\n\t"
        "mov	x16, v28.d[0]\n\t"
        "mov	x17, v28.d[1]\n\t"
        "mov	w19, #-180\n\t"
        "orr	x16, x16, x17\n\t"
        "cmp	x16, #0\n\t"
        "csetm	%x[in], ne\n\t"
        "and	%x[in], %x[in], x19\n\t"
#endif /* !NO_AES_128 */
        "\n"
    "L_aes_gcm_decrypt_arm64_crypto_eor3_done_%=: \n\t"
        "ldp	x29, x30, [sp], #0x50\n\t"
        : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz),
          [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), [key] "+r" (key),
          [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp), [reg] "+r" (reg),
          [nr] "+r" (nr)
        : [in] "r" (in), [nonce] "r" (nonce), [tag] "r" (tag), [aad] "r" (aad)
        : "memory", "cc", "x14", "x15", "x16", "x17", "x19", "x20", "x21",
            "x22", "x23", "x24", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
            "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
            "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
            "v27", "v28", "v29", "v30", "v31"
    );
    return (word32)(size_t)in;
}

#endif /* HAVE_AES_DECRYPT */
#endif /* !WOLFSSL_ARMASM_CRYPTO_SHA3 */
#ifdef WOLFSSL_AESGCM_STREAM
/* Derive the GCM initial counter block from the nonce and encrypt it.
 *
 * A 12-byte nonce is used directly: counter = nonce || 00 00 00 01.
 * Any other length is run through GHASH with gcm_h: every whole 16-byte
 * block, then a zero-padded partial block (if any), then a block holding the
 * nonce length in bits. The bit-reversed GHASH result is the counter block.
 *
 * key      Expanded round keys, read as nr + 1 consecutive 16-byte blocks.
 * nr       Number of AES rounds; the code handles 10, 12 and 14.
 * nonce    Nonce/IV bytes.
 * nonceSz  Length of nonce in bytes.
 * gcm_h    16-byte GHASH key, used exactly as loaded (no bit reversal is
 *          applied to it here).
 * counter  Receives the 16-byte initial counter block.
 * initCtr  Receives the 16-byte AES encryption of the initial counter block;
 *          also used as 16 bytes of scratch space for a partial nonce block.
 */
void AES_GCM_init_AARCH64(byte* key, int nr, const byte* nonce, word32 nonceSz,
    byte* gcm_h, byte* counter, byte* initCtr)
{
    __asm__ __volatile__ (
        /* v6 = 0x87 in each 64-bit lane: GHASH reduction constant. v5 = H */
        "movi	v6.16b, #0x87\n\t"
        "ld1	{v5.2d}, [%x[gcm_h]]\n\t"
        "ushr	v6.2d, v6.2d, #56\n\t"
        /* Load Nonce */
        "cmp	%w[nonceSz], #12\n\t"
        "b.ne	L_aes_gcm_init_arm64_crypto_ghash_nonce_%=\n\t"
        /* 12-byte nonce: v4 = nonce || 00 00 00 01 */
        "ldr	x9, [%x[nonce]]\n\t"
        "movi	v4.4s, #1, lsl 24\n\t"
        "ldr	w10, [%x[nonce], #8]\n\t"
        "mov	v4.d[0], x9\n\t"
        "mov	v4.s[2], w10\n\t"
        "b	L_aes_gcm_init_arm64_crypto_done_nonce_%=\n\t"
        "\n"
    "L_aes_gcm_init_arm64_crypto_ghash_nonce_%=: \n\t"
        /* Other lengths: v4 = GHASH state (starts at 0), w7 = whole blocks */
        "eor	v4.16b, v4.16b, v4.16b\n\t"
        "lsr	w7, %w[nonceSz], #4\n\t"
        "cbz	w7, L_aes_gcm_init_arm64_crypto_done_%=\n\t"
        "\n"
    "L_aes_gcm_init_arm64_crypto_start_1_%=: \n\t"
        "ld1	{v0.16b}, [%x[nonce]], #16\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "eor	v3.16b, v4.16b, v0.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v7.1q, v3.1d, v5.1d\n\t"
        "pmull2	v8.1q, v3.2d, v5.2d\n\t"
        "ext	v10.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v9.1q, v10.1d, v5.1d\n\t"
        "pmull2	v10.1q, v10.2d, v5.2d\n\t"
        "eor	v9.16b, v9.16b, v10.16b\n\t"
        /*   Reduce */
        "ext	v10.16b, v7.16b, v8.16b, #8\n\t"
        "pmull2	v8.1q, v8.2d, v6.2d\n\t"
        "eor	v10.16b, v10.16b, v8.16b\n\t"
        "eor	v10.16b, v10.16b, v9.16b\n\t"
        "pmull2	v9.1q, v10.2d, v6.2d\n\t"
        "mov	v7.d[1], v10.d[0]\n\t"
        "eor	v4.16b, v7.16b, v9.16b\n\t"
        /* Done GHASH */
        "subs	w7, w7, #1\n\t"
        "b.ne	L_aes_gcm_init_arm64_crypto_start_1_%=\n\t"
        "\n"
    "L_aes_gcm_init_arm64_crypto_done_%=: \n\t"
        /* w13 = trailing byte count (0..15) */
        "and	w13, %w[nonceSz], #15\n\t"
        "cbz	x13, L_aes_gcm_init_arm64_crypto_partial_done_%=\n\t"
        /* Zero 16 bytes at initCtr, then copy the trailing nonce bytes in
         * 8/4/2/1 byte steps so the partial block is zero padded. */
        "eor	v7.16b, v7.16b, v7.16b\n\t"
        "mov	w12, w13\n\t"
        "st1	{v7.2d}, [%x[initCtr]]\n\t"
        "cmp	w12, #8\n\t"
        "b.lt	L_aes_gcm_init_arm64_crypto_start_dw_%=\n\t"
        "ldr	x11, [%x[nonce]], #8\n\t"
        "sub	w12, w12, #8\n\t"
        "str	x11, [%x[initCtr]], #8\n\t"
        "\n"
    "L_aes_gcm_init_arm64_crypto_start_dw_%=: \n\t"
        "cmp	w12, #4\n\t"
        "b.lt	L_aes_gcm_init_arm64_crypto_start_sw_%=\n\t"
        "ldr	w11, [%x[nonce]], #4\n\t"
        "sub	w12, w12, #4\n\t"
        "str	w11, [%x[initCtr]], #4\n\t"
        "\n"
    "L_aes_gcm_init_arm64_crypto_start_sw_%=: \n\t"
        "cmp	w12, #2\n\t"
        "b.lt	L_aes_gcm_init_arm64_crypto_start_byte_%=\n\t"
        "ldrh	w11, [%x[nonce]], #2\n\t"
        "sub	w12, w12, #2\n\t"
        "strh	w11, [%x[initCtr]], #2\n\t"
        "\n"
    "L_aes_gcm_init_arm64_crypto_start_byte_%=: \n\t"
        "cbz	w12, L_aes_gcm_init_arm64_crypto_end_bytes_%=\n\t"
        "ldrb	w11, [%x[nonce]], #1\n\t"
        "subs	w12, w12, #1\n\t"
        "strb	w11, [%x[initCtr]], #1\n\t"
        "b.ne	L_aes_gcm_init_arm64_crypto_start_byte_%=\n\t"
        "\n"
    "L_aes_gcm_init_arm64_crypto_end_bytes_%=: \n\t"
        /* Rewind initCtr to the start of the scratch block and hash it */
        "sub	%x[initCtr], %x[initCtr], x13\n\t"
        "ld1	{v0.2d}, [%x[initCtr]]\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "eor	v3.16b, v4.16b, v0.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v7.1q, v3.1d, v5.1d\n\t"
        "pmull2	v8.1q, v3.2d, v5.2d\n\t"
        "ext	v10.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v9.1q, v10.1d, v5.1d\n\t"
        "pmull2	v10.1q, v10.2d, v5.2d\n\t"
        "eor	v9.16b, v9.16b, v10.16b\n\t"
        /*   Reduce */
        "ext	v10.16b, v7.16b, v8.16b, #8\n\t"
        "pmull2	v8.1q, v8.2d, v6.2d\n\t"
        "eor	v10.16b, v10.16b, v8.16b\n\t"
        "eor	v10.16b, v10.16b, v9.16b\n\t"
        "pmull2	v9.1q, v10.2d, v6.2d\n\t"
        "mov	v7.d[1], v10.d[0]\n\t"
        "eor	v4.16b, v7.16b, v9.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_init_arm64_crypto_partial_done_%=: \n\t"
        /* Length block: 64 zero bits followed by the nonce length in bits.
         * nonceSz is a 32-bit value, so only the low 32 bits of its register
         * are meaningful; ubfiz zero-extends them while shifting left by 3. */
        "eor	x7, x7, x7\n\t"
        "ubfiz	x13, %x[nonceSz], #3, #32\n\t"
        "mov	v7.d[0], x7\n\t"
        "mov	v7.d[1], x13\n\t"
        "rev64	v7.16b, v7.16b\n\t"
        "rbit	v7.16b, v7.16b\n\t"
        "eor	v4.16b, v4.16b, v7.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v7.1q, v4.1d, v5.1d\n\t"
        "pmull2	v8.1q, v4.2d, v5.2d\n\t"
        "ext	v10.16b, v4.16b, v4.16b, #8\n\t"
        "pmull	v9.1q, v10.1d, v5.1d\n\t"
        "pmull2	v10.1q, v10.2d, v5.2d\n\t"
        "eor	v9.16b, v9.16b, v10.16b\n\t"
        /*   Reduce */
        "ext	v10.16b, v7.16b, v8.16b, #8\n\t"
        "pmull2	v8.1q, v8.2d, v6.2d\n\t"
        "eor	v10.16b, v10.16b, v8.16b\n\t"
        "eor	v10.16b, v10.16b, v9.16b\n\t"
        "pmull2	v9.1q, v10.2d, v6.2d\n\t"
        "mov	v7.d[1], v10.d[0]\n\t"
        "eor	v4.16b, v7.16b, v9.16b\n\t"
        /* Undo the per-byte bit reversal to get the counter block */
        "rbit	v4.16b, v4.16b\n\t"
        "\n"
    "L_aes_gcm_init_arm64_crypto_done_nonce_%=: \n\t"
        /* Publish the counter block, then encrypt it in place in v4 */
        "st1	{v4.2d}, [%x[counter]]\n\t"
        "ld1	{v7.2d, v8.2d, v9.2d, v10.2d}, [%x[key]], #0x40\n\t"
        "aese	v4.16b, v7.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v4.16b, v8.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v4.16b, v9.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v4.16b, v10.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "ld1	{v7.2d, v8.2d, v9.2d, v10.2d}, [%x[key]], #0x40\n\t"
        "aese	v4.16b, v7.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v4.16b, v8.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v4.16b, v9.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v4.16b, v10.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        /* nr - 10 == 0: these two round keys finish a 10-round schedule */
        "subs	%w[nr], %w[nr], #10\n\t"
        "ld1	{v7.2d, v8.2d}, [%x[key]], #32\n\t"
        "aese	v4.16b, v7.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v4.16b, v8.16b\n\t"
        "b.eq	L_aes_gcm_init_arm64_crypto_round_done_%=\n\t"
        /* Two more rounds; done if nr was 12 */
        "ld1	{v7.2d, v8.2d}, [%x[key]], #32\n\t"
        "subs	%w[nr], %w[nr], #2\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v4.16b, v7.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v4.16b, v8.16b\n\t"
        "b.eq	L_aes_gcm_init_arm64_crypto_round_done_%=\n\t"
        /* Final two rounds of a 14-round schedule */
        "ld1	{v7.2d, v8.2d}, [%x[key]], #32\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v4.16b, v7.16b\n\t"
        "aesmc	v4.16b, v4.16b\n\t"
        "aese	v4.16b, v8.16b\n\t"
        "\n"
    "L_aes_gcm_init_arm64_crypto_round_done_%=: \n\t"
        /* XOR in the last round key and store the encrypted counter block */
        "ld1	{v7.2d}, [%x[key]]\n\t"
        "eor	v4.16b, v4.16b, v7.16b\n\t"
        "st1	{v4.2d}, [%x[initCtr]]\n\t"
        /* nonce is advanced by the post-indexed loads above, so it must be a
         * read-write operand rather than an input-only one. */
        : [key] "+r" (key), [nr] "+r" (nr), [nonceSz] "+r" (nonceSz),
          [gcm_h] "+r" (gcm_h), [counter] "+r" (counter),
          [initCtr] "+r" (initCtr), [nonce] "+r" (nonce)
        :
        : "memory", "cc", "x7", "x9", "x10", "x11", "x12", "x13", "v0", "v3",
            "v4", "v5", "v6", "v7", "v8", "v9", "v10"
    );
}

/* Fold one 16-byte block into the GHASH state: tag = (tag ^ data) * H.
 *
 * data   16 bytes read; each byte is bit-reversed (rbit) before use.
 * tag    16-byte GHASH state; read, updated and written back in place.
 * gcm_h  16-byte hash key H; read only.
 *
 * NOTE(review): tag and H are used exactly as loaded (no rbit), so they are
 * presumably kept in the bit-reflected form by the caller - confirm.
 *
 * Registers: v4 = data, v5 = H, v6 = tag, v7 = 0x87 in each 64-bit lane
 * (multiplier for the reduce step), v8 = tag ^ data, v0-v3 = partial
 * products.
 */
void AES_GCM_ghash_block_AARCH64(const byte* data, byte* tag, byte* gcm_h)
{
    __asm__ __volatile__ (
        "ld1	{v6.2d}, [%x[tag]]\n\t"
        /* 0x87 in every byte, then shift each 64-bit lane right by 56 so
         * only the low byte of each lane remains: v7 = { 0x87, 0x87 }. */
        "movi	v7.16b, #0x87\n\t"
        "ld1	{v5.2d}, [%x[gcm_h]]\n\t"
        "ushr	v7.2d, v7.2d, #56\n\t"
        "ld1	{v4.2d}, [%x[data]]\n\t"
        "rbit	v4.16b, v4.16b\n\t"
        /* C = tag ^ data */
        "eor	v8.16b, v6.16b, v4.16b\n\t"
        /*   X  = C * H^1 */
        /* v0 = C.lo * H.lo, v1 = C.hi * H.hi */
        "pmull	v0.1q, v8.1d, v5.1d\n\t"
        "pmull2	v1.1q, v8.2d, v5.2d\n\t"
        /* Swap the halves of C to form the cross terms:
         * v2 = C.hi * H.lo ^ C.lo * H.hi */
        "ext	v3.16b, v8.16b, v8.16b, #8\n\t"
        "pmull	v2.1q, v3.1d, v5.1d\n\t"
        "pmull2	v3.1q, v3.2d, v5.2d\n\t"
        "eor	v2.16b, v2.16b, v3.16b\n\t"
        /*   Reduce */
        /* Two folds by the 0x87 constant bring the 256-bit product back to
         * 128 bits; the result is assembled in v6. */
        "ext	v3.16b, v0.16b, v1.16b, #8\n\t"
        "pmull2	v1.1q, v1.2d, v7.2d\n\t"
        "eor	v3.16b, v3.16b, v1.16b\n\t"
        "eor	v3.16b, v3.16b, v2.16b\n\t"
        "pmull2	v2.1q, v3.2d, v7.2d\n\t"
        "mov	v0.d[1], v3.d[0]\n\t"
        "eor	v6.16b, v0.16b, v2.16b\n\t"
        /* Done GHASH */
        "st1	{v6.2d}, [%x[tag]]\n\t"
        : [tag] "+r" (tag), [gcm_h] "+r" (gcm_h)
        : [data] "r" (data)
        : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8"
    );
}

/* Fold whole 16-byte blocks of additional authenticated data into the GHASH
 * state.
 *
 * aadt    input data; read 16 bytes per block and advanced as it is consumed.
 * abytes  number of bytes at aadt. It is shifted right by 4 to get a block
 *         count, so only full 16-byte blocks are processed; a trailing
 *         partial block is not read here.
 * tag     16-byte GHASH state; loaded at entry and stored back at exit.
 * gcm_h   16-byte hash key H; read only.
 *
 * Powers of H are precomputed only as far as the length needs:
 *   abytes >= 0x40  : H^2
 *   abytes >= 0x100 : H^3, H^4
 *   abytes >= 0x400 : H^5 .. H^8
 * and the block loop is then entered at a matching width (8, 4, 2 or 1 blocks
 * per iteration), dropping to narrower loops for the remainder.
 *
 * NOTE(review): the length checks use signed condition codes (b.lt/b.ge/b.gt)
 * on the unsigned abytes, so a value with bit 31 set would be treated as
 * negative - presumably callers never pass lengths that large; confirm.
 *
 * Registers: v20 = running tag (also reused as scratch inside an iteration
 * once it has been XORed into the first block), v21 = 0x87 in each 64-bit
 * lane (reduce multiplier), v12..v19 = H^1..H^8, v0-v7 = data blocks,
 * v8-v11 = partial products.
 */
void AES_GCM_aad_update_AARCH64(const byte* aadt, word32 abytes, byte* tag,
    byte* gcm_h)
{
    __asm__ __volatile__ (
        "ld1	{v20.2d}, [%x[tag]]\n\t"
        "movi	v21.16b, #0x87\n\t"
        "ld1	{v12.2d}, [%x[gcm_h]]\n\t"
        "ushr	v21.2d, v21.2d, #56\n\t"
        "cmp	%w[abytes], #0x40\n\t"
        "b.lt	L_aes_gcm_aad_update_arm64_crypto_h_done_%=\n\t"
        /* Square H => H^2 */
        "pmull2	v11.1q, v12.2d, v12.2d\n\t"
        "pmull	v10.1q, v12.1d, v12.1d\n\t"
        "pmull2	v8.1q, v11.2d, v21.2d\n\t"
        "ext	v9.16b, v10.16b, v11.16b, #8\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "pmull2	v11.1q, v9.2d, v21.2d\n\t"
        "mov	v10.d[1], v9.d[0]\n\t"
        "eor	v13.16b, v10.16b, v11.16b\n\t"
        "cmp	%w[abytes], #0x100\n\t"
        "b.lt	L_aes_gcm_aad_update_arm64_crypto_h_done_%=\n\t"
        /* Multiply H and H^2  => H^3 */
        "pmull	v8.1q, v12.1d, v13.1d\n\t"
        "pmull2	v9.1q, v12.2d, v13.2d\n\t"
        "ext	v11.16b, v12.16b, v12.16b, #8\n\t"
        "pmull	v10.1q, v11.1d, v13.1d\n\t"
        "pmull2	v11.1q, v11.2d, v13.2d\n\t"
        "eor	v10.16b, v10.16b, v11.16b\n\t"
        /*   Reduce */
        "ext	v11.16b, v8.16b, v9.16b, #8\n\t"
        "pmull2	v9.1q, v9.2d, v21.2d\n\t"
        "eor	v11.16b, v11.16b, v9.16b\n\t"
        "eor	v11.16b, v11.16b, v10.16b\n\t"
        "pmull2	v10.1q, v11.2d, v21.2d\n\t"
        "mov	v8.d[1], v11.d[0]\n\t"
        "eor	v14.16b, v8.16b, v10.16b\n\t"
        /* Square H^2 => H^4 */
        "pmull2	v11.1q, v13.2d, v13.2d\n\t"
        "pmull	v10.1q, v13.1d, v13.1d\n\t"
        "pmull2	v8.1q, v11.2d, v21.2d\n\t"
        "ext	v9.16b, v10.16b, v11.16b, #8\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "pmull2	v11.1q, v9.2d, v21.2d\n\t"
        "mov	v10.d[1], v9.d[0]\n\t"
        "eor	v15.16b, v10.16b, v11.16b\n\t"
        /* Done */
        "cmp	%w[abytes], #0x400\n\t"
        "b.lt	L_aes_gcm_aad_update_arm64_crypto_h_done_%=\n\t"
        /* Multiply H and H^4  => H^5 */
        "pmull	v8.1q, v12.1d, v15.1d\n\t"
        "pmull2	v9.1q, v12.2d, v15.2d\n\t"
        "ext	v11.16b, v12.16b, v12.16b, #8\n\t"
        "pmull	v10.1q, v11.1d, v15.1d\n\t"
        "pmull2	v11.1q, v11.2d, v15.2d\n\t"
        "eor	v10.16b, v10.16b, v11.16b\n\t"
        /*   Reduce */
        "ext	v11.16b, v8.16b, v9.16b, #8\n\t"
        "pmull2	v9.1q, v9.2d, v21.2d\n\t"
        "eor	v11.16b, v11.16b, v9.16b\n\t"
        "eor	v11.16b, v11.16b, v10.16b\n\t"
        "pmull2	v10.1q, v11.2d, v21.2d\n\t"
        "mov	v8.d[1], v11.d[0]\n\t"
        "eor	v16.16b, v8.16b, v10.16b\n\t"
        /* Square H^3 => H^6 */
        "pmull2	v11.1q, v14.2d, v14.2d\n\t"
        "pmull	v10.1q, v14.1d, v14.1d\n\t"
        "pmull2	v8.1q, v11.2d, v21.2d\n\t"
        "ext	v9.16b, v10.16b, v11.16b, #8\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "pmull2	v11.1q, v9.2d, v21.2d\n\t"
        "mov	v10.d[1], v9.d[0]\n\t"
        "eor	v17.16b, v10.16b, v11.16b\n\t"
        /* Multiply H and H^6  => H^7 */
        "pmull	v8.1q, v12.1d, v17.1d\n\t"
        "pmull2	v9.1q, v12.2d, v17.2d\n\t"
        "ext	v11.16b, v12.16b, v12.16b, #8\n\t"
        "pmull	v10.1q, v11.1d, v17.1d\n\t"
        "pmull2	v11.1q, v11.2d, v17.2d\n\t"
        "eor	v10.16b, v10.16b, v11.16b\n\t"
        /*   Reduce */
        "ext	v11.16b, v8.16b, v9.16b, #8\n\t"
        "pmull2	v9.1q, v9.2d, v21.2d\n\t"
        "eor	v11.16b, v11.16b, v9.16b\n\t"
        "eor	v11.16b, v11.16b, v10.16b\n\t"
        "pmull2	v10.1q, v11.2d, v21.2d\n\t"
        "mov	v8.d[1], v11.d[0]\n\t"
        "eor	v18.16b, v8.16b, v10.16b\n\t"
        /* Square H^4 => H^8 */
        "pmull2	v11.1q, v15.2d, v15.2d\n\t"
        "pmull	v10.1q, v15.1d, v15.1d\n\t"
        "pmull2	v8.1q, v11.2d, v21.2d\n\t"
        "ext	v9.16b, v10.16b, v11.16b, #8\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "pmull2	v11.1q, v9.2d, v21.2d\n\t"
        "mov	v10.d[1], v9.d[0]\n\t"
        "eor	v19.16b, v10.16b, v11.16b\n\t"
        /* Done */
        "\n"
    "L_aes_gcm_aad_update_arm64_crypto_h_done_%=: \n\t"
        /* From here abytes holds the number of whole 16-byte blocks left.
         * The entry thresholds (4, 16, 64 blocks) match the byte thresholds
         * used above to decide which powers of H were computed. */
        "lsr	%w[abytes], %w[abytes], #4\n\t"
        "cmp	%w[abytes], #4\n\t"
        "b.lt	L_aes_gcm_aad_update_arm64_crypto_start_1_%=\n\t"
        "cmp	%w[abytes], #16\n\t"
        "b.lt	L_aes_gcm_aad_update_arm64_crypto_start_2_%=\n\t"
        "cmp	%w[abytes], #0x40\n\t"
        "b.lt	L_aes_gcm_aad_update_arm64_crypto_start_4_%=\n\t"
        "\n"
    "L_aes_gcm_aad_update_arm64_crypto_start_8_%=: \n\t"
        /* Eight blocks per iteration: v0 is the oldest block and v7 the
         * newest, so v0 (with the tag folded in) pairs with H^8 and v7 with
         * H^1. */
        "ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aadt]], #0x40\n\t"
        "ld1	{v4.16b, v5.16b, v6.16b, v7.16b}, [%x[aadt]], #0x40\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "rbit	v4.16b, v4.16b\n\t"
        "rbit	v5.16b, v5.16b\n\t"
        "rbit	v6.16b, v6.16b\n\t"
        "rbit	v7.16b, v7.16b\n\t"
        "eor	v0.16b, v0.16b, v20.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v8.1q, v7.1d, v12.1d\n\t"
        "pmull2	v9.1q, v7.2d, v12.2d\n\t"
        "ext	v11.16b, v7.16b, v7.16b, #8\n\t"
        "pmull	v10.1q, v11.1d, v12.1d\n\t"
        "pmull2	v11.1q, v11.2d, v12.2d\n\t"
        "eor	v10.16b, v10.16b, v11.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v11.1q, v13.1d, v6.1d\n\t"
        "pmull2	v20.1q, v13.2d, v6.2d\n\t"
        "eor	v8.16b, v8.16b, v11.16b\n\t"
        "eor	v9.16b, v9.16b, v20.16b\n\t"
        "ext	v20.16b, v6.16b, v6.16b, #8\n\t"
        "pmull	v11.1q, v20.1d, v13.1d\n\t"
        "pmull2	v20.1q, v20.2d, v13.2d\n\t"
        "eor	v20.16b, v20.16b, v11.16b\n\t"
        "eor	v10.16b, v10.16b, v20.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v11.1q, v14.1d, v5.1d\n\t"
        "pmull2	v20.1q, v14.2d, v5.2d\n\t"
        "eor	v8.16b, v8.16b, v11.16b\n\t"
        "eor	v9.16b, v9.16b, v20.16b\n\t"
        "ext	v20.16b, v5.16b, v5.16b, #8\n\t"
        "pmull	v11.1q, v20.1d, v14.1d\n\t"
        "pmull2	v20.1q, v20.2d, v14.2d\n\t"
        "eor	v20.16b, v20.16b, v11.16b\n\t"
        "eor	v10.16b, v10.16b, v20.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v11.1q, v15.1d, v4.1d\n\t"
        "pmull2	v20.1q, v15.2d, v4.2d\n\t"
        "eor	v8.16b, v8.16b, v11.16b\n\t"
        "eor	v9.16b, v9.16b, v20.16b\n\t"
        "ext	v20.16b, v4.16b, v4.16b, #8\n\t"
        "pmull	v11.1q, v20.1d, v15.1d\n\t"
        "pmull2	v20.1q, v20.2d, v15.2d\n\t"
        "eor	v20.16b, v20.16b, v11.16b\n\t"
        "eor	v10.16b, v10.16b, v20.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v11.1q, v16.1d, v3.1d\n\t"
        "pmull2	v20.1q, v16.2d, v3.2d\n\t"
        "eor	v8.16b, v8.16b, v11.16b\n\t"
        "eor	v9.16b, v9.16b, v20.16b\n\t"
        "ext	v20.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v11.1q, v20.1d, v16.1d\n\t"
        "pmull2	v20.1q, v20.2d, v16.2d\n\t"
        "eor	v20.16b, v20.16b, v11.16b\n\t"
        "eor	v10.16b, v10.16b, v20.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v11.1q, v17.1d, v2.1d\n\t"
        "pmull2	v20.1q, v17.2d, v2.2d\n\t"
        "eor	v8.16b, v8.16b, v11.16b\n\t"
        "eor	v9.16b, v9.16b, v20.16b\n\t"
        "ext	v20.16b, v2.16b, v2.16b, #8\n\t"
        "pmull	v11.1q, v20.1d, v17.1d\n\t"
        "pmull2	v20.1q, v20.2d, v17.2d\n\t"
        "eor	v20.16b, v20.16b, v11.16b\n\t"
        "eor	v10.16b, v10.16b, v20.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v11.1q, v18.1d, v1.1d\n\t"
        "pmull2	v20.1q, v18.2d, v1.2d\n\t"
        "eor	v8.16b, v8.16b, v11.16b\n\t"
        "eor	v9.16b, v9.16b, v20.16b\n\t"
        "ext	v20.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v11.1q, v20.1d, v18.1d\n\t"
        "pmull2	v20.1q, v20.2d, v18.2d\n\t"
        "eor	v20.16b, v20.16b, v11.16b\n\t"
        "eor	v10.16b, v10.16b, v20.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v11.1q, v19.1d, v0.1d\n\t"
        "pmull2	v20.1q, v19.2d, v0.2d\n\t"
        "eor	v8.16b, v8.16b, v11.16b\n\t"
        "eor	v9.16b, v9.16b, v20.16b\n\t"
        "ext	v20.16b, v0.16b, v0.16b, #8\n\t"
        "pmull	v11.1q, v20.1d, v19.1d\n\t"
        "pmull2	v20.1q, v20.2d, v19.2d\n\t"
        "eor	v20.16b, v20.16b, v11.16b\n\t"
        "eor	v10.16b, v10.16b, v20.16b\n\t"
        /*   Reduce */
        "ext	v11.16b, v8.16b, v9.16b, #8\n\t"
        "pmull2	v9.1q, v9.2d, v21.2d\n\t"
        "eor	v11.16b, v11.16b, v9.16b\n\t"
        "eor	v11.16b, v11.16b, v10.16b\n\t"
        "pmull2	v10.1q, v11.2d, v21.2d\n\t"
        "mov	v8.d[1], v11.d[0]\n\t"
        "eor	v20.16b, v8.16b, v10.16b\n\t"
        /* Done GHASH */
        "sub	%w[abytes], %w[abytes], #8\n\t"
        "cmp	%w[abytes], #8\n\t"
        "b.ge	L_aes_gcm_aad_update_arm64_crypto_start_8_%=\n\t"
        "cmp	%w[abytes], #1\n\t"
        "b.lt	L_aes_gcm_aad_update_arm64_crypto_done_%=\n\t"
        "b.eq	L_aes_gcm_aad_update_arm64_crypto_start_1_%=\n\t"
        "cmp	%w[abytes], #16\n\t"
        "b.lt	L_aes_gcm_aad_update_arm64_crypto_start_2_%=\n\t"
        "\n"
    "L_aes_gcm_aad_update_arm64_crypto_start_4_%=: \n\t"
        /* Four blocks per iteration: v0 (with tag) * H^4 ... v3 * H^1. */
        "ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aadt]], #0x40\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "eor	v0.16b, v0.16b, v20.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v8.1q, v3.1d, v12.1d\n\t"
        "pmull2	v9.1q, v3.2d, v12.2d\n\t"
        "ext	v11.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v10.1q, v11.1d, v12.1d\n\t"
        "pmull2	v11.1q, v11.2d, v12.2d\n\t"
        "eor	v10.16b, v10.16b, v11.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v11.1q, v13.1d, v2.1d\n\t"
        "pmull2	v20.1q, v13.2d, v2.2d\n\t"
        "eor	v8.16b, v8.16b, v11.16b\n\t"
        "eor	v9.16b, v9.16b, v20.16b\n\t"
        "ext	v20.16b, v2.16b, v2.16b, #8\n\t"
        "pmull	v11.1q, v20.1d, v13.1d\n\t"
        "pmull2	v20.1q, v20.2d, v13.2d\n\t"
        "eor	v20.16b, v20.16b, v11.16b\n\t"
        "eor	v10.16b, v10.16b, v20.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v11.1q, v14.1d, v1.1d\n\t"
        "pmull2	v20.1q, v14.2d, v1.2d\n\t"
        "eor	v8.16b, v8.16b, v11.16b\n\t"
        "eor	v9.16b, v9.16b, v20.16b\n\t"
        "ext	v20.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v11.1q, v20.1d, v14.1d\n\t"
        "pmull2	v20.1q, v20.2d, v14.2d\n\t"
        "eor	v20.16b, v20.16b, v11.16b\n\t"
        "eor	v10.16b, v10.16b, v20.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v11.1q, v15.1d, v0.1d\n\t"
        "pmull2	v20.1q, v15.2d, v0.2d\n\t"
        "eor	v8.16b, v8.16b, v11.16b\n\t"
        "eor	v9.16b, v9.16b, v20.16b\n\t"
        "ext	v20.16b, v0.16b, v0.16b, #8\n\t"
        "pmull	v11.1q, v20.1d, v15.1d\n\t"
        "pmull2	v20.1q, v20.2d, v15.2d\n\t"
        "eor	v20.16b, v20.16b, v11.16b\n\t"
        "eor	v10.16b, v10.16b, v20.16b\n\t"
        /*   Reduce */
        "ext	v11.16b, v8.16b, v9.16b, #8\n\t"
        "pmull2	v9.1q, v9.2d, v21.2d\n\t"
        "eor	v11.16b, v11.16b, v9.16b\n\t"
        "eor	v11.16b, v11.16b, v10.16b\n\t"
        "pmull2	v10.1q, v11.2d, v21.2d\n\t"
        "mov	v8.d[1], v11.d[0]\n\t"
        "eor	v20.16b, v8.16b, v10.16b\n\t"
        /* Done GHASH */
        "sub	%w[abytes], %w[abytes], #4\n\t"
        "cmp	%w[abytes], #4\n\t"
        "b.ge	L_aes_gcm_aad_update_arm64_crypto_start_4_%=\n\t"
        "cmp	%w[abytes], #1\n\t"
        "b.lt	L_aes_gcm_aad_update_arm64_crypto_done_%=\n\t"
        "b.eq	L_aes_gcm_aad_update_arm64_crypto_start_1_%=\n\t"
        "\n"
    "L_aes_gcm_aad_update_arm64_crypto_start_2_%=: \n\t"
        /* Two blocks per iteration: (tag ^ v0) * H^2 + v1 * H^1. */
        "ld1	{v0.16b, v1.16b}, [%x[aadt]], #32\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "eor	v3.16b, v20.16b, v0.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v8.1q, v1.1d, v12.1d\n\t"
        "pmull2	v9.1q, v1.2d, v12.2d\n\t"
        "ext	v11.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v10.1q, v11.1d, v12.1d\n\t"
        "pmull2	v11.1q, v11.2d, v12.2d\n\t"
        "eor	v10.16b, v10.16b, v11.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v11.1q, v13.1d, v3.1d\n\t"
        "pmull2	v20.1q, v13.2d, v3.2d\n\t"
        "eor	v8.16b, v8.16b, v11.16b\n\t"
        "eor	v9.16b, v9.16b, v20.16b\n\t"
        "ext	v20.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v11.1q, v20.1d, v13.1d\n\t"
        "pmull2	v20.1q, v20.2d, v13.2d\n\t"
        "eor	v20.16b, v20.16b, v11.16b\n\t"
        "eor	v10.16b, v10.16b, v20.16b\n\t"
        /*   Reduce */
        "ext	v11.16b, v8.16b, v9.16b, #8\n\t"
        "pmull2	v9.1q, v9.2d, v21.2d\n\t"
        "eor	v11.16b, v11.16b, v9.16b\n\t"
        "eor	v11.16b, v11.16b, v10.16b\n\t"
        "pmull2	v10.1q, v11.2d, v21.2d\n\t"
        "mov	v8.d[1], v11.d[0]\n\t"
        "eor	v20.16b, v8.16b, v10.16b\n\t"
        /* Done GHASH */
        "sub	%w[abytes], %w[abytes], #2\n\t"
        "cmp	%w[abytes], #1\n\t"
        "b.gt	L_aes_gcm_aad_update_arm64_crypto_start_2_%=\n\t"
        "b.lt	L_aes_gcm_aad_update_arm64_crypto_done_%=\n\t"
        "\n"
    "L_aes_gcm_aad_update_arm64_crypto_start_1_%=: \n\t"
        /* Reached directly when fewer than 4 blocks were supplied, so the
         * count may be zero here. */
        "cbz	%w[abytes], L_aes_gcm_aad_update_arm64_crypto_done_%=\n\t"
        "\n"
    "L_aes_gcm_aad_update_arm64_crypto_both_1_%=: \n\t"
        "ld1	{v0.16b}, [%x[aadt]], #16\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "eor	v3.16b, v20.16b, v0.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v8.1q, v3.1d, v12.1d\n\t"
        "pmull2	v9.1q, v3.2d, v12.2d\n\t"
        "ext	v11.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v10.1q, v11.1d, v12.1d\n\t"
        "pmull2	v11.1q, v11.2d, v12.2d\n\t"
        "eor	v10.16b, v10.16b, v11.16b\n\t"
        /*   Reduce */
        "ext	v11.16b, v8.16b, v9.16b, #8\n\t"
        "pmull2	v9.1q, v9.2d, v21.2d\n\t"
        "eor	v11.16b, v11.16b, v9.16b\n\t"
        "eor	v11.16b, v11.16b, v10.16b\n\t"
        "pmull2	v10.1q, v11.2d, v21.2d\n\t"
        "mov	v8.d[1], v11.d[0]\n\t"
        "eor	v20.16b, v8.16b, v10.16b\n\t"
        /* Done GHASH */
        "subs	%w[abytes], %w[abytes], #1\n\t"
        "b.ne	L_aes_gcm_aad_update_arm64_crypto_both_1_%=\n\t"
        "\n"
    "L_aes_gcm_aad_update_arm64_crypto_done_%=: \n\t"
        "st1	{v20.2d}, [%x[tag]]\n\t"
        /* aadt is advanced by the post-indexed ld1 loads above, so it must be
         * a read-write operand: the compiler may otherwise assume its
         * register still holds the original pointer after the asm. */
        : [abytes] "+r" (abytes), [tag] "+r" (tag), [gcm_h] "+r" (gcm_h),
          [aadt] "+r" (aadt)
        :
        : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
            "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18",
            "v19", "v20", "v21", "v22"
    );
}

/* Encrypt one 16-byte block in counter mode: increment the counter, encrypt
 * it with AES and XOR the result with the input.
 *
 * key      expanded round keys, 16 bytes each; nr + 1 keys are read.
 * nr       number of AES rounds. The code handles 10, 12 and 14: after the
 *          first ten rounds it stops when nr == 10, after two more when
 *          nr == 12, and otherwise runs two further rounds.
 * out      receives 16 bytes: in ^ AES(key, incremented counter).
 * in       16 bytes of input; read only.
 * counter  16-byte counter block. The last 32-bit word is byte-swapped,
 *          incremented by one, swapped back and the block is stored back
 *          BEFORE it is encrypted, so the value left in counter is the one
 *          used for this block.
 *
 * Registers: v4 = input block, v5 = counter / keystream, v0-v3 = round keys,
 * w5 = counter word being incremented.
 */
void AES_GCM_encrypt_block_AARCH64(const byte* key, int nr, byte* out,
    const byte* in, byte* counter)
{
    __asm__ __volatile__ (
        "ld1	{v5.2d}, [%x[counter]]\n\t"
        "ld1	{v4.2d}, [%x[in]]\n\t"
        /* Increment the last 32-bit word of the counter; rev on either side
         * of the add makes the increment operate on the byte-swapped value. */
        "mov	w5, v5.s[3]\n\t"
        "rev	w5, w5\n\t"
        "add	w5, w5, #1\n\t"
        "rev	w5, w5\n\t"
        "mov	v5.s[3], w5\n\t"
        "st1	{v5.2d}, [%x[counter]]\n\t"
        /* Rounds 1-4 */
        "ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t"
        "aese	v5.16b, v0.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v5.16b, v1.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v5.16b, v2.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v5.16b, v3.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        /* Rounds 5-8 */
        "ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t"
        "aese	v5.16b, v0.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v5.16b, v1.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v5.16b, v2.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v5.16b, v3.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        /* Rounds 9-10. The flags from this subs are consumed by the b.eq
         * below; the vector instructions in between do not alter them. The
         * aesmc for the last round executed is deferred until it is known
         * that another round follows. */
        "subs	%w[nr], %w[nr], #10\n\t"
        "ld1	{v0.2d, v1.2d}, [%x[key]], #32\n\t"
        "aese	v5.16b, v0.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v5.16b, v1.16b\n\t"
        "b.eq	L_aes_gcm_encrypt_block_arm64_crypto_round_done_%=\n\t"
        /* Rounds 11-12 (nr > 10) */
        "ld1	{v0.2d, v1.2d}, [%x[key]], #32\n\t"
        "subs	%w[nr], %w[nr], #2\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v5.16b, v0.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v5.16b, v1.16b\n\t"
        "b.eq	L_aes_gcm_encrypt_block_arm64_crypto_round_done_%=\n\t"
        /* Rounds 13-14 (nr > 12) */
        "ld1	{v0.2d, v1.2d}, [%x[key]], #32\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v5.16b, v0.16b\n\t"
        "aesmc	v5.16b, v5.16b\n\t"
        "aese	v5.16b, v1.16b\n\t"
        "\n"
    "L_aes_gcm_encrypt_block_arm64_crypto_round_done_%=: \n\t"
        /* XOR in the final round key, then XOR the keystream with the
         * input. */
        "ld1	{v0.2d}, [%x[key]]\n\t"
        "eor	v5.16b, v5.16b, v0.16b\n\t"
        "eor	v4.16b, v4.16b, v5.16b\n\t"
        "st1	{v4.2d}, [%x[out]]\n\t"
        /* key is advanced by the post-indexed ld1 loads above, so it must be
         * a read-write operand rather than an input-only one. */
        : [key] "+r" (key), [nr] "+r" (nr), [out] "+r" (out),
          [counter] "+r" (counter)
        : [in] "r" (in)
        : "memory", "cc", "x5", "v0", "v1", "v2", "v3", "v4", "v5"
    );
}

void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out,
    const byte* in, word32 nbytes, byte* tag, byte* h, byte* counter)
{
    __asm__ __volatile__ (
        "stp	x29, x30, [sp, #-32]!\n\t"
        "add	x29, sp, #0\n\t"
        "ld1	{v13.2d}, [%x[counter]]\n\t"
        "movi	v27.16b, #0x87\n\t"
        "ld1	{v26.2d}, [%x[tag]]\n\t"
        "ushr	v27.2d, v27.2d, #56\n\t"
        "ld1	{v22.2d}, [%x[h]]\n\t"
        "mov	w9, v13.s[3]\n\t"
        "rev	w9, w9\n\t"
        "cmp	%w[nbytes], #32\n\t"
        "b.lt	L_aes_gcm_encrypt_update_arm64_crypto_h_done_%=\n\t"
        /* Square H => H^2 */
        "pmull2	v31.1q, v22.2d, v22.2d\n\t"
        "pmull	v30.1q, v22.1d, v22.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v23.16b, v30.16b, v31.16b\n\t"
        "cmp	%w[nbytes], #0x40\n\t"
        "b.lt	L_aes_gcm_encrypt_update_arm64_crypto_h_done_%=\n\t"
        /* Multiply H and H^2  => H^3 */
        "pmull	v28.1q, v22.1d, v23.1d\n\t"
        "pmull2	v29.1q, v22.2d, v23.2d\n\t"
        "ext	v31.16b, v22.16b, v22.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v23.1d\n\t"
        "pmull2	v31.1q, v31.2d, v23.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v24.16b, v28.16b, v30.16b\n\t"
        /* Square H^2 => H^4 */
        "pmull2	v31.1q, v23.2d, v23.2d\n\t"
        "pmull	v30.1q, v23.1d, v23.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v25.16b, v30.16b, v31.16b\n\t"
        /* Done */
        "cmp	%w[nbytes], #0x200\n\t"
        "b.lt	L_aes_gcm_encrypt_update_arm64_crypto_h_done_%=\n\t"
        /* Multiply H and H^4  => H^5 */
        "pmull	v28.1q, v22.1d, v25.1d\n\t"
        "pmull2	v29.1q, v22.2d, v25.2d\n\t"
        "ext	v31.16b, v22.16b, v22.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v25.1d\n\t"
        "pmull2	v31.1q, v31.2d, v25.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v4.16b, v28.16b, v30.16b\n\t"
        /* Square H^3 => H^6 */
        "pmull2	v31.1q, v24.2d, v24.2d\n\t"
        "pmull	v30.1q, v24.1d, v24.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v5.16b, v30.16b, v31.16b\n\t"
        /* Multiply H and H^6  => H^7 */
        "pmull	v28.1q, v22.1d, v5.1d\n\t"
        "pmull2	v29.1q, v22.2d, v5.2d\n\t"
        "ext	v31.16b, v22.16b, v22.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v5.1d\n\t"
        "pmull2	v31.1q, v31.2d, v5.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v6.16b, v28.16b, v30.16b\n\t"
        /* Square H^4 => H^8 */
        "pmull2	v31.1q, v25.2d, v25.2d\n\t"
        "pmull	v30.1q, v25.1d, v25.1d\n\t"
        "pmull2	v28.1q, v31.2d, v27.2d\n\t"
        "ext	v29.16b, v30.16b, v31.16b, #8\n\t"
        "eor	v29.16b, v29.16b, v28.16b\n\t"
        "pmull2	v31.1q, v29.2d, v27.2d\n\t"
        "mov	v30.d[1], v29.d[0]\n\t"
        "eor	v7.16b, v30.16b, v31.16b\n\t"
        /* Done */
        "\n"
    "L_aes_gcm_encrypt_update_arm64_crypto_h_done_%=: \n\t"
        "lsr	w8, %w[nbytes], #4\n\t"
        "cmp	%w[nr], #12\n\t"
        "b.lt	L_aes_gcm_encrypt_update_arm64_crypto_start_128_%=\n\t"
        "b.gt	L_aes_gcm_encrypt_update_arm64_crypto_start_256_%=\n\t"
        /* AES_GCM_192 */
#ifndef NO_AES_192
        "cmp	w8, #32\n\t"
        "b.lt	L_aes_gcm_encrypt_update_arm64_crypto_192_start_4_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_update_arm64_crypto_192_start_8_%=: \n\t"
        "ldr	q12, [%x[key]]\n\t"
        "add	w17, w9, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w16, w9, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w15, w9, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w14, w9, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w13, w9, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w12, w9, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w11, w9, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w9, w9, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rev	w17, w17\n\t"
        "rev	w16, w16\n\t"
        "rev	w15, w15\n\t"
        "rev	w14, w14\n\t"
        "rev	w13, w13\n\t"
        "rev	w12, w12\n\t"
        "rev	w11, w11\n\t"
        "rev	w10, w9\n\t"
        "mov	v14.s[3], w17\n\t"
        "mov	v15.s[3], w16\n\t"
        "mov	v16.s[3], w15\n\t"
        "mov	v17.s[3], w14\n\t"
        "mov	v8.s[3], w13\n\t"
        "mov	v9.s[3], w12\n\t"
        "mov	v10.s[3], w11\n\t"
        "mov	v11.s[3], w10\n\t"
        "ldr	q13, [%x[key], #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [%x[key], #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [%x[key], #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [%x[key], #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [%x[key], #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [%x[key], #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [%x[key], #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w8, w8, #8\n\t"
        "ldr	q12, [%x[key], #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [%x[key], #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [%x[key], #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [%x[key], #176]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [%x[key], #192]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "eor	v0.16b, v0.16b, v8.16b\n\t"
        "eor	v1.16b, v1.16b, v9.16b\n\t"
        "eor	v2.16b, v2.16b, v10.16b\n\t"
        "eor	v3.16b, v3.16b, v11.16b\n\t"
        "ld1	{v13.2d}, [%x[counter]]\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "cmp	w8, #8\n\t"
        "b.lt	L_aes_gcm_encrypt_update_arm64_crypto_192_end_8_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_update_arm64_crypto_192_both_8_%=: \n\t"
        "ldr	q12, [%x[key]]\n\t"
        "add	w17, w9, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w16, w9, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w15, w9, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w14, w9, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "add	w13, w9, #5\n\t"
        "mov	v8.16b, v13.16b\n\t"
        "add	w12, w9, #6\n\t"
        "mov	v9.16b, v13.16b\n\t"
        "add	w11, w9, #7\n\t"
        "mov	v10.16b, v13.16b\n\t"
        "add	w9, w9, #8\n\t"
        "mov	v11.16b, v13.16b\n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rev	w17, w17\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rev	w16, w16\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rev	w15, w15\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rev	w14, w14\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rev	w13, w13\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rev	w12, w12\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rev	w11, w11\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "rev	w10, w9\n\t"
        "mov	v14.s[3], w17\n\t"
        "mov	v15.s[3], w16\n\t"
        "mov	v16.s[3], w15\n\t"
        "mov	v17.s[3], w14\n\t"
        "mov	v8.s[3], w13\n\t"
        "mov	v9.s[3], w12\n\t"
        "mov	v10.s[3], w11\n\t"
        "mov	v11.s[3], w10\n\t"
        "ldr	q13, [%x[key], #16]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [%x[key], #32]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [%x[key], #48]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [%x[key], #64]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [%x[key], #80]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [%x[key], #96]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [%x[key], #112]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "subs	w8, w8, #8\n\t"
        "ldr	q12, [%x[key], #128]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v18.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "ld1	{v19.16b}, [%x[in]], #16\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [%x[key], #144]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "ld1	{v20.16b}, [%x[in]], #16\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "ld1	{v21.16b}, [%x[in]], #16\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "ld1	{v0.16b}, [%x[in]], #16\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "ld1	{v1.16b}, [%x[in]], #16\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "ld1	{v2.16b}, [%x[in]], #16\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "ld1	{v3.16b}, [%x[in]], #16\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [%x[key], #160]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q13, [%x[key], #176]\n\t"
        "aese	v14.16b, v12.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v12.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v12.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v12.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v8.16b, v12.16b\n\t"
        "aesmc	v8.16b, v8.16b\n\t"
        "aese	v9.16b, v12.16b\n\t"
        "aesmc	v9.16b, v9.16b\n\t"
        "aese	v10.16b, v12.16b\n\t"
        "aesmc	v10.16b, v10.16b\n\t"
        "aese	v11.16b, v12.16b\n\t"
        "aesmc	v11.16b, v11.16b\n\t"
        "ldr	q12, [%x[key], #192]\n\t"
        "aese	v14.16b, v13.16b\n\t"
        "eor	v14.16b, v14.16b, v12.16b\n\t"
        "aese	v15.16b, v13.16b\n\t"
        "eor	v15.16b, v15.16b, v12.16b\n\t"
        "aese	v16.16b, v13.16b\n\t"
        "eor	v16.16b, v16.16b, v12.16b\n\t"
        "aese	v17.16b, v13.16b\n\t"
        "eor	v17.16b, v17.16b, v12.16b\n\t"
        "aese	v8.16b, v13.16b\n\t"
        "eor	v8.16b, v8.16b, v12.16b\n\t"
        "aese	v9.16b, v13.16b\n\t"
        "eor	v9.16b, v9.16b, v12.16b\n\t"
        "aese	v10.16b, v13.16b\n\t"
        "eor	v10.16b, v10.16b, v12.16b\n\t"
        "aese	v11.16b, v13.16b\n\t"
        "eor	v11.16b, v11.16b, v12.16b\n\t"
        "ld1	{v13.2d}, [%x[counter]]\n\t"
        "eor	v18.16b, v18.16b, v14.16b\n\t"
        "eor	v19.16b, v19.16b, v15.16b\n\t"
        "eor	v20.16b, v20.16b, v16.16b\n\t"
        "eor	v21.16b, v21.16b, v17.16b\n\t"
        "st1	{v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t"
        "eor	v0.16b, v0.16b, v8.16b\n\t"
        "eor	v1.16b, v1.16b, v9.16b\n\t"
        "eor	v2.16b, v2.16b, v10.16b\n\t"
        "eor	v3.16b, v3.16b, v11.16b\n\t"
        "st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t"
        "cmp	w8, #8\n\t"
        "b.ge	L_aes_gcm_encrypt_update_arm64_crypto_192_both_8_%=\n\t"
        "\n"
    "L_aes_gcm_encrypt_update_arm64_crypto_192_end_8_%=: \n\t"
        "rbit	v18.16b, v18.16b\n\t"
        "rbit	v19.16b, v19.16b\n\t"
        "rbit	v20.16b, v20.16b\n\t"
        "rbit	v21.16b, v21.16b\n\t"
        "rbit	v0.16b, v0.16b\n\t"
        "rbit	v1.16b, v1.16b\n\t"
        "rbit	v2.16b, v2.16b\n\t"
        "rbit	v3.16b, v3.16b\n\t"
        "eor	v18.16b, v18.16b, v26.16b\n\t"
        /*   X  = C * H^1 */
        "pmull	v28.1q, v3.1d, v22.1d\n\t"
        "pmull2	v29.1q, v3.2d, v22.2d\n\t"
        "ext	v31.16b, v3.16b, v3.16b, #8\n\t"
        "pmull	v30.1q, v31.1d, v22.1d\n\t"
        "pmull2	v31.1q, v31.2d, v22.2d\n\t"
        "eor	v30.16b, v30.16b, v31.16b\n\t"
        /*   X += C * H^2 */
        "pmull	v31.1q, v23.1d, v2.1d\n\t"
        "pmull2	v26.1q, v23.2d, v2.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v2.16b, v2.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v23.1d\n\t"
        "pmull2	v26.1q, v26.2d, v23.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^3 */
        "pmull	v31.1q, v24.1d, v1.1d\n\t"
        "pmull2	v26.1q, v24.2d, v1.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v1.16b, v1.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v24.1d\n\t"
        "pmull2	v26.1q, v26.2d, v24.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^4 */
        "pmull	v31.1q, v25.1d, v0.1d\n\t"
        "pmull2	v26.1q, v25.2d, v0.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v0.16b, v0.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v25.1d\n\t"
        "pmull2	v26.1q, v26.2d, v25.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^5 */
        "pmull	v31.1q, v4.1d, v21.1d\n\t"
        "pmull2	v26.1q, v4.2d, v21.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v21.16b, v21.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v4.1d\n\t"
        "pmull2	v26.1q, v26.2d, v4.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^6 */
        "pmull	v31.1q, v5.1d, v20.1d\n\t"
        "pmull2	v26.1q, v5.2d, v20.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v20.16b, v20.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v5.1d\n\t"
        "pmull2	v26.1q, v26.2d, v5.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^7 */
        "pmull	v31.1q, v6.1d, v19.1d\n\t"
        "pmull2	v26.1q, v6.2d, v19.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v19.16b, v19.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v6.1d\n\t"
        "pmull2	v26.1q, v26.2d, v6.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   X += C * H^8 */
        "pmull	v31.1q, v7.1d, v18.1d\n\t"
        "pmull2	v26.1q, v7.2d, v18.2d\n\t"
        "eor	v28.16b, v28.16b, v31.16b\n\t"
        "eor	v29.16b, v29.16b, v26.16b\n\t"
        "ext	v26.16b, v18.16b, v18.16b, #8\n\t"
        "pmull	v31.1q, v26.1d, v7.1d\n\t"
        "pmull2	v26.1q, v26.2d, v7.2d\n\t"
        "eor	v26.16b, v26.16b, v31.16b\n\t"
        "eor	v30.16b, v30.16b, v26.16b\n\t"
        /*   Reduce */
        "ext	v31.16b, v28.16b, v29.16b, #8\n\t"
        "pmull2	v29.1q, v29.2d, v27.2d\n\t"
        "eor	v31.16b, v31.16b, v29.16b\n\t"
        "eor	v31.16b, v31.16b, v30.16b\n\t"
        "pmull2	v30.1q, v31.2d, v27.2d\n\t"
        "mov	v28.d[1], v31.d[0]\n\t"
        "eor	v26.16b, v28.16b, v30.16b\n\t"
        /* Done GHASH */
        "\n"
    "L_aes_gcm_encrypt_update_arm64_crypto_192_start_4_%=: \n\t"
        "ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t"
        "ld1	{v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t"
        "ld1	{v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t"
        "ld1	{v12.2d}, [%x[key]]\n\t"
        "cmp	w8, #1\n\t"
        "b.lt	L_aes_gcm_encrypt_update_arm64_crypto_192_done_%=\n\t"
        "b.eq	L_aes_gcm_encrypt_update_arm64_crypto_192_start_1_%=\n\t"
        "cmp	w8, #4\n\t"
        "b.lt	L_aes_gcm_encrypt_update_arm64_crypto_192_start_2_%=\n\t"
        "add	w13, w9, #1\n\t"
        "mov	v14.16b, v13.16b\n\t"
        "add	w12, w9, #2\n\t"
        "mov	v15.16b, v13.16b\n\t"
        "add	w11, w9, #3\n\t"
        "mov	v16.16b, v13.16b\n\t"
        "add	w9, w9, #4\n\t"
        "mov	v17.16b, v13.16b\n\t"
        "rev	w13, w13\n\t"
        "rev	w12, w12\n\t"
        "rev	w11, w11\n\t"
        "rev	w10, w9\n\t"
        "mov	v14.s[3], w13\n\t"
        "mov	v15.s[3], w12\n\t"
        "mov	v16.s[3], w11\n\t"
        "mov	v17.s[3], w10\n\t"
        "aese	v14.16b, v0.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v0.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v0.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v0.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v1.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v1.16b\n\t"
        "aesmc	v15.16b, v15.16b\n\t"
        "aese	v16.16b, v1.16b\n\t"
        "aesmc	v16.16b, v16.16b\n\t"
        "aese	v17.16b, v1.16b\n\t"
        "aesmc	v17.16b, v17.16b\n\t"
        "aese	v14.16b, v2.16b\n\t"
        "aesmc	v14.16b, v14.16b\n\t"
        "aese	v15.16b, v2.16b\n\t"