root/lib/crypto/powerpc/aes-spe-modes.S
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */

#include <asm/ppc_asm.h>
#include "aes-spe-regs.h"

/*
 * Data and IV access helpers. Two variants exist, selected at build time
 * by __BIG_ENDIAN__:
 *
 * - big endian: plain lwz/stw using the given displacement. The accessors
 *   leave the pointers untouched, so NEXT_BLOCK advances rSP/rDP by 16
 *   and START_IV has nothing to undo.
 * - little endian: byte reversed lwbrx/stwbrx. The "off" argument is
 *   ignored; every accessor post-increments its pointer by 4 instead, so
 *   NEXT_BLOCK is empty and START_IV rewinds rIP by 16 (four accesses).
 *
 * CBC_DEC and CTR_DEC are the amounts callers subtract from their
 * pointers. The little endian values are larger because they also have
 * to undo the increments performed by the accessors.
 */
#ifdef __BIG_ENDIAN__                   /* Macros for big endian builds */

#define LOAD_DATA(reg, off) \
        lwz             reg,off(rSP);   /* load with offset             */
#define SAVE_DATA(reg, off) \
        stw             reg,off(rDP);   /* save with offset             */
#define NEXT_BLOCK \
        addi            rSP,rSP,16;     /* increment pointers per block */ \
        addi            rDP,rDP,16;
#define LOAD_IV(reg, off) \
        lwz             reg,off(rIP);   /* IV loading with offset       */
#define SAVE_IV(reg, off) \
        stw             reg,off(rIP);   /* IV saving with offset        */
#define START_IV                        /* nothing to reset             */
#define CBC_DEC 16                      /* CBC decrement per block      */
#define CTR_DEC 1                       /* CTR decrement one byte       */

#else                                   /* Macros for little endian     */

#define LOAD_DATA(reg, off) \
        lwbrx           reg,0,rSP;      /* load reversed                */ \
        addi            rSP,rSP,4;      /* and increment pointer        */
#define SAVE_DATA(reg, off) \
        stwbrx          reg,0,rDP;      /* save reversed                */ \
        addi            rDP,rDP,4;      /* and increment pointer        */
#define NEXT_BLOCK                      /* nothing to do                */
#define LOAD_IV(reg, off) \
        lwbrx           reg,0,rIP;      /* load reversed                */ \
        addi            rIP,rIP,4;      /* and increment pointer        */
#define SAVE_IV(reg, off) \
        stwbrx          reg,0,rIP;      /* save reversed                */ \
        addi            rIP,rIP,4;      /* and increment pointer        */
#define START_IV \
        subi            rIP,rIP,16;     /* must reset pointer           */
#define CBC_DEC 32                      /* 2 blocks because of incs     */
#define CTR_DEC 17                      /* 1 block because of incs      */

#endif

/*
 * Empty save/restore variants. INITIALIZE_CRYPT and FINALIZE_CRYPT paste
 * their nr32bitregs argument into SAVE_<n>_REGS / LOAD_<n>_REGS, so the
 * value 0 has to expand to nothing.
 */
#define SAVE_0_REGS
#define LOAD_0_REGS

/*
 * Store rI0-rI3 as 32 bit words into the stack frame at 96..108(r1).
 * Counterpart of LOAD_4_REGS; expects the frame to be set up already.
 */
#define SAVE_4_REGS \
        stw             rI0,96(r1);     /* save 32 bit registers        */ \
        stw             rI1,100(r1);                                       \
        stw             rI2,104(r1);                                       \
        stw             rI3,108(r1);

/*
 * Reload rI0-rI3 from the stack frame slots 96..108(r1) that were
 * written by SAVE_4_REGS.
 */
#define LOAD_4_REGS \
        lwz             rI0,96(r1);     /* restore 32 bit registers     */ \
        lwz             rI1,100(r1);                                       \
        lwz             rI2,104(r1);                                       \
        lwz             rI3,108(r1);

/*
 * Store rI0-rI3 (via SAVE_4_REGS) and additionally rG0-rG3 as 32 bit
 * words into the stack frame at 112..124(r1).
 */
#define SAVE_8_REGS \
        SAVE_4_REGS                                                        \
        stw             rG0,112(r1);    /* save 32 bit registers        */ \
        stw             rG1,116(r1);                                       \
        stw             rG2,120(r1);                                       \
        stw             rG3,124(r1);

/*
 * Reload rI0-rI3 (via LOAD_4_REGS) and rG0-rG3 from the stack frame
 * slots 112..124(r1) that were written by SAVE_8_REGS.
 */
#define LOAD_8_REGS \
        LOAD_4_REGS                                                        \
        lwz             rG0,112(r1);    /* restore 32 bit registers     */ \
        lwz             rG1,116(r1);                                       \
        lwz             rG2,120(r1);                                       \
        lwz             rG3,124(r1);

/*
 * INITIALIZE_CRYPT(tab, nr32bitregs) - common function prologue
 *
 * tab:         symbol whose address is loaded into rT0 (lis/ori pair)
 * nr32bitregs: selects SAVE_<n>_REGS by token pasting; this file uses
 *              0, 4 and 8
 *
 * Creates a 160 byte stack frame with the following layout:
 *   8(r1)        link register
 *   16..95(r1)   r14-r23, eight bytes each, stored with evstdw
 *   96..127(r1)  rI0-rI3 / rG0-rG3 if requested through nr32bitregs
 * In addition rKP is copied to rKS, so the routines can restore the key
 * pointer with "mr rKP,rKS" before each block. r0 is clobbered.
 */
#define INITIALIZE_CRYPT(tab,nr32bitregs) \
        mflr            r0;                                                \
        stwu            r1,-160(r1);    /* create stack frame           */ \
        lis             rT0,tab@h;      /* en-/decryption table pointer */ \
        stw             r0,8(r1);       /* save link register           */ \
        ori             rT0,rT0,tab@l;                                     \
        evstdw          r14,16(r1);                                        \
        mr              rKS,rKP;                                           \
        evstdw          r15,24(r1);     /* We must save non volatile    */ \
        evstdw          r16,32(r1);     /* registers. Take the chance   */ \
        evstdw          r17,40(r1);     /* and save the SPE part too    */ \
        evstdw          r18,48(r1);                                        \
        evstdw          r19,56(r1);                                        \
        evstdw          r20,64(r1);                                        \
        evstdw          r21,72(r1);                                        \
        evstdw          r22,80(r1);                                        \
        evstdw          r23,88(r1);                                        \
        SAVE_##nr32bitregs##_REGS

/*
 * FINALIZE_CRYPT(nr32bitregs) - common function epilogue
 *
 * Mirror of INITIALIZE_CRYPT and must be used with the same nr32bitregs
 * value: reloads r14-r23 with evldw, expands LOAD_<n>_REGS, restores the
 * link register from 8(r1) and releases the 160 byte frame. The caller
 * still has to issue the final blr.
 *
 * Before the frame is dropped, a zero word is written to offsets
 * 16, 24, ..., 88, i.e. to the first four bytes of each eight byte slot
 * that evstdw filled. The second word of every slot and the 96..127 area
 * are left as they are.
 * NOTE(review): presumably the wiped words are the SPE upper register
 * halves, which may carry data of another user of this code - confirm
 * against the evstdw memory layout.
 */
#define FINALIZE_CRYPT(nr32bitregs) \
        lwz             r0,8(r1);                                          \
        evldw           r14,16(r1);     /* restore SPE registers        */ \
        evldw           r15,24(r1);                                        \
        evldw           r16,32(r1);                                        \
        evldw           r17,40(r1);                                        \
        evldw           r18,48(r1);                                        \
        evldw           r19,56(r1);                                        \
        evldw           r20,64(r1);                                        \
        evldw           r21,72(r1);                                        \
        evldw           r22,80(r1);                                        \
        evldw           r23,88(r1);                                        \
        LOAD_##nr32bitregs##_REGS                                          \
        mtlr            r0;             /* restore link register        */ \
        xor             r0,r0,r0;                                          \
        stw             r0,16(r1);      /* delete sensitive data        */ \
        stw             r0,24(r1);      /* that we might have pushed    */ \
        stw             r0,32(r1);      /* from other context that runs */ \
        stw             r0,40(r1);      /* the same code                */ \
        stw             r0,48(r1);                                         \
        stw             r0,56(r1);                                         \
        stw             r0,64(r1);                                         \
        stw             r0,72(r1);                                         \
        stw             r0,80(r1);                                         \
        stw             r0,88(r1);                                         \
        addi            r1,r1,160;      /* cleanup stack frame          */

/*
 * ENDIAN_SWAP(t0, t1, s0, s1) - byte reverse two 32 bit registers
 *
 * t0 = byte swapped s0, t1 = byte swapped s1. The rotate right by 8 puts
 * the outer two result bytes in place, the two rlwimi insert the
 * remaining bytes from s rotated left by 8. The source registers are
 * read again after the targets have been written, so each target must be
 * a different register than its source.
 */
#define ENDIAN_SWAP(t0, t1, s0, s1) \
        rotrwi          t0,s0,8;        /* swap endianness for 2 GPRs   */ \
        rotrwi          t1,s1,8;                                           \
        rlwimi          t0,s0,8,8,15;                                      \
        rlwimi          t1,s1,8,8,15;                                      \
        rlwimi          t0,s0,8,24,31;                                     \
        rlwimi          t1,s1,8,24,31;

/*
 * GF128_MUL(d0, d1, d2, d3, t0) - shift a 128 bit value left by one bit
 * with conditional reduction
 *
 * d3:d2:d1:d0 is treated as one 128 bit value with d3 as the most
 * significant word. The value is shifted left by one bit; if the top bit
 * of d3 was set before the shift, 0x87 is XORed into d0 afterwards.
 *
 * The top bit of d3 is tested first (signed compare against -1: "gt"
 * means the bit is clear, in which case iselgt replaces the 0x87 in t0
 * by zero). Each word then takes over the top bit of its lower neighbour
 * (rlwimi ...,0,0,0) and is rotated left by one, which moves that bit to
 * the bottom. d1 must pick up the top bit of d0 before d0 is shifted, so
 * the instruction order matters.
 *
 * Clobbers t0 and the condition register field written by cmpwi.
 */
#define GF128_MUL(d0, d1, d2, d3, t0) \
        li              t0,0x87;        /* multiplication in GF128      */ \
        cmpwi           d3,-1;                                             \
        iselgt          t0,0,t0;                                           \
        rlwimi          d3,d2,0,0,0;    /* propagate "carry" bits       */ \
        rotlwi          d3,d3,1;                                           \
        rlwimi          d2,d1,0,0,0;                                       \
        rotlwi          d2,d2,1;                                           \
        rlwimi          d1,d0,0,0,0;                                       \
        slwi            d0,d0,1;        /* shift left 128 bit           */ \
        rotlwi          d1,d1,1;                                           \
        xor             d0,d0,t0;

/*
 * START_KEY(d0, d1, d2, d3) - set up state for one block operation
 *
 * Loads the four words at 0..12(rKP) into rW0-rW3, moves rRR into the
 * count register and sets rD0-rD3 = d0-d3 XOR rW0-rW3. The inputs may be
 * rD0-rD3 themselves or any other registers (e.g. rI0-rI3).
 */
#define START_KEY(d0, d1, d2, d3) \
        lwz             rW0,0(rKP);                                        \
        mtctr           rRR;                                               \
        lwz             rW1,4(rKP);                                        \
        lwz             rW2,8(rKP);                                        \
        lwz             rW3,12(rKP);                                       \
        xor             rD0,d0,rW0;                                        \
        xor             rD1,d1,rW1;                                        \
        xor             rD2,d2,rW2;                                        \
        xor             rD3,d3,rW3;

/*
 * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
 *                 u32 rounds)
 *
 * Called from the glue layer to encrypt a single 16 byte block.
 * Round values are AES128 = 4, AES192 = 5, AES256 = 6.
 *
 * The block is read through rSP, XORed with the first four key words
 * (START_KEY), passed to ppc_encrypt_block and finally XORed with
 * rW0-rW3 before it is written through rDP.
 * NOTE(review): ppc_encrypt_block is defined outside this file; rW0-rW3
 * presumably hold the last round key when it returns - confirm there.
 */
_GLOBAL(ppc_encrypt_aes)
        INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
        LOAD_DATA(rD0, 0)               /* fetch the input block        */
        LOAD_DATA(rD1, 4)
        LOAD_DATA(rD2, 8)
        LOAD_DATA(rD3, 12)
        START_KEY(rD0, rD1, rD2, rD3)   /* rKP is still the caller's    */
        bl              ppc_encrypt_block
        xor             rD0,rD0,rW0     /* final XOR, then store        */
        SAVE_DATA(rD0, 0)
        xor             rD1,rD1,rW1
        SAVE_DATA(rD1, 4)
        xor             rD2,rD2,rW2
        SAVE_DATA(rD2, 8)
        xor             rD3,rD3,rW3
        SAVE_DATA(rD3, 12)
        FINALIZE_CRYPT(0)
        blr

/*
 * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
 *                 u32 rounds)
 *
 * Called from the glue layer to decrypt a single 16 byte block.
 * Round values are AES128 = 4, AES192 = 5, AES256 = 6.
 *
 * Same flow as ppc_encrypt_aes, but with the decryption table in rT0,
 * rT1 set to rT0 + 4096 and ppc_decrypt_block as the worker.
 * NOTE(review): rT1 presumably addresses a second lookup table that
 * ppc_decrypt_block needs - confirm in its definition.
 */
_GLOBAL(ppc_decrypt_aes)
        INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
        LOAD_DATA(rD0, 0)               /* fetch the input block        */
        addi            rT1,rT0,4096    /* rT1 = table base + 4096      */
        LOAD_DATA(rD1, 4)
        LOAD_DATA(rD2, 8)
        LOAD_DATA(rD3, 12)
        START_KEY(rD0, rD1, rD2, rD3)
        bl              ppc_decrypt_block
        xor             rD0,rD0,rW0     /* final XOR, then store        */
        SAVE_DATA(rD0, 0)
        xor             rD1,rD1,rW1
        SAVE_DATA(rD1, 4)
        xor             rD2,rD2,rW2
        SAVE_DATA(rD2, 8)
        xor             rD3,rD3,rW3
        SAVE_DATA(rD3, 12)
        FINALIZE_CRYPT(0)
        blr

/*
 * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
 *                 u32 rounds, u32 bytes);
 *
 * Called from the glue layer to encrypt multiple blocks via ECB.
 * Bytes must be greater than or equal to 16 and only whole blocks are
 * processed. Round values are AES128 = 4, AES192 = 5 and AES256 = 6.
 *
 * The loop always handles one block before it evaluates the remaining
 * length, hence the minimum of 16 bytes. The length compare is issued
 * early in the loop body and only consumed by the branch at its end, so
 * its result has to survive the call to ppc_encrypt_block.
 */
_GLOBAL(ppc_encrypt_ecb)
        INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
ppc_encrypt_ecb_loop:
        LOAD_DATA(rD0, 0)
        mr              rKP,rKS         /* rewind key pointer           */
        LOAD_DATA(rD1, 4)
        subi            rLN,rLN,16      /* one block less to go         */
        LOAD_DATA(rD2, 8)
        cmpwi           rLN,15          /* another full block left?     */
        LOAD_DATA(rD3, 12)
        START_KEY(rD0, rD1, rD2, rD3)
        bl              ppc_encrypt_block
        xor             rD0,rD0,rW0
        SAVE_DATA(rD0, 0)
        xor             rD1,rD1,rW1
        SAVE_DATA(rD1, 4)
        xor             rD2,rD2,rW2
        SAVE_DATA(rD2, 8)
        xor             rD3,rD3,rW3
        SAVE_DATA(rD3, 12)
        NEXT_BLOCK
        bt              gt,ppc_encrypt_ecb_loop /* uses compare above   */
        FINALIZE_CRYPT(0)
        blr

/*
 * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
 *                 u32 rounds, u32 bytes);
 *
 * Called from the glue layer to decrypt multiple blocks via ECB.
 * Bytes must be greater than or equal to 16 and only whole blocks are
 * processed. Round values are AES128 = 4, AES192 = 5 and AES256 = 6.
 *
 * Same loop structure as ppc_encrypt_ecb, using the decryption table,
 * rT1 = rT0 + 4096 (set once before the loop) and ppc_decrypt_block.
 * The length compare is issued early and consumed by the branch at the
 * end of the loop body.
 */
_GLOBAL(ppc_decrypt_ecb)
        INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
        addi            rT1,rT0,4096    /* rT1 = table base + 4096      */
ppc_decrypt_ecb_loop:
        LOAD_DATA(rD0, 0)
        mr              rKP,rKS         /* rewind key pointer           */
        LOAD_DATA(rD1, 4)
        subi            rLN,rLN,16      /* one block less to go         */
        LOAD_DATA(rD2, 8)
        cmpwi           rLN,15          /* another full block left?     */
        LOAD_DATA(rD3, 12)
        START_KEY(rD0, rD1, rD2, rD3)
        bl              ppc_decrypt_block
        xor             rD0,rD0,rW0
        SAVE_DATA(rD0, 0)
        xor             rD1,rD1,rW1
        SAVE_DATA(rD1, 4)
        xor             rD2,rD2,rW2
        SAVE_DATA(rD2, 8)
        xor             rD3,rD3,rW3
        SAVE_DATA(rD3, 12)
        NEXT_BLOCK
        bt              gt,ppc_decrypt_ecb_loop /* uses compare above   */
        FINALIZE_CRYPT(0)
        blr

/*
 * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
 *                 u32 rounds, u32 bytes, u8 *iv);
 *
 * Called from the glue layer to encrypt multiple blocks via CBC.
 * Bytes must be greater than or equal to 16 and only whole blocks are
 * processed. Round values are AES128 = 4, AES192 = 5 and AES256 = 6.
 *
 * rI0-rI3 carry the chaining value: initially the IV read through rIP,
 * afterwards the ciphertext block that was produced last. On exit the
 * last ciphertext block is written back to the IV buffer.
 */
_GLOBAL(ppc_encrypt_cbc)
        INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
        LOAD_IV(rI0, 0)
        LOAD_IV(rI1, 4)
        LOAD_IV(rI2, 8)
        LOAD_IV(rI3, 12)
ppc_encrypt_cbc_loop:
        LOAD_DATA(rD0, 0)
        mr              rKP,rKS         /* rewind key pointer           */
        LOAD_DATA(rD1, 4)
        subi            rLN,rLN,16      /* one block less to go         */
        LOAD_DATA(rD2, 8)
        cmpwi           rLN,15          /* another full block left?     */
        LOAD_DATA(rD3, 12)
        xor             rD0,rD0,rI0     /* chain with previous block    */
        xor             rD1,rD1,rI1
        xor             rD2,rD2,rI2
        xor             rD3,rD3,rI3
        START_KEY(rD0, rD1, rD2, rD3)
        bl              ppc_encrypt_block
        xor             rI0,rD0,rW0     /* result is the next chain val */
        SAVE_DATA(rI0, 0)
        xor             rI1,rD1,rW1
        SAVE_DATA(rI1, 4)
        xor             rI2,rD2,rW2
        SAVE_DATA(rI2, 8)
        xor             rI3,rD3,rW3
        SAVE_DATA(rI3, 12)
        NEXT_BLOCK
        bt              gt,ppc_encrypt_cbc_loop /* uses compare above   */
        START_IV                        /* undo LOAD_IV pointer moves   */
        SAVE_IV(rI0, 0)                 /* hand back the updated IV     */
        SAVE_IV(rI1, 4)
        SAVE_IV(rI2, 8)
        SAVE_IV(rI3, 12)
        FINALIZE_CRYPT(4)
        blr

/*
 * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
 *                 u32 rounds, u32 bytes, u8 *iv);
 *
 * Called from the glue layer to decrypt multiple blocks via CBC.
 * Round values are AES128 = 4, AES192 = 5, AES256 = 6.
 *
 * The data is processed back to front:
 *  - bytes is rounded down to a multiple of 16 and both data pointers
 *    are moved to the last block
 *  - the last ciphertext block is loaded and immediately stored to the
 *    IV buffer as the IV for a follow-up call; the original IV stays in
 *    rI0-rI3
 *  - each loop iteration decrypts the block held in rD0-rD3, loads the
 *    preceding ciphertext block into rD0-rD3, XORs both and stores the
 *    plaintext
 *  - the first block is handled after the loop and is XORed with the
 *    original IV
 *
 * NOTE(review): with bytes < 16 the computed offset becomes negative;
 * callers presumably always pass at least one full block - confirm in
 * the glue code.
 */
_GLOBAL(ppc_decrypt_cbc)
        INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
        li              rT1,15
        LOAD_IV(rI0, 0)
        andc            rLN,rLN,rT1     /* drop partial block bytes     */
        LOAD_IV(rI1, 4)
        subi            rLN,rLN,16      /* rLN = offset of last block   */
        LOAD_IV(rI2, 8)
        add             rSP,rSP,rLN     /* reverse processing           */
        LOAD_IV(rI3, 12)
        add             rDP,rDP,rLN
        LOAD_DATA(rD0, 0)               /* last ciphertext block        */
        addi            rT1,rT0,4096    /* rT1 = table base + 4096      */
        LOAD_DATA(rD1, 4)
        LOAD_DATA(rD2, 8)
        LOAD_DATA(rD3, 12)
        START_IV                        /* undo LOAD_IV pointer moves   */
        SAVE_IV(rD0, 0)                 /* it becomes the next IV       */
        SAVE_IV(rD1, 4)
        SAVE_IV(rD2, 8)
        cmpwi           rLN,16
        SAVE_IV(rD3, 12)
        bt              lt,ppc_decrypt_cbc_end  /* only one block       */
ppc_decrypt_cbc_loop:
        mr              rKP,rKS         /* rewind key pointer           */
        START_KEY(rD0, rD1, rD2, rD3)
        bl              ppc_decrypt_block
        subi            rLN,rLN,16
        subi            rSP,rSP,CBC_DEC /* step to preceding block      */
        xor             rW0,rD0,rW0     /* finish decryption in rW      */
        LOAD_DATA(rD0, 0)               /* preceding ciphertext block   */
        xor             rW1,rD1,rW1
        LOAD_DATA(rD1, 4)
        xor             rW2,rD2,rW2
        LOAD_DATA(rD2, 8)
        xor             rW3,rD3,rW3
        LOAD_DATA(rD3, 12)
        xor             rW0,rW0,rD0     /* chain with preceding block   */
        SAVE_DATA(rW0, 0)
        xor             rW1,rW1,rD1
        SAVE_DATA(rW1, 4)
        xor             rW2,rW2,rD2
        SAVE_DATA(rW2, 8)
        xor             rW3,rW3,rD3
        SAVE_DATA(rW3, 12)
        cmpwi           rLN,15
        subi            rDP,rDP,CBC_DEC /* output pointer follows       */
        bt              gt,ppc_decrypt_cbc_loop
ppc_decrypt_cbc_end:
        mr              rKP,rKS         /* rewind key pointer           */
        START_KEY(rD0, rD1, rD2, rD3)
        bl              ppc_decrypt_block
        xor             rW0,rW0,rD0     /* finish decryption in rW      */
        xor             rW1,rW1,rD1
        xor             rW2,rW2,rD2
        xor             rW3,rW3,rD3
        xor             rW0,rW0,rI0     /* decrypt with initial IV      */
        SAVE_DATA(rW0, 0)
        xor             rW1,rW1,rI1
        SAVE_DATA(rW1, 4)
        xor             rW2,rW2,rI2
        SAVE_DATA(rW2, 8)
        xor             rW3,rW3,rI3
        SAVE_DATA(rW3, 12)
        FINALIZE_CRYPT(4)
        blr

/*
 * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
 *               u32 rounds, u32 bytes, u8 *iv);
 *
 * Called from the glue layer to encrypt/decrypt multiple blocks
 * via CTR. Number of bytes does not need to be a multiple of
 * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6.
 *
 * rI0-rI3 hold the 128 bit counter with rI3 as the least significant
 * word; it is incremented by one per block with carry propagation
 * (addic/addze). Full blocks are handled in ppc_crypt_ctr_loop. A
 * trailing partial block is handled bytewise: the encrypted counter is
 * parked in the IV buffer and XORed byte by byte with the input. The
 * counter is incremented for the partial block as well, and its final
 * value is written to the IV buffer on exit.
 */
_GLOBAL(ppc_crypt_ctr)
        INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
        LOAD_IV(rI0, 0)
        LOAD_IV(rI1, 4)
        LOAD_IV(rI2, 8)
        cmpwi           rLN,16
        LOAD_IV(rI3, 12)
        START_IV                        /* undo LOAD_IV pointer moves   */
        bt              lt,ppc_crypt_ctr_partial /* less than one block */
ppc_crypt_ctr_loop:
        mr              rKP,rKS         /* rewind key pointer           */
        START_KEY(rI0, rI1, rI2, rI3)   /* encrypt the counter          */
        bl              ppc_encrypt_block
        xor             rW0,rD0,rW0     /* rW0-rW3 = encrypted counter  */
        xor             rW1,rD1,rW1
        xor             rW2,rD2,rW2
        xor             rW3,rD3,rW3
        LOAD_DATA(rD0, 0)
        subi            rLN,rLN,16
        LOAD_DATA(rD1, 4)
        LOAD_DATA(rD2, 8)
        LOAD_DATA(rD3, 12)
        xor             rD0,rD0,rW0     /* data XOR encrypted counter   */
        SAVE_DATA(rD0, 0)
        xor             rD1,rD1,rW1
        SAVE_DATA(rD1, 4)
        xor             rD2,rD2,rW2
        SAVE_DATA(rD2, 8)
        xor             rD3,rD3,rW3
        SAVE_DATA(rD3, 12)
        addic           rI3,rI3,1       /* increase counter                     */
        addze           rI2,rI2
        addze           rI1,rI1
        addze           rI0,rI0
        NEXT_BLOCK
        cmpwi           rLN,15
        bt              gt,ppc_crypt_ctr_loop
ppc_crypt_ctr_partial:
        cmpwi           rLN,0
        bt              eq,ppc_crypt_ctr_end    /* no bytes left        */
        mr              rKP,rKS         /* rewind key pointer           */
        START_KEY(rI0, rI1, rI2, rI3)
        bl              ppc_encrypt_block
        xor             rW0,rD0,rW0
        SAVE_IV(rW0, 0)                 /* IV buffer used as scratch    */
        xor             rW1,rD1,rW1
        SAVE_IV(rW1, 4)
        xor             rW2,rD2,rW2
        SAVE_IV(rW2, 8)
        xor             rW3,rD3,rW3
        SAVE_IV(rW3, 12)
        mtctr           rLN             /* loop once per remaining byte */
        subi            rIP,rIP,CTR_DEC /* one byte before the buffer   */
        subi            rSP,rSP,1       /* bias for update form access  */
        subi            rDP,rDP,1
ppc_crypt_ctr_xorbyte:
        lbzu            rW4,1(rIP)      /* bytewise xor for partial block       */
        lbzu            rW5,1(rSP)
        xor             rW4,rW4,rW5
        stbu            rW4,1(rDP)
        bdnz            ppc_crypt_ctr_xorbyte
        subf            rIP,rLN,rIP     /* rIP back to the start of     */
        addi            rIP,rIP,1       /* the IV buffer                */
        addic           rI3,rI3,1       /* count the partial block too  */
        addze           rI2,rI2
        addze           rI1,rI1
        addze           rI0,rI0
ppc_crypt_ctr_end:
        SAVE_IV(rI0, 0)                 /* hand back the counter        */
        SAVE_IV(rI1, 4)
        SAVE_IV(rI2, 8)
        SAVE_IV(rI3, 12)
        FINALIZE_CRYPT(4)
        blr

/*
 * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
 *                 u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
 *
 * Called from the glue layer to encrypt multiple blocks via XTS.
 * If key_twk is given (non-zero), the initial IV encryption will be
 * processed too. Round values are AES128 = 4, AES192 = 5,
 * AES256 = 6.
 *
 * The tweak is kept twice: rI0-rI3 in the byte order used for the data
 * XOR and rG0-rG3 byte swapped for GF128_MUL. After every block rG is
 * advanced with GF128_MUL and converted back into rI. The loop runs
 * while the remaining byte count is greater than zero and always
 * processes at least one block. The tweak for the next block is written
 * to the IV buffer on exit.
 */
_GLOBAL(ppc_encrypt_xts)
        INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
        LOAD_IV(rI0, 0)
        LOAD_IV(rI1, 4)
        LOAD_IV(rI2, 8)
        cmpwi           rKT,0
        LOAD_IV(rI3, 12)
        bt              eq,ppc_encrypt_xts_notweak /* IV is used as is  */
        mr              rKP,rKT         /* encrypt IV with tweak key    */
        START_KEY(rI0, rI1, rI2, rI3)
        bl              ppc_encrypt_block
        xor             rI0,rD0,rW0     /* rI0-rI3 = initial tweak      */
        xor             rI1,rD1,rW1
        xor             rI2,rD2,rW2
        xor             rI3,rD3,rW3
ppc_encrypt_xts_notweak:
        ENDIAN_SWAP(rG0, rG1, rI0, rI1) /* rG = byte swapped tweak      */
        ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_encrypt_xts_loop:
        LOAD_DATA(rD0, 0)
        mr              rKP,rKS         /* rewind to the data key       */
        LOAD_DATA(rD1, 4)
        subi            rLN,rLN,16      /* one block less to go         */
        LOAD_DATA(rD2, 8)
        LOAD_DATA(rD3, 12)
        xor             rD0,rD0,rI0     /* apply tweak before encrypt   */
        xor             rD1,rD1,rI1
        xor             rD2,rD2,rI2
        xor             rD3,rD3,rI3
        START_KEY(rD0, rD1, rD2, rD3)
        bl              ppc_encrypt_block
        xor             rD0,rD0,rW0
        xor             rD1,rD1,rW1
        xor             rD2,rD2,rW2
        xor             rD3,rD3,rW3
        xor             rD0,rD0,rI0     /* apply tweak after encrypt    */
        SAVE_DATA(rD0, 0)
        xor             rD1,rD1,rI1
        SAVE_DATA(rD1, 4)
        xor             rD2,rD2,rI2
        SAVE_DATA(rD2, 8)
        xor             rD3,rD3,rI3
        SAVE_DATA(rD3, 12)
        GF128_MUL(rG0, rG1, rG2, rG3, rW0) /* next tweak, rW0 scratch   */
        ENDIAN_SWAP(rI0, rI1, rG0, rG1)
        ENDIAN_SWAP(rI2, rI3, rG2, rG3)
        cmpwi           rLN,0           /* anything left?               */
        NEXT_BLOCK
        bt              gt,ppc_encrypt_xts_loop
        START_IV                        /* undo LOAD_IV pointer moves   */
        SAVE_IV(rI0, 0)                 /* hand back the next tweak     */
        SAVE_IV(rI1, 4)
        SAVE_IV(rI2, 8)
        SAVE_IV(rI3, 12)
        FINALIZE_CRYPT(8)
        blr

/*
 * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
 *                 u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
 *
 * Called from the glue layer to decrypt multiple blocks via XTS.
 * If key_twk is given (non-zero), the initial IV encryption will be
 * processed too. Round values are AES128 = 4, AES192 = 5,
 * AES256 = 6.
 *
 * The length argument is a byte count: it is reduced by 16 per block and
 * the loop runs while it is greater than zero.
 *
 * Structure is identical to ppc_encrypt_xts except that the data blocks
 * go through ppc_decrypt_block. The IV is still encrypted, so rT0 is
 * moved down by 4096 around that call and restored afterwards.
 * NOTE(review): this presumably relies on the encryption table being
 * located 4096 bytes below PPC_AES_4K_DECTAB - confirm in the table
 * definition.
 */
_GLOBAL(ppc_decrypt_xts)
        INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
        LOAD_IV(rI0, 0)
        addi            rT1,rT0,4096    /* rT1 = table base + 4096      */
        LOAD_IV(rI1, 4)
        LOAD_IV(rI2, 8)
        cmpwi           rKT,0
        LOAD_IV(rI3, 12)
        bt              eq,ppc_decrypt_xts_notweak /* IV is used as is  */
        subi            rT0,rT0,4096    /* table for the encrypt call   */
        mr              rKP,rKT         /* encrypt IV with tweak key    */
        START_KEY(rI0, rI1, rI2, rI3)
        bl              ppc_encrypt_block
        xor             rI0,rD0,rW0     /* rI0-rI3 = initial tweak      */
        xor             rI1,rD1,rW1
        xor             rI2,rD2,rW2
        xor             rI3,rD3,rW3
        addi            rT0,rT0,4096    /* back to the decrypt table    */
ppc_decrypt_xts_notweak:
        ENDIAN_SWAP(rG0, rG1, rI0, rI1) /* rG = byte swapped tweak      */
        ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_decrypt_xts_loop:
        LOAD_DATA(rD0, 0)
        mr              rKP,rKS         /* rewind to the data key       */
        LOAD_DATA(rD1, 4)
        subi            rLN,rLN,16      /* one block less to go         */
        LOAD_DATA(rD2, 8)
        LOAD_DATA(rD3, 12)
        xor             rD0,rD0,rI0     /* apply tweak before decrypt   */
        xor             rD1,rD1,rI1
        xor             rD2,rD2,rI2
        xor             rD3,rD3,rI3
        START_KEY(rD0, rD1, rD2, rD3)
        bl              ppc_decrypt_block
        xor             rD0,rD0,rW0
        xor             rD1,rD1,rW1
        xor             rD2,rD2,rW2
        xor             rD3,rD3,rW3
        xor             rD0,rD0,rI0     /* apply tweak after decrypt    */
        SAVE_DATA(rD0, 0)
        xor             rD1,rD1,rI1
        SAVE_DATA(rD1, 4)
        xor             rD2,rD2,rI2
        SAVE_DATA(rD2, 8)
        xor             rD3,rD3,rI3
        SAVE_DATA(rD3, 12)
        GF128_MUL(rG0, rG1, rG2, rG3, rW0) /* next tweak, rW0 scratch   */
        ENDIAN_SWAP(rI0, rI1, rG0, rG1)
        ENDIAN_SWAP(rI2, rI3, rG2, rG3)
        cmpwi           rLN,0           /* anything left?               */
        NEXT_BLOCK
        bt              gt,ppc_decrypt_xts_loop
        START_IV                        /* undo LOAD_IV pointer moves   */
        SAVE_IV(rI0, 0)                 /* hand back the next tweak     */
        SAVE_IV(rI1, 4)
        SAVE_IV(rI2, 8)
        SAVE_IV(rI3, 12)
        FINALIZE_CRYPT(8)
        blr