c918d8e283
Reviewed-by: Matt Caswell <matt@openssl.org> (Merged from https://github.com/openssl/openssl/pull/7771)
1382 lines
43 KiB
Perl
1382 lines
43 KiB
Perl
#! /usr/bin/env perl
|
|
# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
# this file except in compliance with the License. You can obtain a copy
|
|
# in the file LICENSE in the source distribution or at
|
|
# https://www.openssl.org/source/license.html
|
|
|
|
#
|
|
# ====================================================================
|
|
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
|
# project. The module is, however, dual licensed under OpenSSL and
|
|
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
|
# details see http://www.openssl.org/~appro/cryptogams/.
|
|
# ====================================================================
|
|
#
|
|
# [Endian-neutral] AES for C64x+.
|
|
#
|
|
# Even though SPLOOPs are scheduled for 13 cycles, and thus expected
|
|
# performance is ~8.5 cycles per byte processed with 128-bit key,
|
|
# measured performance turned out to be ~10 cycles per byte. Discrepancy
|
|
# must be caused by limitations of L1D memory banking(*), see SPRU871
|
|
# TI publication for further details. If it's any consolation, it's still
|
|
# ~20% faster than TI's linear assembly module anyway... Compared to
|
|
# aes_core.c compiled with cl6x 6.0 with -mv6400+ -o2 options this
|
|
# code is 3.75x faster and almost 3x smaller (tables included).
|
|
#
|
|
# (*) This means that there might be subtle correlation between data
|
|
# and timing and one can wonder if it can be ... attacked:-(
|
|
# On the other hand this also means that *if* one chooses to
|
|
# implement *4* T-tables variant [instead of 1 T-table as in
|
|
# this implementation, or in addition to], then one ought to
|
|
# *interleave* them. Even though it complicates addressing,
|
|
# references to interleaved tables would be guaranteed not to
|
|
# clash. I reckon that it should be possible to break 8 cycles
|
|
# per byte "barrier," i.e. improve by ~20%, naturally at the
|
|
# cost of 8x increased pressure on L1D. 8x because you'd have
|
|
# to interleave both Te and Td tables...
|
|
|
|
# Scan the command line for the first argument that looks like a file name
# ("stem.ext"); arguments in front of it are consumed and ignored.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
# Redirect the generated assembly to that file. The 3-argument open keeps
# the name from being parsed for mode characters, and a failure is reported
# instead of being ignored. When no name was supplied STDOUT is left as is.
if (defined($output) && $output ne "") {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}
|
|
|
|
# Register allocation used by the code templates that follow.
# Table base pointers, one per register file (A side / B side).
$TEA="A5";
$TEB="B5";
# Key-schedule pointers, again one per side.
$KPA="A3";
$KPB="B1";
# Current round-key words and the four state words.
@K=qw(A6 B6 A7 B7);
@s=qw(A8 B8 A9 B9);
# Table look-up results; the encrypt (Te*) and decrypt (Td*) templates
# are given the same registers.
@Td0=qw(A16 B16 A17 B17);	@Te0=@Td0;
@Td1=qw(A18 B18 A19 B19);	@Te1=@Td1;
@Td2=qw(A20 B20 A21 B21);	@Te2=@Td2;
@Td3=qw(A22 B22 A23 B23);	@Te3=@Td3;
|
|
|
|
$code=<<___;
|
|
;; Common assembler setup for the routines in this file: ABI-dependent
;; symbol aliases, symbolic register names and the byte-selector and
;; rotate constants used by the table look-ups.
.text
|
|
|
|
;; When .ASSEMBLER_VERSION is below 7000000, define __TI_EABI__ as 0 so
;; that the .if tests on it below select the non-EABI paths.
.if .ASSEMBLER_VERSION<7000000
|
|
.asg 0,__TI_EABI__
|
|
.endif
|
|
;; Under EABI the underscore-prefixed names used throughout this file are
;; substituted with the unprefixed ones.
;; NOTE(review): .nocmp presumably disables compact instruction encoding
;; for this section - confirm against the assembler manual.
.if __TI_EABI__
|
|
.nocmp
|
|
.asg AES_encrypt,_AES_encrypt
|
|
.asg AES_decrypt,_AES_decrypt
|
|
.asg AES_set_encrypt_key,_AES_set_encrypt_key
|
|
.asg AES_set_decrypt_key,_AES_set_decrypt_key
|
|
.asg AES_ctr32_encrypt,_AES_ctr32_encrypt
|
|
.endif
|
|
|
|
;; Symbolic register names: return address, the three argument registers
;; as the routines use them, the return value (same register as INP) and
;; the stack pointer.
.asg B3,RA
|
|
.asg A4,INP
|
|
.asg B4,OUT
|
|
.asg A6,KEY
|
|
.asg A4,RET
|
|
.asg B15,SP
|
|
|
|
;; EXTn is the left-shift operand handed to EXTU (always paired with a
;; right shift of 24), so each EXTn selects one byte of a 32-bit word.
;; TBLn is the ROTL distance applied to a value loaded from the single
;; stored table to obtain the entry of table n.
.eval 24,EXT0
|
|
.eval 16,EXT1
|
|
.eval 8,EXT2
|
|
.eval 0,EXT3
|
|
.eval 8,TBL1
|
|
.eval 16,TBL2
|
|
.eval 24,TBL3
|
|
|
|
;; Big-endian targets mirror both the byte selectors and the rotate
;; distances.
.if .BIG_ENDIAN
|
|
.eval 24-EXT0,EXT0
|
|
.eval 24-EXT1,EXT1
|
|
.eval 24-EXT2,EXT2
|
|
.eval 24-EXT3,EXT3
|
|
.eval 32-TBL1,TBL1
|
|
.eval 32-TBL2,TBL2
|
|
.eval 32-TBL3,TBL3
|
|
.endif
|
|
|
|
;; AES_encrypt: encrypt one 16-byte block.
;;   INP (A4) -> input block, OUT (B4) -> output block,
;;   KEY (A6) -> key schedule; the round count is read from word 60 of it.
;; __encrypt is a second entry point that does not set B2. With B2 equal
;; to zero the loads from INP and the stores to OUT are suppressed: the
;; block is taken from A9:A8 and B9:B8 and the result is left in the same
;; registers (this is how AES_ctr32_encrypt uses it).
.global _AES_encrypt
|
|
_AES_encrypt:
|
|
.asmfunc
|
|
MVK 1,B2
|
|
__encrypt:
|
|
;; Locate AES_Te position-independently: ADDKPC produces the address of
;; __encrypt and MVKL/MVKH supply the offset of the table from that label.
;; The two key pointers start at words 0 and 1 of the schedule and each
;; advances by two words per load.
.if __TI_EABI__
|
|
[B2] LDNDW *INP++,A9:A8 ; load input
|
|
|| MVKL \$PCR_OFFSET(AES_Te,__encrypt),$TEA
|
|
|| ADDKPC __encrypt,B0
|
|
[B2] LDNDW *INP++,B9:B8
|
|
|| MVKH \$PCR_OFFSET(AES_Te,__encrypt),$TEA
|
|
|| ADD 0,KEY,$KPA
|
|
|| ADD 4,KEY,$KPB
|
|
.else
|
|
[B2] LDNDW *INP++,A9:A8 ; load input
|
|
|| MVKL (AES_Te-__encrypt),$TEA
|
|
|| ADDKPC __encrypt,B0
|
|
[B2] LDNDW *INP++,B9:B8
|
|
|| MVKH (AES_Te-__encrypt),$TEA
|
|
|| ADD 0,KEY,$KPA
|
|
|| ADD 4,KEY,$KPB
|
|
.endif
|
|
LDW *$KPA++[2],$Te0[0] ; zero round key
|
|
|| LDW *$KPB++[2],$Te0[1]
|
|
|| MVK 60,A0
|
|
|| ADD B0,$TEA,$TEA ; AES_Te
|
|
LDW *KEY[A0],B0 ; rounds
|
|
|| MVK 1024,A0 ; sizeof(AES_Te)
|
|
LDW *$KPA++[2],$Te0[2]
|
|
|| LDW *$KPB++[2],$Te0[3]
|
|
|| MV $TEA,$TEB
|
|
NOP
|
|
;; Copy the input words into the state registers; which half of each
;; register pair holds which word depends on endianness.
.if .BIG_ENDIAN
|
|
MV A9,$s[0]
|
|
|| MV A8,$s[1]
|
|
|| MV B9,$s[2]
|
|
|| MV B8,$s[3]
|
|
.else
|
|
MV A8,$s[0]
|
|
|| MV A9,$s[1]
|
|
|| MV B8,$s[2]
|
|
|| MV B9,$s[3]
|
|
.endif
|
|
XOR $Te0[0],$s[0],$s[0]
|
|
|| XOR $Te0[1],$s[1],$s[1]
|
|
|| LDW *$KPA++[2],$K[0] ; 1st round key
|
|
|| LDW *$KPB++[2],$K[1]
|
|
SUB B0,2,B0
|
|
|
|
;; Table-driven rounds as a software-pipelined loop scheduled for 13
;; cycles; ILC is loaded with rounds-2. Each state byte indexes the one
;; stored table and ROTL by TBL1..TBL3 turns the loaded word into the
;; entry of the rotated tables. The XORs tagged "modulo-scheduled"
;; complete state words 2 and 3 of the previous step.
SPLOOPD 13
|
|
|| MVC B0,ILC
|
|
|| LDW *$KPA++[2],$K[2]
|
|
|| LDW *$KPB++[2],$K[3]
|
|
;;====================================================================
|
|
EXTU $s[1],EXT1,24,$Te1[1]
|
|
|| EXTU $s[0],EXT3,24,$Te3[0]
|
|
LDW *${TEB}[$Te1[1]],$Te1[1] ; Te1[s1>>8], t0
|
|
|| LDW *${TEA}[$Te3[0]],$Te3[0] ; Te3[s0>>24], t1
|
|
|| XOR $s[2],$Te0[2],$s[2] ; modulo-scheduled
|
|
|| XOR $s[3],$Te0[3],$s[3] ; modulo-scheduled
|
|
|| EXTU $s[1],EXT3,24,$Te3[1]
|
|
|| EXTU $s[0],EXT1,24,$Te1[0]
|
|
LDW *${TEB}[$Te3[1]],$Te3[1] ; Te3[s1>>24], t2
|
|
|| LDW *${TEA}[$Te1[0]],$Te1[0] ; Te1[s0>>8], t3
|
|
|| EXTU $s[2],EXT2,24,$Te2[2]
|
|
|| EXTU $s[3],EXT2,24,$Te2[3]
|
|
LDW *${TEA}[$Te2[2]],$Te2[2] ; Te2[s2>>16], t0
|
|
|| LDW *${TEB}[$Te2[3]],$Te2[3] ; Te2[s3>>16], t1
|
|
|| EXTU $s[3],EXT3,24,$Te3[3]
|
|
|| EXTU $s[2],EXT1,24,$Te1[2]
|
|
LDW *${TEB}[$Te3[3]],$Te3[3] ; Te3[s3>>24], t0
|
|
|| LDW *${TEA}[$Te1[2]],$Te1[2] ; Te1[s2>>8], t1
|
|
|| EXTU $s[0],EXT2,24,$Te2[0]
|
|
|| EXTU $s[1],EXT2,24,$Te2[1]
|
|
LDW *${TEA}[$Te2[0]],$Te2[0] ; Te2[s0>>16], t2
|
|
|| LDW *${TEB}[$Te2[1]],$Te2[1] ; Te2[s1>>16], t3
|
|
|| EXTU $s[3],EXT1,24,$Te1[3]
|
|
|| EXTU $s[2],EXT3,24,$Te3[2]
|
|
LDW *${TEB}[$Te1[3]],$Te1[3] ; Te1[s3>>8], t2
|
|
|| LDW *${TEA}[$Te3[2]],$Te3[2] ; Te3[s2>>24], t3
|
|
|| ROTL $Te1[1],TBL1,$Te3[0] ; t0
|
|
|| ROTL $Te3[0],TBL3,$Te1[1] ; t1
|
|
|| EXTU $s[0],EXT0,24,$Te0[0]
|
|
|| EXTU $s[1],EXT0,24,$Te0[1]
|
|
LDW *${TEA}[$Te0[0]],$Te0[0] ; Te0[s0], t0
|
|
|| LDW *${TEB}[$Te0[1]],$Te0[1] ; Te0[s1], t1
|
|
|| ROTL $Te3[1],TBL3,$Te1[0] ; t2
|
|
|| ROTL $Te1[0],TBL1,$Te3[1] ; t3
|
|
|| EXTU $s[2],EXT0,24,$Te0[2]
|
|
|| EXTU $s[3],EXT0,24,$Te0[3]
|
|
LDW *${TEA}[$Te0[2]],$Te0[2] ; Te0[s2], t2
|
|
|| LDW *${TEB}[$Te0[3]],$Te0[3] ; Te0[s3], t3
|
|
|| ROTL $Te2[2],TBL2,$Te2[2] ; t0
|
|
|| ROTL $Te2[3],TBL2,$Te2[3] ; t1
|
|
|| XOR $K[0],$Te3[0],$s[0]
|
|
|| XOR $K[1],$Te1[1],$s[1]
|
|
ROTL $Te3[3],TBL3,$Te1[2] ; t0
|
|
|| ROTL $Te1[2],TBL1,$Te3[3] ; t1
|
|
|| XOR $K[2],$Te1[0],$s[2]
|
|
|| XOR $K[3],$Te3[1],$s[3]
|
|
|| LDW *$KPA++[2],$K[0] ; next round key
|
|
|| LDW *$KPB++[2],$K[1]
|
|
ROTL $Te2[0],TBL2,$Te2[0] ; t2
|
|
|| ROTL $Te2[1],TBL2,$Te2[1] ; t3
|
|
|| XOR $s[0],$Te2[2],$s[0]
|
|
|| XOR $s[1],$Te2[3],$s[1]
|
|
|| LDW *$KPA++[2],$K[2]
|
|
|| LDW *$KPB++[2],$K[3]
|
|
ROTL $Te1[3],TBL1,$Te3[2] ; t2
|
|
|| ROTL $Te3[2],TBL3,$Te1[3] ; t3
|
|
|| XOR $s[0],$Te1[2],$s[0]
|
|
|| XOR $s[1],$Te3[3],$s[1]
|
|
XOR $s[2],$Te2[0],$s[2]
|
|
|| XOR $s[3],$Te2[1],$s[3]
|
|
|| XOR $s[0],$Te0[0],$s[0]
|
|
|| XOR $s[1],$Te0[1],$s[1]
|
|
SPKERNEL
|
|
|| XOR.L $s[2],$Te3[2],$s[2]
|
|
|| XOR.L $s[3],$Te1[3],$s[3]
|
|
;;====================================================================
|
|
;; Final round: advance both table pointers by the 1024 bytes held in A0
;; and fetch single bytes (LDBU) instead of words.
ADD.D ${TEA},A0,${TEA} ; point to Te4
|
|
|| ADD.D ${TEB},A0,${TEB}
|
|
|| EXTU $s[1],EXT1,24,$Te1[1]
|
|
|| EXTU $s[0],EXT3,24,$Te3[0]
|
|
LDBU *${TEB}[$Te1[1]],$Te1[1] ; Te1[s1>>8], t0
|
|
|| LDBU *${TEA}[$Te3[0]],$Te3[0] ; Te3[s0>>24], t1
|
|
|| XOR $s[2],$Te0[2],$s[2] ; modulo-scheduled
|
|
|| XOR $s[3],$Te0[3],$s[3] ; modulo-scheduled
|
|
|| EXTU $s[0],EXT0,24,$Te0[0]
|
|
|| EXTU $s[1],EXT0,24,$Te0[1]
|
|
LDBU *${TEA}[$Te0[0]],$Te0[0] ; Te0[s0], t0
|
|
|| LDBU *${TEB}[$Te0[1]],$Te0[1] ; Te0[s1], t1
|
|
|| EXTU $s[3],EXT3,24,$Te3[3]
|
|
|| EXTU $s[2],EXT1,24,$Te1[2]
|
|
LDBU *${TEB}[$Te3[3]],$Te3[3] ; Te3[s3>>24], t0
|
|
|| LDBU *${TEA}[$Te1[2]],$Te1[2] ; Te1[s2>>8], t1
|
|
|| EXTU $s[2],EXT2,24,$Te2[2]
|
|
|| EXTU $s[3],EXT2,24,$Te2[3]
|
|
LDBU *${TEA}[$Te2[2]],$Te2[2] ; Te2[s2>>16], t0
|
|
|| LDBU *${TEB}[$Te2[3]],$Te2[3] ; Te2[s3>>16], t1
|
|
|| EXTU $s[1],EXT3,24,$Te3[1]
|
|
|| EXTU $s[0],EXT1,24,$Te1[0]
|
|
LDBU *${TEB}[$Te3[1]],$Te3[1] ; Te3[s1>>24], t2
|
|
|| LDBU *${TEA}[$Te1[0]],$Te1[0] ; Te1[s0>>8], t3
|
|
|| EXTU $s[3],EXT1,24,$Te1[3]
|
|
|| EXTU $s[2],EXT3,24,$Te3[2]
|
|
LDBU *${TEB}[$Te1[3]],$Te1[3] ; Te1[s3>>8], t2
|
|
|| LDBU *${TEA}[$Te3[2]],$Te3[2] ; Te3[s2>>24], t3
|
|
|| EXTU $s[2],EXT0,24,$Te0[2]
|
|
|| EXTU $s[3],EXT0,24,$Te0[3]
|
|
LDBU *${TEA}[$Te0[2]],$Te0[2] ; Te0[s2], t2
|
|
|| LDBU *${TEB}[$Te0[3]],$Te0[3] ; Te0[s3], t3
|
|
|| EXTU $s[0],EXT2,24,$Te2[0]
|
|
|| EXTU $s[1],EXT2,24,$Te2[1]
|
|
LDBU *${TEA}[$Te2[0]],$Te2[0] ; Te2[s0>>16], t2
|
|
|| LDBU *${TEB}[$Te2[1]],$Te2[1] ; Te2[s1>>16], t3
|
|
|
|
;; Reassemble each output word from its four looked-up bytes (PACK2 and
;; PACKL4), XOR in the last round key and place the result in A9:A8 and
;; B9:B8. BNOP RA is issued before the last packets, which therefore
;; execute while the branch is pending; the stores to OUT are skipped
;; when B2 is zero.
.if .BIG_ENDIAN
|
|
PACK2 $Te0[0],$Te1[1],$Te0[0]
|
|
|| PACK2 $Te0[1],$Te1[2],$Te0[1]
|
|
PACK2 $Te2[2],$Te3[3],$Te2[2]
|
|
|| PACK2 $Te2[3],$Te3[0],$Te2[3]
|
|
PACKL4 $Te0[0],$Te2[2],$Te0[0]
|
|
|| PACKL4 $Te0[1],$Te2[3],$Te0[1]
|
|
XOR $K[0],$Te0[0],$Te0[0] ; s[0]
|
|
|| XOR $K[1],$Te0[1],$Te0[1] ; s[1]
|
|
|
|
PACK2 $Te0[2],$Te1[3],$Te0[2]
|
|
|| PACK2 $Te0[3],$Te1[0],$Te0[3]
|
|
PACK2 $Te2[0],$Te3[1],$Te2[0]
|
|
|| PACK2 $Te2[1],$Te3[2],$Te2[1]
|
|
|| BNOP RA
|
|
PACKL4 $Te0[2],$Te2[0],$Te0[2]
|
|
|| PACKL4 $Te0[3],$Te2[1],$Te0[3]
|
|
XOR $K[2],$Te0[2],$Te0[2] ; s[2]
|
|
|| XOR $K[3],$Te0[3],$Te0[3] ; s[3]
|
|
|
|
MV $Te0[0],A9
|
|
|| MV $Te0[1],A8
|
|
MV $Te0[2],B9
|
|
|| MV $Te0[3],B8
|
|
|| [B2] STNDW A9:A8,*OUT++
|
|
[B2] STNDW B9:B8,*OUT++
|
|
.else
|
|
PACK2 $Te1[1],$Te0[0],$Te1[1]
|
|
|| PACK2 $Te1[2],$Te0[1],$Te1[2]
|
|
PACK2 $Te3[3],$Te2[2],$Te3[3]
|
|
|| PACK2 $Te3[0],$Te2[3],$Te3[0]
|
|
PACKL4 $Te3[3],$Te1[1],$Te1[1]
|
|
|| PACKL4 $Te3[0],$Te1[2],$Te1[2]
|
|
XOR $K[0],$Te1[1],$Te1[1] ; s[0]
|
|
|| XOR $K[1],$Te1[2],$Te1[2] ; s[1]
|
|
|
|
PACK2 $Te1[3],$Te0[2],$Te1[3]
|
|
|| PACK2 $Te1[0],$Te0[3],$Te1[0]
|
|
PACK2 $Te3[1],$Te2[0],$Te3[1]
|
|
|| PACK2 $Te3[2],$Te2[1],$Te3[2]
|
|
|| BNOP RA
|
|
PACKL4 $Te3[1],$Te1[3],$Te1[3]
|
|
|| PACKL4 $Te3[2],$Te1[0],$Te1[0]
|
|
XOR $K[2],$Te1[3],$Te1[3] ; s[2]
|
|
|| XOR $K[3],$Te1[0],$Te1[0] ; s[3]
|
|
|
|
MV $Te1[1],A8
|
|
|| MV $Te1[2],A9
|
|
MV $Te1[3],B8
|
|
|| MV $Te1[0],B9
|
|
|| [B2] STNDW A9:A8,*OUT++
|
|
[B2] STNDW B9:B8,*OUT++
|
|
.endif
|
|
.endasmfunc
|
|
|
|
;; AES_decrypt: decrypt one 16-byte block.
;;   INP (A4) -> input block, OUT (B4) -> output block,
;;   KEY (A6) -> key schedule; the round count is read from word 60 of it.
;; Same structure as AES_encrypt, using AES_Td and the inverse byte
;; selection. __decrypt is an entry point that does not set B2; with B2
;; equal to zero the loads from INP and the stores to OUT are suppressed
;; and the block is taken from, and left in, A9:A8 and B9:B8.
.global _AES_decrypt
|
|
_AES_decrypt:
|
|
.asmfunc
|
|
MVK 1,B2
|
|
__decrypt:
|
|
;; Locate AES_Td position-independently (ADDKPC gives the address of
;; __decrypt, MVKL/MVKH the offset of the table from it) and point the
;; two key pointers at words 0 and 1 of the schedule.
.if __TI_EABI__
|
|
[B2] LDNDW *INP++,A9:A8 ; load input
|
|
|| MVKL \$PCR_OFFSET(AES_Td,__decrypt),$TEA
|
|
|| ADDKPC __decrypt,B0
|
|
[B2] LDNDW *INP++,B9:B8
|
|
|| MVKH \$PCR_OFFSET(AES_Td,__decrypt),$TEA
|
|
|| ADD 0,KEY,$KPA
|
|
|| ADD 4,KEY,$KPB
|
|
.else
|
|
[B2] LDNDW *INP++,A9:A8 ; load input
|
|
|| MVKL (AES_Td-__decrypt),$TEA
|
|
|| ADDKPC __decrypt,B0
|
|
[B2] LDNDW *INP++,B9:B8
|
|
|| MVKH (AES_Td-__decrypt),$TEA
|
|
|| ADD 0,KEY,$KPA
|
|
|| ADD 4,KEY,$KPB
|
|
.endif
|
|
LDW *$KPA++[2],$Td0[0] ; zero round key
|
|
|| LDW *$KPB++[2],$Td0[1]
|
|
|| MVK 60,A0
|
|
|| ADD B0,$TEA,$TEA ; AES_Td
|
|
LDW *KEY[A0],B0 ; rounds
|
|
|| MVK 1024,A0 ; sizeof(AES_Td)
|
|
LDW *$KPA++[2],$Td0[2]
|
|
|| LDW *$KPB++[2],$Td0[3]
|
|
|| MV $TEA,$TEB
|
|
NOP
|
|
;; Copy the input words into the state registers; which half of each
;; register pair holds which word depends on endianness.
.if .BIG_ENDIAN
|
|
MV A9,$s[0]
|
|
|| MV A8,$s[1]
|
|
|| MV B9,$s[2]
|
|
|| MV B8,$s[3]
|
|
.else
|
|
MV A8,$s[0]
|
|
|| MV A9,$s[1]
|
|
|| MV B8,$s[2]
|
|
|| MV B9,$s[3]
|
|
.endif
|
|
XOR $Td0[0],$s[0],$s[0]
|
|
|| XOR $Td0[1],$s[1],$s[1]
|
|
|| LDW *$KPA++[2],$K[0] ; 1st round key
|
|
|| LDW *$KPB++[2],$K[1]
|
|
SUB B0,2,B0
|
|
|
|
;; Table-driven rounds as a software-pipelined loop scheduled for 13
;; cycles; ILC is loaded with rounds-2. The XORs tagged
;; "modulo-scheduled" complete state words 2 and 3 of the previous step.
SPLOOPD 13
|
|
|| MVC B0,ILC
|
|
|| LDW *$KPA++[2],$K[2]
|
|
|| LDW *$KPB++[2],$K[3]
|
|
;;====================================================================
|
|
EXTU $s[1],EXT3,24,$Td3[1]
|
|
|| EXTU $s[0],EXT1,24,$Td1[0]
|
|
LDW *${TEB}[$Td3[1]],$Td3[1] ; Td3[s1>>24], t0
|
|
|| LDW *${TEA}[$Td1[0]],$Td1[0] ; Td1[s0>>8], t1
|
|
|| XOR $s[2],$Td0[2],$s[2] ; modulo-scheduled
|
|
|| XOR $s[3],$Td0[3],$s[3] ; modulo-scheduled
|
|
|| EXTU $s[1],EXT1,24,$Td1[1]
|
|
|| EXTU $s[0],EXT3,24,$Td3[0]
|
|
LDW *${TEB}[$Td1[1]],$Td1[1] ; Td1[s1>>8], t2
|
|
|| LDW *${TEA}[$Td3[0]],$Td3[0] ; Td3[s0>>24], t3
|
|
|| EXTU $s[2],EXT2,24,$Td2[2]
|
|
|| EXTU $s[3],EXT2,24,$Td2[3]
|
|
LDW *${TEA}[$Td2[2]],$Td2[2] ; Td2[s2>>16], t0
|
|
|| LDW *${TEB}[$Td2[3]],$Td2[3] ; Td2[s3>>16], t1
|
|
|| EXTU $s[3],EXT1,24,$Td1[3]
|
|
|| EXTU $s[2],EXT3,24,$Td3[2]
|
|
LDW *${TEB}[$Td1[3]],$Td1[3] ; Td1[s3>>8], t0
|
|
|| LDW *${TEA}[$Td3[2]],$Td3[2] ; Td3[s2>>24], t1
|
|
|| EXTU $s[0],EXT2,24,$Td2[0]
|
|
|| EXTU $s[1],EXT2,24,$Td2[1]
|
|
LDW *${TEA}[$Td2[0]],$Td2[0] ; Td2[s0>>16], t2
|
|
|| LDW *${TEB}[$Td2[1]],$Td2[1] ; Td2[s1>>16], t3
|
|
|| EXTU $s[3],EXT3,24,$Td3[3]
|
|
|| EXTU $s[2],EXT1,24,$Td1[2]
|
|
LDW *${TEB}[$Td3[3]],$Td3[3] ; Td3[s3>>24], t2
|
|
|| LDW *${TEA}[$Td1[2]],$Td1[2] ; Td1[s2>>8], t3
|
|
|| ROTL $Td3[1],TBL3,$Td1[0] ; t0
|
|
|| ROTL $Td1[0],TBL1,$Td3[1] ; t1
|
|
|| EXTU $s[0],EXT0,24,$Td0[0]
|
|
|| EXTU $s[1],EXT0,24,$Td0[1]
|
|
LDW *${TEA}[$Td0[0]],$Td0[0] ; Td0[s0], t0
|
|
|| LDW *${TEB}[$Td0[1]],$Td0[1] ; Td0[s1], t1
|
|
|| ROTL $Td1[1],TBL1,$Td3[0] ; t2
|
|
|| ROTL $Td3[0],TBL3,$Td1[1] ; t3
|
|
|| EXTU $s[2],EXT0,24,$Td0[2]
|
|
|| EXTU $s[3],EXT0,24,$Td0[3]
|
|
LDW *${TEA}[$Td0[2]],$Td0[2] ; Td0[s2], t2
|
|
|| LDW *${TEB}[$Td0[3]],$Td0[3] ; Td0[s3], t3
|
|
|| ROTL $Td2[2],TBL2,$Td2[2] ; t0
|
|
|| ROTL $Td2[3],TBL2,$Td2[3] ; t1
|
|
|| XOR $K[0],$Td1[0],$s[0]
|
|
|| XOR $K[1],$Td3[1],$s[1]
|
|
ROTL $Td1[3],TBL1,$Td3[2] ; t0
|
|
|| ROTL $Td3[2],TBL3,$Td1[3] ; t1
|
|
|| XOR $K[2],$Td3[0],$s[2]
|
|
|| XOR $K[3],$Td1[1],$s[3]
|
|
|| LDW *$KPA++[2],$K[0] ; next round key
|
|
|| LDW *$KPB++[2],$K[1]
|
|
ROTL $Td2[0],TBL2,$Td2[0] ; t2
|
|
|| ROTL $Td2[1],TBL2,$Td2[1] ; t3
|
|
|| XOR $s[0],$Td2[2],$s[0]
|
|
|| XOR $s[1],$Td2[3],$s[1]
|
|
|| LDW *$KPA++[2],$K[2]
|
|
|| LDW *$KPB++[2],$K[3]
|
|
ROTL $Td3[3],TBL3,$Td1[2] ; t2
|
|
|| ROTL $Td1[2],TBL1,$Td3[3] ; t3
|
|
|| XOR $s[0],$Td3[2],$s[0]
|
|
|| XOR $s[1],$Td1[3],$s[1]
|
|
XOR $s[2],$Td2[0],$s[2]
|
|
|| XOR $s[3],$Td2[1],$s[3]
|
|
|| XOR $s[0],$Td0[0],$s[0]
|
|
|| XOR $s[1],$Td0[1],$s[1]
|
|
SPKERNEL
|
|
|| XOR.L $s[2],$Td1[2],$s[2]
|
|
|| XOR.L $s[3],$Td3[3],$s[3]
|
|
;;====================================================================
|
|
;; Final round: advance both table pointers by the 1024 bytes held in A0
;; and fetch single bytes (LDBU) instead of words.
ADD.D ${TEA},A0,${TEA} ; point to Td4
|
|
|| ADD.D ${TEB},A0,${TEB}
|
|
|| EXTU $s[1],EXT3,24,$Td3[1]
|
|
|| EXTU $s[0],EXT1,24,$Td1[0]
|
|
LDBU *${TEB}[$Td3[1]],$Td3[1] ; Td3[s1>>24], t0
|
|
|| LDBU *${TEA}[$Td1[0]],$Td1[0] ; Td1[s0>>8], t1
|
|
|| XOR $s[2],$Td0[2],$s[2] ; modulo-scheduled
|
|
|| XOR $s[3],$Td0[3],$s[3] ; modulo-scheduled
|
|
|| EXTU $s[0],EXT0,24,$Td0[0]
|
|
|| EXTU $s[1],EXT0,24,$Td0[1]
|
|
LDBU *${TEA}[$Td0[0]],$Td0[0] ; Td0[s0], t0
|
|
|| LDBU *${TEB}[$Td0[1]],$Td0[1] ; Td0[s1], t1
|
|
|| EXTU $s[2],EXT2,24,$Td2[2]
|
|
|| EXTU $s[3],EXT2,24,$Td2[3]
|
|
LDBU *${TEA}[$Td2[2]],$Td2[2] ; Td2[s2>>16], t0
|
|
|| LDBU *${TEB}[$Td2[3]],$Td2[3] ; Td2[s3>>16], t1
|
|
|| EXTU $s[3],EXT1,24,$Td1[3]
|
|
|| EXTU $s[2],EXT3,24,$Td3[2]
|
|
LDBU *${TEB}[$Td1[3]],$Td1[3] ; Td1[s3>>8], t0
|
|
|| LDBU *${TEA}[$Td3[2]],$Td3[2] ; Td3[s2>>24], t1
|
|
|| EXTU $s[1],EXT1,24,$Td1[1]
|
|
|| EXTU $s[0],EXT3,24,$Td3[0]
|
|
LDBU *${TEB}[$Td1[1]],$Td1[1] ; Td1[s1>>8], t2
|
|
|| LDBU *${TEA}[$Td3[0]],$Td3[0] ; Td3[s0>>24], t3
|
|
|| EXTU $s[0],EXT2,24,$Td2[0]
|
|
|| EXTU $s[1],EXT2,24,$Td2[1]
|
|
LDBU *${TEA}[$Td2[0]],$Td2[0] ; Td2[s0>>16], t2
|
|
|| LDBU *${TEB}[$Td2[1]],$Td2[1] ; Td2[s1>>16], t3
|
|
|| EXTU $s[3],EXT3,24,$Td3[3]
|
|
|| EXTU $s[2],EXT1,24,$Td1[2]
|
|
LDBU *${TEB}[$Td3[3]],$Td3[3] ; Td3[s3>>24], t2
|
|
|| LDBU *${TEA}[$Td1[2]],$Td1[2] ; Td1[s2>>8], t3
|
|
|| EXTU $s[2],EXT0,24,$Td0[2]
|
|
|| EXTU $s[3],EXT0,24,$Td0[3]
|
|
LDBU *${TEA}[$Td0[2]],$Td0[2] ; Td0[s2], t2
|
|
|| LDBU *${TEB}[$Td0[3]],$Td0[3] ; Td0[s3], t3
|
|
|
|
;; Reassemble each output word from its four looked-up bytes (PACK2 and
;; PACKL4), XOR in the last round key and place the result in A9:A8 and
;; B9:B8. BNOP RA is issued before the last packets, which therefore
;; execute while the branch is pending; the stores to OUT are skipped
;; when B2 is zero.
.if .BIG_ENDIAN
|
|
PACK2 $Td0[0],$Td1[3],$Td0[0]
|
|
|| PACK2 $Td0[1],$Td1[0],$Td0[1]
|
|
PACK2 $Td2[2],$Td3[1],$Td2[2]
|
|
|| PACK2 $Td2[3],$Td3[2],$Td2[3]
|
|
PACKL4 $Td0[0],$Td2[2],$Td0[0]
|
|
|| PACKL4 $Td0[1],$Td2[3],$Td0[1]
|
|
XOR $K[0],$Td0[0],$Td0[0] ; s[0]
|
|
|| XOR $K[1],$Td0[1],$Td0[1] ; s[1]
|
|
|
|
PACK2 $Td0[2],$Td1[1],$Td0[2]
|
|
|| PACK2 $Td0[3],$Td1[2],$Td0[3]
|
|
PACK2 $Td2[0],$Td3[3],$Td2[0]
|
|
|| PACK2 $Td2[1],$Td3[0],$Td2[1]
|
|
|| BNOP RA
|
|
PACKL4 $Td0[2],$Td2[0],$Td0[2]
|
|
|| PACKL4 $Td0[3],$Td2[1],$Td0[3]
|
|
XOR $K[2],$Td0[2],$Td0[2] ; s[2]
|
|
|| XOR $K[3],$Td0[3],$Td0[3] ; s[3]
|
|
|
|
MV $Td0[0],A9
|
|
|| MV $Td0[1],A8
|
|
MV $Td0[2],B9
|
|
|| MV $Td0[3],B8
|
|
|| [B2] STNDW A9:A8,*OUT++
|
|
[B2] STNDW B9:B8,*OUT++
|
|
.else
|
|
PACK2 $Td1[3],$Td0[0],$Td1[3]
|
|
|| PACK2 $Td1[0],$Td0[1],$Td1[0]
|
|
PACK2 $Td3[1],$Td2[2],$Td3[1]
|
|
|| PACK2 $Td3[2],$Td2[3],$Td3[2]
|
|
PACKL4 $Td3[1],$Td1[3],$Td1[3]
|
|
|| PACKL4 $Td3[2],$Td1[0],$Td1[0]
|
|
XOR $K[0],$Td1[3],$Td1[3] ; s[0]
|
|
|| XOR $K[1],$Td1[0],$Td1[0] ; s[1]
|
|
|
|
PACK2 $Td1[1],$Td0[2],$Td1[1]
|
|
|| PACK2 $Td1[2],$Td0[3],$Td1[2]
|
|
PACK2 $Td3[3],$Td2[0],$Td3[3]
|
|
|| PACK2 $Td3[0],$Td2[1],$Td3[0]
|
|
|| BNOP RA
|
|
PACKL4 $Td3[3],$Td1[1],$Td1[1]
|
|
|| PACKL4 $Td3[0],$Td1[2],$Td1[2]
|
|
XOR $K[2],$Td1[1],$Td1[1] ; s[2]
|
|
|| XOR $K[3],$Td1[2],$Td1[2] ; s[3]
|
|
|
|
MV $Td1[3],A8
|
|
|| MV $Td1[0],A9
|
|
MV $Td1[1],B8
|
|
|| MV $Td1[2],B9
|
|
|| [B2] STNDW A9:A8,*OUT++
|
|
[B2] STNDW B9:B8,*OUT++
|
|
.endif
|
|
.endasmfunc
|
|
___
|
|
{
|
|
# Working set for the key schedule: the four round-key registers extended
# with the four state registers, eight slots in total.
my @K=@K;
push @K,@s;
# Scratch registers for the byte look-ups done during key expansion.
my @Te4=qw(B16 B17 B18 B19);

# Registers holding the GF(2^8) products in AES_set_decrypt_key; they
# reuse the registers of the Te0..Te3 look-up results.
my (@Kx9,@KxB,@KxD,@KxE);
@Kx9=@Te0;
@KxB=@Te1;
@KxD=@Te2;
@KxE=@Te3;
|
|
|
|
$code.=<<___;
|
|
;; AES_set_encrypt_key: expand a user key into the round-key schedule.
;;   INP (A4) -> user key, BITS (B4, alias of OUT) = key length in bits,
;;   KEY (A6) -> schedule to fill.
;;   RET (A4) = 0 on success, -1 if INP or KEY is zero, -2 if the length
;;   is not 128, 192 or 256.
;; On success the round count (10, 12 or 14) is stored at word 60 of the
;; schedule and is also left in B0; B0 is zero on the failure paths.
;; __set_encrypt_key is the label AES_set_decrypt_key branches to.
.asg OUT,BITS
|
|
|
|
.global _AES_set_encrypt_key
|
|
_AES_set_encrypt_key:
|
|
__set_encrypt_key:
|
|
.asmfunc
|
|
;; Argument checks. Predicate registers A0 and A1 are arranged so that
;; at most one of the two conditional B RA is issued.
MV INP,A0
|
|
|| SHRU BITS,5,BITS ; 128-192-256 -> 4-6-8
|
|
|| MV KEY,A1
|
|
[!A0] B RA
|
|
||[!A0] MVK -1,RET
|
|
||[!A0] MVK 1,A1 ; only one B RA
|
|
[!A1] B RA
|
|
||[!A1] MVK -1,RET
|
|
||[!A1] MVK 0,A0
|
|
|| MVK 0,B0
|
|
|| MVK 0,A1
|
|
;; Load 16, 24 or 32 bytes of key material (A1 gates each further load
;; on the word count) and branch on the word count 4, 6 or 8.
[A0] LDNDW *INP++,A9:A8
|
|
|| [A0] CMPEQ 4,BITS,B0
|
|
|| [A0] CMPLT 3,BITS,A1
|
|
[B0] B key128?
|
|
|| [A1] LDNDW *INP++,B9:B8
|
|
|| [A0] CMPEQ 6,BITS,B0
|
|
|| [A0] CMPLT 5,BITS,A1
|
|
[B0] B key192?
|
|
|| [A1] LDNDW *INP++,B17:B16
|
|
|| [A0] CMPEQ 8,BITS,B0
|
|
|| [A0] CMPLT 7,BITS,A1
|
|
[B0] B key256?
|
|
|| [A1] LDNDW *INP++,B19:B18
|
|
|
|
;; While the branches above are pending: set up the two output pointers
;; (words 0 and 1 of the schedule) and compute the address of AES_Te4
;; position-independently.
.if __TI_EABI__
|
|
[A0] ADD 0,KEY,$KPA
|
|
|| [A0] ADD 4,KEY,$KPB
|
|
|| [A0] MVKL \$PCR_OFFSET(AES_Te4,__set_encrypt_key),$TEA
|
|
|| [A0] ADDKPC __set_encrypt_key,B6
|
|
[A0] MVKH \$PCR_OFFSET(AES_Te4,__set_encrypt_key),$TEA
|
|
[A0] ADD B6,$TEA,$TEA ; AES_Te4
|
|
.else
|
|
[A0] ADD 0,KEY,$KPA
|
|
|| [A0] ADD 4,KEY,$KPB
|
|
|| [A0] MVKL (AES_Te4-__set_encrypt_key),$TEA
|
|
|| [A0] ADDKPC __set_encrypt_key,B6
|
|
[A0] MVKH (AES_Te4-__set_encrypt_key),$TEA
|
|
[A0] ADD B6,$TEA,$TEA ; AES_Te4
|
|
.endif
|
|
NOP
|
|
NOP
|
|
|
|
;; Reached only when none of the branches above was taken.
BNOP RA,5
|
|
|| MVK -2,RET ; unknown bit length
|
|
|| MVK 0,B0 ; redundant
|
|
;;====================================================================
|
|
;;====================================================================
|
|
;; 128-bit key: a software-pipelined loop with ILC loaded with 9. Each
;; pass stores the current four words and derives the next four from
;; byte look-ups on the last word (byte selectors in rotated order), the
;; next rcon word and a chain of XORs. The rcon words are read starting
;; 256 bytes past AES_Te4.
key128?:
|
|
.if .BIG_ENDIAN
|
|
MV A9,$K[0]
|
|
|| MV A8,$K[1]
|
|
|| MV B9,$Te4[2]
|
|
|| MV B8,$K[3]
|
|
.else
|
|
MV A8,$K[0]
|
|
|| MV A9,$K[1]
|
|
|| MV B8,$Te4[2]
|
|
|| MV B9,$K[3]
|
|
.endif
|
|
|
|
MVK 256,A0
|
|
|| MVK 9,B0
|
|
|
|
SPLOOPD 14
|
|
|| MVC B0,ILC
|
|
|| MV $TEA,$TEB
|
|
|| ADD $TEA,A0,A30 ; rcon
|
|
;;====================================================================
|
|
LDW *A30++[1],A31 ; rcon[i]
|
|
|| MV $Te4[2],$K[2]
|
|
|| EXTU $K[3],EXT1,24,$Te4[0]
|
|
LDBU *${TEB}[$Te4[0]],$Te4[0]
|
|
|| MV $K[3],A0
|
|
|| EXTU $K[3],EXT2,24,$Te4[1]
|
|
LDBU *${TEB}[$Te4[1]],$Te4[1]
|
|
|| EXTU A0,EXT3,24,A0
|
|
|| EXTU $K[3],EXT0,24,$Te4[3]
|
|
.if .BIG_ENDIAN
|
|
LDBU *${TEA}[A0],$Te4[3]
|
|
|| LDBU *${TEB}[$Te4[3]],A0
|
|
.else
|
|
LDBU *${TEA}[A0],A0
|
|
|| LDBU *${TEB}[$Te4[3]],$Te4[3]
|
|
.endif
|
|
|
|
STW $K[0],*$KPA++[2]
|
|
|| STW $K[1],*$KPB++[2]
|
|
STW $K[2],*$KPA++[2]
|
|
|| STW $K[3],*$KPB++[2]
|
|
|
|
XOR A31,$K[0],$K[0] ; ^=rcon[i]
|
|
.if .BIG_ENDIAN
|
|
PACK2 $Te4[0],$Te4[1],$Te4[1]
|
|
PACK2 $Te4[3],A0,$Te4[3]
|
|
PACKL4 $Te4[1],$Te4[3],$Te4[3]
|
|
.else
|
|
PACK2 $Te4[1],$Te4[0],$Te4[1]
|
|
PACK2 $Te4[3],A0,$Te4[3]
|
|
PACKL4 $Te4[3],$Te4[1],$Te4[3]
|
|
.endif
|
|
XOR $Te4[3],$K[0],$Te4[0] ; K[0]
|
|
XOR $Te4[0],$K[1],$K[1] ; K[1]
|
|
MV $Te4[0],$K[0]
|
|
|| XOR $K[1],$K[2],$Te4[2] ; K[2]
|
|
XOR $Te4[2],$K[3],$K[3] ; K[3]
|
|
SPKERNEL
|
|
;;====================================================================
|
|
;; Store the last round key and the round count, return 0. The packets
;; after BNOP RA execute while the branch is pending.
BNOP RA
|
|
MV $Te4[2],$K[2]
|
|
|| STW $K[0],*$KPA++[2]
|
|
|| STW $K[1],*$KPB++[2]
|
|
STW $K[2],*$KPA++[2]
|
|
|| STW $K[3],*$KPB++[2]
|
|
MVK 10,B0 ; rounds
|
|
STW B0,*++${KPB}[15]
|
|
MVK 0,RET
|
|
;;====================================================================
|
|
;;====================================================================
|
|
;; 192-bit key: an ordinary loop counted by BDEC on B0 (initially 6).
;; Each pass stores six words and derives the next six; the four words
;; left over are stored after the loop.
key192?:
|
|
.if .BIG_ENDIAN
|
|
MV A9,$K[0]
|
|
|| MV A8,$K[1]
|
|
|| MV B9,$K[2]
|
|
|| MV B8,$K[3]
|
|
MV B17,$Te4[2]
|
|
|| MV B16,$K[5]
|
|
.else
|
|
MV A8,$K[0]
|
|
|| MV A9,$K[1]
|
|
|| MV B8,$K[2]
|
|
|| MV B9,$K[3]
|
|
MV B16,$Te4[2]
|
|
|| MV B17,$K[5]
|
|
.endif
|
|
|
|
MVK 256,A0
|
|
|| MVK 6,B0
|
|
MV $TEA,$TEB
|
|
|| ADD $TEA,A0,A30 ; rcon
|
|
;;====================================================================
|
|
loop192?:
|
|
LDW *A30++[1],A31 ; rcon[i]
|
|
|| MV $Te4[2],$K[4]
|
|
|| EXTU $K[5],EXT1,24,$Te4[0]
|
|
LDBU *${TEB}[$Te4[0]],$Te4[0]
|
|
|| MV $K[5],A0
|
|
|| EXTU $K[5],EXT2,24,$Te4[1]
|
|
LDBU *${TEB}[$Te4[1]],$Te4[1]
|
|
|| EXTU A0,EXT3,24,A0
|
|
|| EXTU $K[5],EXT0,24,$Te4[3]
|
|
.if .BIG_ENDIAN
|
|
LDBU *${TEA}[A0],$Te4[3]
|
|
|| LDBU *${TEB}[$Te4[3]],A0
|
|
.else
|
|
LDBU *${TEA}[A0],A0
|
|
|| LDBU *${TEB}[$Te4[3]],$Te4[3]
|
|
.endif
|
|
|
|
STW $K[0],*$KPA++[2]
|
|
|| STW $K[1],*$KPB++[2]
|
|
STW $K[2],*$KPA++[2]
|
|
|| STW $K[3],*$KPB++[2]
|
|
STW $K[4],*$KPA++[2]
|
|
|| STW $K[5],*$KPB++[2]
|
|
|
|
XOR A31,$K[0],$K[0] ; ^=rcon[i]
|
|
.if .BIG_ENDIAN
|
|
PACK2 $Te4[0],$Te4[1],$Te4[1]
|
|
|| PACK2 $Te4[3],A0,$Te4[3]
|
|
PACKL4 $Te4[1],$Te4[3],$Te4[3]
|
|
.else
|
|
PACK2 $Te4[1],$Te4[0],$Te4[1]
|
|
|| PACK2 $Te4[3],A0,$Te4[3]
|
|
PACKL4 $Te4[3],$Te4[1],$Te4[3]
|
|
.endif
|
|
BDEC loop192?,B0
|
|
|| XOR $Te4[3],$K[0],$Te4[0] ; K[0]
|
|
XOR $Te4[0],$K[1],$K[1] ; K[1]
|
|
MV $Te4[0],$K[0]
|
|
|| XOR $K[1],$K[2],$Te4[2] ; K[2]
|
|
XOR $Te4[2],$K[3],$K[3] ; K[3]
|
|
MV $Te4[2],$K[2]
|
|
|| XOR $K[3],$K[4],$Te4[2] ; K[4]
|
|
XOR $Te4[2],$K[5],$K[5] ; K[5]
|
|
;;====================================================================
|
|
;; Store the remaining four words and the round count, return 0.
BNOP RA
|
|
STW $K[0],*$KPA++[2]
|
|
|| STW $K[1],*$KPB++[2]
|
|
STW $K[2],*$KPA++[2]
|
|
|| STW $K[3],*$KPB++[2]
|
|
MVK 12,B0 ; rounds
|
|
STW B0,*++${KPB}[7]
|
|
MVK 0,RET
|
|
;;====================================================================
|
|
;;====================================================================
|
|
;; 256-bit key: each pass stores eight words. The first half of a pass
;; derives words 0..3 as in the shorter variants; the second half looks
;; up the bytes of word 3 with the selectors in unrotated order and no
;; rcon to derive words 4..7. B0 (initially 6) counts passes; once it
;; is zero the pass exits to done256? after the first half.
key256?:
|
|
.if .BIG_ENDIAN
|
|
MV A9,$K[0]
|
|
|| MV A8,$K[1]
|
|
|| MV B9,$K[2]
|
|
|| MV B8,$K[3]
|
|
MV B17,$K[4]
|
|
|| MV B16,$K[5]
|
|
|| MV B19,$Te4[2]
|
|
|| MV B18,$K[7]
|
|
.else
|
|
MV A8,$K[0]
|
|
|| MV A9,$K[1]
|
|
|| MV B8,$K[2]
|
|
|| MV B9,$K[3]
|
|
MV B16,$K[4]
|
|
|| MV B17,$K[5]
|
|
|| MV B18,$Te4[2]
|
|
|| MV B19,$K[7]
|
|
.endif
|
|
|
|
MVK 256,A0
|
|
|| MVK 6,B0
|
|
MV $TEA,$TEB
|
|
|| ADD $TEA,A0,A30 ; rcon
|
|
;;====================================================================
|
|
loop256?:
|
|
LDW *A30++[1],A31 ; rcon[i]
|
|
|| MV $Te4[2],$K[6]
|
|
|| EXTU $K[7],EXT1,24,$Te4[0]
|
|
LDBU *${TEB}[$Te4[0]],$Te4[0]
|
|
|| MV $K[7],A0
|
|
|| EXTU $K[7],EXT2,24,$Te4[1]
|
|
LDBU *${TEB}[$Te4[1]],$Te4[1]
|
|
|| EXTU A0,EXT3,24,A0
|
|
|| EXTU $K[7],EXT0,24,$Te4[3]
|
|
.if .BIG_ENDIAN
|
|
LDBU *${TEA}[A0],$Te4[3]
|
|
|| LDBU *${TEB}[$Te4[3]],A0
|
|
.else
|
|
LDBU *${TEA}[A0],A0
|
|
|| LDBU *${TEB}[$Te4[3]],$Te4[3]
|
|
.endif
|
|
|
|
STW $K[0],*$KPA++[2]
|
|
|| STW $K[1],*$KPB++[2]
|
|
STW $K[2],*$KPA++[2]
|
|
|| STW $K[3],*$KPB++[2]
|
|
STW $K[4],*$KPA++[2]
|
|
|| STW $K[5],*$KPB++[2]
|
|
STW $K[6],*$KPA++[2]
|
|
|| STW $K[7],*$KPB++[2]
|
|
|| XOR A31,$K[0],$K[0] ; ^=rcon[i]
|
|
.if .BIG_ENDIAN
|
|
PACK2 $Te4[0],$Te4[1],$Te4[1]
|
|
|| PACK2 $Te4[3],A0,$Te4[3]
|
|
PACKL4 $Te4[1],$Te4[3],$Te4[3]
|
|
||[!B0] B done256?
|
|
.else
|
|
PACK2 $Te4[1],$Te4[0],$Te4[1]
|
|
|| PACK2 $Te4[3],A0,$Te4[3]
|
|
PACKL4 $Te4[3],$Te4[1],$Te4[3]
|
|
||[!B0] B done256?
|
|
.endif
|
|
XOR $Te4[3],$K[0],$Te4[0] ; K[0]
|
|
XOR $Te4[0],$K[1],$K[1] ; K[1]
|
|
MV $Te4[0],$K[0]
|
|
|| XOR $K[1],$K[2],$Te4[2] ; K[2]
|
|
XOR $Te4[2],$K[3],$K[3] ; K[3]
|
|
|
|
MV $Te4[2],$K[2]
|
|
|| [B0] EXTU $K[3],EXT0,24,$Te4[0]
|
|
|| [B0] SUB B0,1,B0
|
|
LDBU *${TEB}[$Te4[0]],$Te4[0]
|
|
|| MV $K[3],A0
|
|
|| EXTU $K[3],EXT1,24,$Te4[1]
|
|
LDBU *${TEB}[$Te4[1]],$Te4[1]
|
|
|| EXTU A0,EXT2,24,A0
|
|
|| EXTU $K[3],EXT3,24,$Te4[3]
|
|
|
|
.if .BIG_ENDIAN
|
|
LDBU *${TEA}[A0],$Te4[3]
|
|
|| LDBU *${TEB}[$Te4[3]],A0
|
|
NOP 3
|
|
PACK2 $Te4[0],$Te4[1],$Te4[1]
|
|
PACK2 $Te4[3],A0,$Te4[3]
|
|
|| B loop256?
|
|
PACKL4 $Te4[1],$Te4[3],$Te4[3]
|
|
.else
|
|
LDBU *${TEA}[A0],A0
|
|
|| LDBU *${TEB}[$Te4[3]],$Te4[3]
|
|
NOP 3
|
|
PACK2 $Te4[1],$Te4[0],$Te4[1]
|
|
PACK2 $Te4[3],A0,$Te4[3]
|
|
|| B loop256?
|
|
PACKL4 $Te4[3],$Te4[1],$Te4[3]
|
|
.endif
|
|
|
|
XOR $Te4[3],$K[4],$Te4[0] ; K[4]
|
|
XOR $Te4[0],$K[5],$K[5] ; K[5]
|
|
MV $Te4[0],$K[4]
|
|
|| XOR $K[5],$K[6],$Te4[2] ; K[6]
|
|
XOR $Te4[2],$K[7],$K[7] ; K[7]
|
|
;;====================================================================
|
|
;; Store the final four words and the round count, return 0.
done256?:
|
|
BNOP RA
|
|
STW $K[0],*$KPA++[2]
|
|
|| STW $K[1],*$KPB++[2]
|
|
STW $K[2],*$KPA++[2]
|
|
|| STW $K[3],*$KPB++[2]
|
|
MVK 14,B0 ; rounds
|
|
STW B0,*--${KPB}[1]
|
|
MVK 0,RET
|
|
.endasmfunc
|
|
|
|
;; AES_set_decrypt_key: build the decryption key schedule.
;; Takes the same arguments as AES_set_encrypt_key. It first runs
;; __set_encrypt_key with the return address redirected to ret?, then
;;   1. swaps the round keys pairwise from both ends of the schedule,
;;   2. applies InvMixColumns, computed with GMPY4, to every round key
;;      except the first and the last.
;; If __set_encrypt_key left B0 at zero (failure) its return value is
;; passed through unchanged; otherwise RET is 0. GFPGFR is saved in B30
;; and restored before returning.
.global _AES_set_decrypt_key
|
|
_AES_set_decrypt_key:
|
|
.asmfunc
|
|
B __set_encrypt_key ; guarantee local call
|
|
MV KEY,B30 ; B30 is not modified
|
|
MV RA, B31 ; B31 is not modified
|
|
ADDKPC ret?,RA,2
|
|
ret?: ; B0 holds rounds or zero
|
|
[!B0] BNOP B31 ; return if zero
|
|
[B0] SHL B0,4,A0 ; offset to last round key
|
|
[B0] SHRU B0,1,B1
|
|
[B0] SUB B1,1,B1
|
|
[B0] MVK 0x0000001B,B3 ; AES polynomial
|
|
[B0] MVKH 0x07000000,B3
|
|
|
|
;; One pointer walks up from the first round key and the other walks
;; down from the last; each pass exchanges one 16-byte pair. ILC is
;; loaded with rounds/2-1.
SPLOOPD 9 ; flip round keys
|
|
|| MVC B1,ILC
|
|
|| MV B30,$KPA
|
|
|| ADD B30,A0,$KPB
|
|
|| MVK 16,A0 ; sizeof(round key)
|
|
;;====================================================================
|
|
LDW *${KPA}[0],A16
|
|
|| LDW *${KPB}[0],B16
|
|
LDW *${KPA}[1],A17
|
|
|| LDW *${KPB}[1],B17
|
|
LDW *${KPA}[2],A18
|
|
|| LDW *${KPB}[2],B18
|
|
LDW *${KPA}[3],A19
|
|
|| ADD $KPA,A0,$KPA
|
|
|| LDW *${KPB}[3],B19
|
|
|| SUB $KPB,A0,$KPB
|
|
NOP
|
|
STW B16,*${KPA}[-4]
|
|
|| STW A16,*${KPB}[4]
|
|
STW B17,*${KPA}[-3]
|
|
|| STW A17,*${KPB}[5]
|
|
STW B18,*${KPA}[-2]
|
|
|| STW A18,*${KPB}[6]
|
|
STW B19,*${KPA}[-1]
|
|
|| STW A19,*${KPB}[7]
|
|
SPKERNEL
|
|
;;====================================================================
|
|
;; Set-up for InvMixColumns: both pointers start at the second round key,
;; the field polynomial built in B3 goes into GFPGFR, and A24, B24, A25,
;; B25 receive the multipliers 9, 11, 13 and 14 replicated in all four
;; bytes. ILC is loaded with rounds-1; B0, decremented once more, gates
;; the prefetch of the following round key inside the loop.
SUB B0,1,B0 ; skip last round
|
|
|| ADD B30,A0,$KPA ; skip first round
|
|
|| ADD B30,A0,$KPB
|
|
|| MVC GFPGFR,B30 ; save GFPGFR
|
|
LDW *${KPA}[0],$K[0]
|
|
|| LDW *${KPB}[1],$K[1]
|
|
|| MVC B3,GFPGFR
|
|
LDW *${KPA}[2],$K[2]
|
|
|| LDW *${KPB}[3],$K[3]
|
|
MVK 0x00000909,A24
|
|
|| MVK 0x00000B0B,B24
|
|
MVKH 0x09090000,A24
|
|
|| MVKH 0x0B0B0000,B24
|
|
MVC B0,ILC
|
|
|| SUB B0,1,B0
|
|
|
|
GMPY4 $K[0],A24,$Kx9[0] ; ·0x09
|
|
|| GMPY4 $K[1],A24,$Kx9[1]
|
|
|| MVK 0x00000D0D,A25
|
|
|| MVK 0x00000E0E,B25
|
|
GMPY4 $K[2],A24,$Kx9[2]
|
|
|| GMPY4 $K[3],A24,$Kx9[3]
|
|
|| MVKH 0x0D0D0000,A25
|
|
|| MVKH 0x0E0E0000,B25
|
|
|
|
GMPY4 $K[0],B24,$KxB[0] ; ·0x0B
|
|
|| GMPY4 $K[1],B24,$KxB[1]
|
|
GMPY4 $K[2],B24,$KxB[2]
|
|
|| GMPY4 $K[3],B24,$KxB[3]
|
|
|
|
;; Per round key: the four byte-wise products of each word are rotated
;; (SWAP2 and ROTL) and XORed together, and the result is written back
;; over the round key it was computed from.
SPLOOP 11 ; InvMixColumns
|
|
;;====================================================================
|
|
GMPY4 $K[0],A25,$KxD[0] ; ·0x0D
|
|
|| GMPY4 $K[1],A25,$KxD[1]
|
|
|| SWAP2 $Kx9[0],$Kx9[0] ; rotate by 16
|
|
|| SWAP2 $Kx9[1],$Kx9[1]
|
|
|| MV $K[0],$s[0] ; this or DINT
|
|
|| MV $K[1],$s[1]
|
|
|| [B0] LDW *${KPA}[4],$K[0]
|
|
|| [B0] LDW *${KPB}[5],$K[1]
|
|
GMPY4 $K[2],A25,$KxD[2]
|
|
|| GMPY4 $K[3],A25,$KxD[3]
|
|
|| SWAP2 $Kx9[2],$Kx9[2]
|
|
|| SWAP2 $Kx9[3],$Kx9[3]
|
|
|| MV $K[2],$s[2]
|
|
|| MV $K[3],$s[3]
|
|
|| [B0] LDW *${KPA}[6],$K[2]
|
|
|| [B0] LDW *${KPB}[7],$K[3]
|
|
|
|
GMPY4 $s[0],B25,$KxE[0] ; ·0x0E
|
|
|| GMPY4 $s[1],B25,$KxE[1]
|
|
|| XOR $Kx9[0],$KxB[0],$KxB[0]
|
|
|| XOR $Kx9[1],$KxB[1],$KxB[1]
|
|
GMPY4 $s[2],B25,$KxE[2]
|
|
|| GMPY4 $s[3],B25,$KxE[3]
|
|
|| XOR $Kx9[2],$KxB[2],$KxB[2]
|
|
|| XOR $Kx9[3],$KxB[3],$KxB[3]
|
|
|
|
ROTL $KxB[0],TBL3,$KxB[0]
|
|
|| ROTL $KxB[1],TBL3,$KxB[1]
|
|
|| SWAP2 $KxD[0],$KxD[0] ; rotate by 16
|
|
|| SWAP2 $KxD[1],$KxD[1]
|
|
ROTL $KxB[2],TBL3,$KxB[2]
|
|
|| ROTL $KxB[3],TBL3,$KxB[3]
|
|
|| SWAP2 $KxD[2],$KxD[2]
|
|
|| SWAP2 $KxD[3],$KxD[3]
|
|
|
|
XOR $KxE[0],$KxD[0],$KxE[0]
|
|
|| XOR $KxE[1],$KxD[1],$KxE[1]
|
|
|| [B0] GMPY4 $K[0],A24,$Kx9[0] ; ·0x09
|
|
|| [B0] GMPY4 $K[1],A24,$Kx9[1]
|
|
|| ADDAW $KPA,4,$KPA
|
|
XOR $KxE[2],$KxD[2],$KxE[2]
|
|
|| XOR $KxE[3],$KxD[3],$KxE[3]
|
|
|| [B0] GMPY4 $K[2],A24,$Kx9[2]
|
|
|| [B0] GMPY4 $K[3],A24,$Kx9[3]
|
|
|| ADDAW $KPB,4,$KPB
|
|
|
|
XOR $KxB[0],$KxE[0],$KxE[0]
|
|
|| XOR $KxB[1],$KxE[1],$KxE[1]
|
|
|| [B0] GMPY4 $K[0],B24,$KxB[0] ; ·0x0B
|
|
|| [B0] GMPY4 $K[1],B24,$KxB[1]
|
|
XOR $KxB[2],$KxE[2],$KxE[2]
|
|
|| XOR $KxB[3],$KxE[3],$KxE[3]
|
|
|| [B0] GMPY4 $K[2],B24,$KxB[2]
|
|
|| [B0] GMPY4 $K[3],B24,$KxB[3]
|
|
|| STW $KxE[0],*${KPA}[-4]
|
|
|| STW $KxE[1],*${KPB}[-3]
|
|
STW $KxE[2],*${KPA}[-2]
|
|
|| STW $KxE[3],*${KPB}[-1]
|
|
|| [B0] SUB B0,1,B0
|
|
SPKERNEL
|
|
;;====================================================================
|
|
;; Return through the address saved in B31; the two instructions after
;; the branch execute while it is pending.
BNOP B31,3
|
|
MVC B30,GFPGFR ; restore GFPGFR(*)
|
|
MVK 0,RET
|
|
.endasmfunc
|
|
___
|
|
# (*) Even though ABI doesn't specify GFPGFR as non-volatile, there
|
|
# are code samples out there that *assume* its default value.
|
|
}
|
|
{
|
|
# Registers that carry the AES_ctr32_encrypt arguments, named after the
# argument each one holds.
my ($inp,$out,$blocks,$key,$ivp)=qw(A4 B4 A6 B6 A8);
|
|
$code.=<<___;
|
|
;; AES_ctr32_encrypt: CTR mode over whole 16-byte blocks.
;;   A4 -> input, B4 -> output, A6 = number of blocks,
;;   B6 -> key schedule, A8 -> 16-byte counter block.
;; The counter block is kept in A31:A30 and B31:B30. For every block it
;; is handed to __encrypt in A9:A8 and B9:B8 with B2 set to zero, so
;; that __encrypt neither loads input nor stores output; the returned
;; key stream is XORed with the input and stored one loop pass later
;; (A1 is zero only on the first pass). The last 32-bit word of the
;; counter is incremented once per block.
.global _AES_ctr32_encrypt
|
|
_AES_ctr32_encrypt:
|
|
.asmfunc
|
|
LDNDW *${ivp}[0],A31:A30 ; load counter value
|
|
|| MV $blocks,A2 ; reassign $blocks
|
|
|| DMV RA,$key,B27:B26 ; reassign RA and $key
|
|
LDNDW *${ivp}[1],B31:B30
|
|
|| MVK 0,B2 ; don't let __encrypt load input
|
|
|| MVK 0,A1 ; and postpone writing output
|
|
.if .BIG_ENDIAN
|
|
NOP
|
|
.else
|
|
NOP 4
|
|
SWAP2 B31,B31 ; keep least significant 32 bits
|
|
SWAP4 B31,B31 ; in host byte order
|
|
.endif
|
|
;; Loop head, also the return point of __encrypt. While blocks remain
;; (A2 non-zero) branch to __encrypt, otherwise return through B27. The
;; packets below execute while that branch is pending.
ctr32_loop?:
|
|
[A2] BNOP __encrypt
|
|
|| [A1] XOR A29,A9,A9 ; input^Ek(counter)
|
|
|| [A1] XOR A28,A8,A8
|
|
|| [A2] LDNDW *INP++,A29:A28 ; load input
|
|
[!A2] BNOP B27 ; return
|
|
|| [A1] XOR B29,B9,B9
|
|
|| [A1] XOR B28,B8,B8
|
|
|| [A2] LDNDW *INP++,B29:B28
|
|
.if .BIG_ENDIAN
|
|
[A1] STNDW A9:A8,*OUT++ ; save output
|
|
|| [A2] DMV A31,A30,A9:A8 ; pass counter value to __encrypt
|
|
[A1] STNDW B9:B8,*OUT++
|
|
|| [A2] DMV B31,B30,B9:B8
|
|
|| [A2] ADD B30,1,B30 ; counter++
|
|
.else
|
|
[A1] STNDW A9:A8,*OUT++ ; save output
|
|
|| [A2] DMV A31,A30,A9:A8
|
|
|| [A2] SWAP2 B31,B0
|
|
|| [A2] ADD B31,1,B31 ; counter++
|
|
[A1] STNDW B9:B8,*OUT++
|
|
|| [A2] MV B30,B8
|
|
|| [A2] SWAP4 B0,B9
|
|
.endif
|
|
[A2] ADDKPC ctr32_loop?,RA ; return to ctr32_loop?
|
|
|| [A2] MV B26,KEY ; pass $key
|
|
|| [A2] SUB A2,1,A2 ; $blocks--
|
|
||[!A1] MVK 1,A1
|
|
NOP
|
|
NOP
|
|
.endasmfunc
|
|
___
|
|
}
|
|
# Tables are kept in endian-neutral manner
|
|
$code.=<<___;
|
|
.if __TI_EABI__
|
|
.sect ".text:aes_asm.const"
|
|
.else
|
|
.sect ".const:aes_asm"
|
|
.endif
|
|
.align 128
|
|
AES_Te:
|
|
.byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84
|
|
.byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d
|
|
.byte 0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd
|
|
.byte 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54
|
|
.byte 0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03
|
|
.byte 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d
|
|
.byte 0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62
|
|
.byte 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a
|
|
.byte 0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d
|
|
.byte 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87
|
|
.byte 0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb
|
|
.byte 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b
|
|
.byte 0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67
|
|
.byte 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea
|
|
.byte 0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7
|
|
.byte 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b
|
|
.byte 0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c
|
|
.byte 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a
|
|
.byte 0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41
|
|
.byte 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f
|
|
.byte 0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4
|
|
.byte 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08
|
|
.byte 0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73
|
|
.byte 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f
|
|
.byte 0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52
|
|
.byte 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e
|
|
.byte 0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1
|
|
.byte 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5
|
|
.byte 0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36
|
|
.byte 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d
|
|
.byte 0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69
|
|
.byte 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f
|
|
.byte 0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e
|
|
.byte 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e
|
|
.byte 0x36,0x1b,0x1b,0x2d, 0xdc,0x6e,0x6e,0xb2
|
|
.byte 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb
|
|
.byte 0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d
|
|
.byte 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce
|
|
.byte 0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e
|
|
.byte 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97
|
|
.byte 0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68
|
|
.byte 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c
|
|
.byte 0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f
|
|
.byte 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed
|
|
.byte 0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46
|
|
.byte 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b
|
|
.byte 0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4
|
|
.byte 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a
|
|
.byte 0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a
|
|
.byte 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16
|
|
.byte 0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7
|
|
.byte 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94
|
|
.byte 0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10
|
|
.byte 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81
|
|
.byte 0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44
|
|
.byte 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3
|
|
.byte 0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe
|
|
.byte 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a
|
|
.byte 0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc
|
|
.byte 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04
|
|
.byte 0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1
|
|
.byte 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63
|
|
.byte 0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a
|
|
.byte 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d
|
|
.byte 0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14
|
|
.byte 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f
|
|
.byte 0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2
|
|
.byte 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39
|
|
.byte 0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2
|
|
.byte 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47
|
|
.byte 0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7
|
|
.byte 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95
|
|
.byte 0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98
|
|
.byte 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f
|
|
.byte 0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e
|
|
.byte 0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83
|
|
.byte 0x8c,0x46,0x46,0xca, 0xc7,0xee,0xee,0x29
|
|
.byte 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c
|
|
.byte 0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2
|
|
.byte 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76
|
|
.byte 0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56
|
|
.byte 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e
|
|
.byte 0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a
|
|
.byte 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4
|
|
.byte 0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e
|
|
.byte 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6
|
|
.byte 0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4
|
|
.byte 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b
|
|
.byte 0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43
|
|
.byte 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7
|
|
.byte 0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64
|
|
.byte 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0
|
|
.byte 0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa
|
|
.byte 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25
|
|
.byte 0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e
|
|
.byte 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18
|
|
.byte 0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88
|
|
.byte 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72
|
|
.byte 0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1
|
|
.byte 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51
|
|
.byte 0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c
|
|
.byte 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21
|
|
.byte 0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc
|
|
.byte 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85
|
|
.byte 0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42
|
|
.byte 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa
|
|
.byte 0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05
|
|
.byte 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12
|
|
.byte 0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f
|
|
.byte 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0
|
|
.byte 0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58
|
|
.byte 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9
|
|
.byte 0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13
|
|
.byte 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33
|
|
.byte 0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70
|
|
.byte 0x07,0x8e,0x8e,0x89, 0x33,0x94,0x94,0xa7
|
|
.byte 0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22
|
|
.byte 0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20
|
|
.byte 0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff
|
|
.byte 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a
|
|
.byte 0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8
|
|
.byte 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17
|
|
.byte 0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31
|
|
.byte 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8
|
|
.byte 0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0
|
|
.byte 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11
|
|
.byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc
|
|
.byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a
|
|
AES_Te4:
|
|
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
|
|
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
|
|
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
|
|
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
|
|
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
|
|
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
|
|
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
|
|
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
|
|
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
|
|
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
|
|
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
|
|
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
|
|
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
|
|
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
|
|
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
|
|
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
|
|
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
|
|
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
|
|
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
|
|
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
|
|
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
|
|
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
|
|
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
|
|
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
|
|
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
|
|
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
|
|
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
|
|
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
|
|
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
|
|
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
|
|
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
|
|
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
|
|
rcon:
|
|
.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00
|
|
.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00
|
|
.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00
|
|
.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00
|
|
.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00
|
|
.align 128
|
|
AES_Td:
|
|
.byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53
|
|
.byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96
|
|
.byte 0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1
|
|
.byte 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93
|
|
.byte 0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6
|
|
.byte 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25
|
|
.byte 0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7
|
|
.byte 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f
|
|
.byte 0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67
|
|
.byte 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1
|
|
.byte 0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12
|
|
.byte 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6
|
|
.byte 0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95
|
|
.byte 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda
|
|
.byte 0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3
|
|
.byte 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44
|
|
.byte 0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78
|
|
.byte 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd
|
|
.byte 0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17
|
|
.byte 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4
|
|
.byte 0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82
|
|
.byte 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45
|
|
.byte 0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84
|
|
.byte 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94
|
|
.byte 0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19
|
|
.byte 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7
|
|
.byte 0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2
|
|
.byte 0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a
|
|
.byte 0xb2,0xeb,0x28,0x07, 0x2f,0xb5,0xc2,0x03
|
|
.byte 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5
|
|
.byte 0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2
|
|
.byte 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c
|
|
.byte 0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92
|
|
.byte 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1
|
|
.byte 0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5
|
|
.byte 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a
|
|
.byte 0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0
|
|
.byte 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75
|
|
.byte 0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa
|
|
.byte 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51
|
|
.byte 0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d
|
|
.byte 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46
|
|
.byte 0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05
|
|
.byte 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff
|
|
.byte 0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97
|
|
.byte 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77
|
|
.byte 0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88
|
|
.byte 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb
|
|
.byte 0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9
|
|
.byte 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00
|
|
.byte 0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48
|
|
.byte 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e
|
|
.byte 0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56
|
|
.byte 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27
|
|
.byte 0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21
|
|
.byte 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a
|
|
.byte 0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f
|
|
.byte 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e
|
|
.byte 0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2
|
|
.byte 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16
|
|
.byte 0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5
|
|
.byte 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d
|
|
.byte 0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad
|
|
.byte 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8
|
|
.byte 0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c
|
|
.byte 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd
|
|
.byte 0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc
|
|
.byte 0x44,0x66,0x3b,0xc5, 0x5b,0xfb,0x7e,0x34
|
|
.byte 0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc
|
|
.byte 0xb6,0xed,0xfc,0x68, 0xb8,0xe4,0xf1,0x63
|
|
.byte 0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10
|
|
.byte 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20
|
|
.byte 0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8
|
|
.byte 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d
|
|
.byte 0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3
|
|
.byte 0x0d,0x86,0x52,0xec, 0x77,0xc1,0xe3,0xd0
|
|
.byte 0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99
|
|
.byte 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22
|
|
.byte 0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a
|
|
.byte 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef
|
|
.byte 0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1
|
|
.byte 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36
|
|
.byte 0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28
|
|
.byte 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4
|
|
.byte 0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d
|
|
.byte 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62
|
|
.byte 0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8
|
|
.byte 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5
|
|
.byte 0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c
|
|
.byte 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3
|
|
.byte 0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7
|
|
.byte 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b
|
|
.byte 0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4
|
|
.byte 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8
|
|
.byte 0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e
|
|
.byte 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6
|
|
.byte 0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce
|
|
.byte 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6
|
|
.byte 0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31
|
|
.byte 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0
|
|
.byte 0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6
|
|
.byte 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15
|
|
.byte 0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7
|
|
.byte 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f
|
|
.byte 0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d
|
|
.byte 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf
|
|
.byte 0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b
|
|
.byte 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f
|
|
.byte 0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d
|
|
.byte 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e
|
|
.byte 0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52
|
|
.byte 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13
|
|
.byte 0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a
|
|
.byte 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89
|
|
.byte 0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35
|
|
.byte 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c
|
|
.byte 0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f
|
|
.byte 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf
|
|
.byte 0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b
|
|
.byte 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86
|
|
.byte 0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e
|
|
.byte 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f
|
|
.byte 0x16,0x1d,0xc3,0x72, 0xbc,0xe2,0x25,0x0c
|
|
.byte 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41
|
|
.byte 0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde
|
|
.byte 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90
|
|
.byte 0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70
|
|
.byte 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42
|
|
AES_Td4:
|
|
.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
|
|
.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
|
|
.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
|
|
.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
|
|
.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
|
|
.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
|
|
.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
|
|
.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
|
|
.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
|
|
.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
|
|
.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
|
|
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
|
|
.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
|
|
.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
|
|
.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
|
|
.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
|
|
.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
|
|
.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
|
|
.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
|
|
.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
|
|
.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
|
|
.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
|
|
.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
|
|
.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
|
|
.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
|
|
.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
|
|
.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
|
|
.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
|
|
.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
|
|
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
|
|
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
|
|
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
|
|
.cstring "AES for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
|
|
.align 4
|
|
___
|
|
|
|
# Write the generated assembly to STDOUT.  The result of close() is checked
# because buffered output is only flushed there: if the flush fails (full
# disk, broken pipe, ...) the script must exit non-zero rather than leave a
# silently truncated assembly file behind for the build to pick up.
print $code;
close STDOUT or die "error closing STDOUT: $!";
|