ec/ecp_nistz256: harmonize is_infinity with ec_GFp_simple_is_at_infinity.

RT#4625

Reviewed-by: Rich Salz <rsalz@openssl.org>
Author: Andy Polyakov
Date: 2016-08-19 23:18:35 +02:00
Committed-by: Matt Caswell
Parent: e3057a57ca
Commit: c74aea8d6c
4 changed files with 102 additions and 212 deletions

View file

@ -1405,8 +1405,12 @@ ecp_nistz256_point_add:
stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional
sub sp,sp,#32*18+16
ldmia $b_ptr!,{r4-r11} @ copy in2
ldmia $b_ptr!,{r4-r11} @ copy in2_x
add r3,sp,#$in2_x
stmia r3!,{r4-r11}
ldmia $b_ptr!,{r4-r11} @ copy in2_y
stmia r3!,{r4-r11}
ldmia $b_ptr,{r4-r11} @ copy in2_z
orr r12,r4,r5
orr r12,r12,r6
orr r12,r12,r7
@ -1414,18 +1418,6 @@ ecp_nistz256_point_add:
orr r12,r12,r9
orr r12,r12,r10
orr r12,r12,r11
stmia r3!,{r4-r11}
ldmia $b_ptr!,{r4-r11}
orr r12,r12,r4
orr r12,r12,r5
orr r12,r12,r6
orr r12,r12,r7
orr r12,r12,r8
orr r12,r12,r9
orr r12,r12,r10
orr r12,r12,r11
stmia r3!,{r4-r11}
ldmia $b_ptr,{r4-r11}
cmp r12,#0
#ifdef __thumb2__
it ne
@ -1434,8 +1426,12 @@ ecp_nistz256_point_add:
stmia r3,{r4-r11}
str r12,[sp,#32*18+8] @ !in2infty
ldmia $a_ptr!,{r4-r11} @ copy in1
ldmia $a_ptr!,{r4-r11} @ copy in1_x
add r3,sp,#$in1_x
stmia r3!,{r4-r11}
ldmia $a_ptr!,{r4-r11} @ copy in1_y
stmia r3!,{r4-r11}
ldmia $a_ptr,{r4-r11} @ copy in1_z
orr r12,r4,r5
orr r12,r12,r6
orr r12,r12,r7
@ -1443,18 +1439,6 @@ ecp_nistz256_point_add:
orr r12,r12,r9
orr r12,r12,r10
orr r12,r12,r11
stmia r3!,{r4-r11}
ldmia $a_ptr!,{r4-r11}
orr r12,r12,r4
orr r12,r12,r5
orr r12,r12,r6
orr r12,r12,r7
orr r12,r12,r8
orr r12,r12,r9
orr r12,r12,r10
orr r12,r12,r11
stmia r3!,{r4-r11}
ldmia $a_ptr,{r4-r11}
cmp r12,#0
#ifdef __thumb2__
it ne
@ -1684,8 +1668,12 @@ ecp_nistz256_point_add_affine:
stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional
sub sp,sp,#32*15
ldmia $a_ptr!,{r4-r11} @ copy in1
ldmia $a_ptr!,{r4-r11} @ copy in1_x
add r3,sp,#$in1_x
stmia r3!,{r4-r11}
ldmia $a_ptr!,{r4-r11} @ copy in1_y
stmia r3!,{r4-r11}
ldmia $a_ptr,{r4-r11} @ copy in1_z
orr r12,r4,r5
orr r12,r12,r6
orr r12,r12,r7
@ -1693,18 +1681,6 @@ ecp_nistz256_point_add_affine:
orr r12,r12,r9
orr r12,r12,r10
orr r12,r12,r11
stmia r3!,{r4-r11}
ldmia $a_ptr!,{r4-r11}
orr r12,r12,r4
orr r12,r12,r5
orr r12,r12,r6
orr r12,r12,r7
orr r12,r12,r8
orr r12,r12,r9
orr r12,r12,r10
orr r12,r12,r11
stmia r3!,{r4-r11}
ldmia $a_ptr,{r4-r11}
cmp r12,#0
#ifdef __thumb2__
it ne
@ -1713,7 +1689,7 @@ ecp_nistz256_point_add_affine:
stmia r3,{r4-r11}
str r12,[sp,#32*15+4] @ !in1infty
ldmia $b_ptr!,{r4-r11} @ copy in2
ldmia $b_ptr!,{r4-r11} @ copy in2_x
add r3,sp,#$in2_x
orr r12,r4,r5
orr r12,r12,r6
@ -1723,7 +1699,7 @@ ecp_nistz256_point_add_affine:
orr r12,r12,r10
orr r12,r12,r11
stmia r3!,{r4-r11}
ldmia $b_ptr!,{r4-r11}
ldmia $b_ptr!,{r4-r11} @ copy in2_y
orr r12,r12,r4
orr r12,r12,r5
orr r12,r12,r6

View file

@ -862,46 +862,28 @@ ecp_nistz256_point_add:
stp x25,x26,[sp,#64]
sub sp,sp,#32*12
ldp $a0,$a1,[$bp]
ldp $a2,$a3,[$bp,#16]
ldp $t0,$t1,[$bp,#32]
ldp $t2,$t3,[$bp,#48]
ldp $a0,$a1,[$bp,#64] // in2_z
ldp $a2,$a3,[$bp,#64+16]
mov $rp_real,$rp
mov $ap_real,$ap
mov $bp_real,$bp
orr $a0,$a0,$a1
orr $a2,$a2,$a3
ldp $acc0,$acc1,[$ap]
orr $t0,$t0,$t1
orr $t2,$t2,$t3
ldp $acc2,$acc3,[$ap,#16]
orr $a0,$a0,$a2
orr $t2,$t0,$t2
ldp $t0,$t1,[$ap,#32]
orr $in2infty,$a0,$t2
cmp $in2infty,#0
ldp $t2,$t3,[$ap,#48]
csetm $in2infty,ne // !in2infty
ldp $a0,$a1,[$bp_real,#64] // forward load for p256_sqr_mont
orr $acc0,$acc0,$acc1
orr $acc2,$acc2,$acc3
ldp $a2,$a3,[$bp_real,#64+16]
orr $t0,$t0,$t1
orr $t2,$t2,$t3
orr $acc0,$acc0,$acc2
orr $t0,$t0,$t2
orr $in1infty,$acc0,$t0
cmp $in1infty,#0
ldr $poly1,.Lpoly+8
ldr $poly3,.Lpoly+24
csetm $in1infty,ne // !in1infty
orr $t0,$a0,$a1
orr $t2,$a2,$a3
orr $in2infty,$t0,$t2
cmp $in2infty,#0
csetm $in2infty,ne // !in2infty
add $rp,sp,#$Z2sqr
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z2sqr, in2_z);
ldp $a0,$a1,[$ap_real,#64]
ldp $a0,$a1,[$ap_real,#64] // in1_z
ldp $a2,$a3,[$ap_real,#64+16]
orr $t0,$a0,$a1
orr $t2,$a2,$a3
orr $in1infty,$t0,$t2
cmp $in1infty,#0
csetm $in1infty,ne // !in1infty
add $rp,sp,#$Z1sqr
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z);
@ -1150,36 +1132,28 @@ ecp_nistz256_point_add_affine:
ldr $poly1,.Lpoly+8
ldr $poly3,.Lpoly+24
ldp $a0,$a1,[$ap]
ldp $a2,$a3,[$ap,#16]
ldp $t0,$t1,[$ap,#32]
ldp $t2,$t3,[$ap,#48]
orr $a0,$a0,$a1
orr $a2,$a2,$a3
orr $t0,$t0,$t1
orr $t2,$t2,$t3
orr $a0,$a0,$a2
orr $t0,$t0,$t2
orr $in1infty,$a0,$t0
ldp $a0,$a1,[$ap,#64] // in1_z
ldp $a2,$a3,[$ap,#64+16]
orr $t0,$a0,$a1
orr $t2,$a2,$a3
orr $in1infty,$t0,$t2
cmp $in1infty,#0
csetm $in1infty,ne // !in1infty
ldp $a0,$a1,[$bp]
ldp $a2,$a3,[$bp,#16]
ldp $t0,$t1,[$bp,#32]
ldp $acc0,$acc1,[$bp] // in2_x
ldp $acc2,$acc3,[$bp,#16]
ldp $t0,$t1,[$bp,#32] // in2_y
ldp $t2,$t3,[$bp,#48]
orr $a0,$a0,$a1
orr $a2,$a2,$a3
orr $acc0,$acc0,$acc1
orr $acc2,$acc2,$acc3
orr $t0,$t0,$t1
orr $t2,$t2,$t3
orr $a0,$a0,$a2
orr $acc0,$acc0,$acc2
orr $t0,$t0,$t2
orr $in2infty,$a0,$t0
orr $in2infty,$acc0,$t0
cmp $in2infty,#0
csetm $in2infty,ne // !in2infty
ldp $a0,$a1,[$ap_real,#64]
ldp $a2,$a3,[$ap_real,#64+16]
add $rp,sp,#$Z1sqr
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z);

View file

@ -899,71 +899,39 @@ ecp_nistz256_point_add:
mov $ap,$ap_real
mov $bp,$bp_real
ld [$bp],@acc[0] ! in2_x
ld [$bp+4],@acc[1]
ld [$bp+8],@acc[2]
ld [$bp+12],@acc[3]
ld [$bp+16],@acc[4]
ld [$bp+20],@acc[5]
ld [$bp+24],@acc[6]
ld [$bp+28],@acc[7]
ld [$bp+32],$t0 ! in2_y
ld [$bp+32+4],$t1
ld [$bp+32+8],$t2
ld [$bp+32+12],$t3
ld [$bp+32+16],$t4
ld [$bp+32+20],$t5
ld [$bp+32+24],$t6
ld [$bp+32+28],$t7
or @acc[1],@acc[0],@acc[0]
or @acc[3],@acc[2],@acc[2]
or @acc[5],@acc[4],@acc[4]
or @acc[7],@acc[6],@acc[6]
or @acc[2],@acc[0],@acc[0]
or @acc[6],@acc[4],@acc[4]
or @acc[4],@acc[0],@acc[0]
ld [$bp+64],$t0 ! in2_z
ld [$bp+64+4],$t1
ld [$bp+64+8],$t2
ld [$bp+64+12],$t3
ld [$bp+64+16],$t4
ld [$bp+64+20],$t5
ld [$bp+64+24],$t6
ld [$bp+64+28],$t7
or $t1,$t0,$t0
or $t3,$t2,$t2
or $t5,$t4,$t4
or $t7,$t6,$t6
or $t2,$t0,$t0
or $t6,$t4,$t4
or $t4,$t0,$t0
or @acc[0],$t0,$t0 ! !in2infty
or $t4,$t0,$t0 ! !in2infty
movrnz $t0,-1,$t0
st $t0,[%fp+STACK_BIAS-12]
ld [$ap],@acc[0] ! in1_x
ld [$ap+4],@acc[1]
ld [$ap+8],@acc[2]
ld [$ap+12],@acc[3]
ld [$ap+16],@acc[4]
ld [$ap+20],@acc[5]
ld [$ap+24],@acc[6]
ld [$ap+28],@acc[7]
ld [$ap+32],$t0 ! in1_y
ld [$ap+32+4],$t1
ld [$ap+32+8],$t2
ld [$ap+32+12],$t3
ld [$ap+32+16],$t4
ld [$ap+32+20],$t5
ld [$ap+32+24],$t6
ld [$ap+32+28],$t7
or @acc[1],@acc[0],@acc[0]
or @acc[3],@acc[2],@acc[2]
or @acc[5],@acc[4],@acc[4]
or @acc[7],@acc[6],@acc[6]
or @acc[2],@acc[0],@acc[0]
or @acc[6],@acc[4],@acc[4]
or @acc[4],@acc[0],@acc[0]
ld [$ap+64],$t0 ! in1_z
ld [$ap+64+4],$t1
ld [$ap+64+8],$t2
ld [$ap+64+12],$t3
ld [$ap+64+16],$t4
ld [$ap+64+20],$t5
ld [$ap+64+24],$t6
ld [$ap+64+28],$t7
or $t1,$t0,$t0
or $t3,$t2,$t2
or $t5,$t4,$t4
or $t7,$t6,$t6
or $t2,$t0,$t0
or $t6,$t4,$t4
or $t4,$t0,$t0
or @acc[0],$t0,$t0 ! !in1infty
or $t4,$t0,$t0 ! !in1infty
movrnz $t0,-1,$t0
st $t0,[%fp+STACK_BIAS-16]
@ -1201,37 +1169,21 @@ ecp_nistz256_point_add_affine:
mov $ap,$ap_real
mov $bp,$bp_real
ld [$ap],@acc[0] ! in1_x
ld [$ap+4],@acc[1]
ld [$ap+8],@acc[2]
ld [$ap+12],@acc[3]
ld [$ap+16],@acc[4]
ld [$ap+20],@acc[5]
ld [$ap+24],@acc[6]
ld [$ap+28],@acc[7]
ld [$ap+32],$t0 ! in1_y
ld [$ap+32+4],$t1
ld [$ap+32+8],$t2
ld [$ap+32+12],$t3
ld [$ap+32+16],$t4
ld [$ap+32+20],$t5
ld [$ap+32+24],$t6
ld [$ap+32+28],$t7
or @acc[1],@acc[0],@acc[0]
or @acc[3],@acc[2],@acc[2]
or @acc[5],@acc[4],@acc[4]
or @acc[7],@acc[6],@acc[6]
or @acc[2],@acc[0],@acc[0]
or @acc[6],@acc[4],@acc[4]
or @acc[4],@acc[0],@acc[0]
ld [$ap+64],$t0 ! in1_z
ld [$ap+64+4],$t1
ld [$ap+64+8],$t2
ld [$ap+64+12],$t3
ld [$ap+64+16],$t4
ld [$ap+64+20],$t5
ld [$ap+64+24],$t6
ld [$ap+64+28],$t7
or $t1,$t0,$t0
or $t3,$t2,$t2
or $t5,$t4,$t4
or $t7,$t6,$t6
or $t2,$t0,$t0
or $t6,$t4,$t4
or $t4,$t0,$t0
or @acc[0],$t0,$t0 ! !in1infty
or $t4,$t0,$t0 ! !in1infty
movrnz $t0,-1,$t0
st $t0,[%fp+STACK_BIAS-16]
@ -2402,16 +2354,6 @@ ecp_nistz256_point_add_vis3:
stx $acc2,[%sp+LOCALS64+$in2_y+16]
stx $acc3,[%sp+LOCALS64+$in2_y+24]
or $a1,$a0,$a0
or $a3,$a2,$a2
or $acc1,$acc0,$acc0
or $acc3,$acc2,$acc2
or $a2,$a0,$a0
or $acc2,$acc0,$acc0
or $acc0,$a0,$a0
movrnz $a0,-1,$a0 ! !in2infty
stx $a0,[%fp+STACK_BIAS-8]
ld [$bp+64],$acc0 ! in2_z
ld [$bp+64+4],$t0
ld [$bp+64+8],$acc1
@ -2445,6 +2387,12 @@ ecp_nistz256_point_add_vis3:
stx $acc2,[%sp+LOCALS64+$in2_z+16]
stx $acc3,[%sp+LOCALS64+$in2_z+24]
or $acc1,$acc0,$acc0
or $acc3,$acc2,$acc2
or $acc2,$acc0,$acc0
movrnz $acc0,-1,$acc0 ! !in2infty
stx $acc0,[%fp+STACK_BIAS-8]
or $a0,$t0,$a0
ld [$ap+32],$acc0 ! in1_y
or $a1,$t1,$a1
@ -2474,16 +2422,6 @@ ecp_nistz256_point_add_vis3:
stx $acc2,[%sp+LOCALS64+$in1_y+16]
stx $acc3,[%sp+LOCALS64+$in1_y+24]
or $a1,$a0,$a0
or $a3,$a2,$a2
or $acc1,$acc0,$acc0
or $acc3,$acc2,$acc2
or $a2,$a0,$a0
or $acc2,$acc0,$acc0
or $acc0,$a0,$a0
movrnz $a0,-1,$a0 ! !in1infty
stx $a0,[%fp+STACK_BIAS-16]
ldx [%sp+LOCALS64+$in2_z],$a0 ! forward load
ldx [%sp+LOCALS64+$in2_z+8],$a1
ldx [%sp+LOCALS64+$in2_z+16],$a2
@ -2510,6 +2448,12 @@ ecp_nistz256_point_add_vis3:
stx $acc2,[%sp+LOCALS64+$in1_z+16]
stx $acc3,[%sp+LOCALS64+$in1_z+24]
or $acc1,$acc0,$acc0
or $acc3,$acc2,$acc2
or $acc2,$acc0,$acc0
movrnz $acc0,-1,$acc0 ! !in1infty
stx $acc0,[%fp+STACK_BIAS-16]
call __ecp_nistz256_sqr_mont_vis3 ! p256_sqr_mont(Z2sqr, in2_z);
add %sp,LOCALS64+$Z2sqr,$rp
@ -2871,16 +2815,6 @@ ecp_nistz256_point_add_affine_vis3:
stx $acc2,[%sp+LOCALS64+$in1_y+16]
stx $acc3,[%sp+LOCALS64+$in1_y+24]
or $a1,$a0,$a0
or $a3,$a2,$a2
or $acc1,$acc0,$acc0
or $acc3,$acc2,$acc2
or $a2,$a0,$a0
or $acc2,$acc0,$acc0
or $acc0,$a0,$a0
movrnz $a0,-1,$a0 ! !in1infty
stx $a0,[%fp+STACK_BIAS-16]
ld [$ap+64],$a0 ! in1_z
ld [$ap+64+4],$t0
ld [$ap+64+8],$a1
@ -2902,6 +2836,12 @@ ecp_nistz256_point_add_affine_vis3:
stx $a2,[%sp+LOCALS64+$in1_z+16]
stx $a3,[%sp+LOCALS64+$in1_z+24]
or $a1,$a0,$t0
or $a3,$a2,$t2
or $t2,$t0,$t0
movrnz $t0,-1,$t0 ! !in1infty
stx $t0,[%fp+STACK_BIAS-16]
call __ecp_nistz256_sqr_mont_vis3 ! p256_sqr_mont(Z1sqr, in1_z);
add %sp,LOCALS64+$Z1sqr,$rp

View file

@ -1405,14 +1405,14 @@ for ($i=0;$i<7;$i++) {
&mov ("edx",&DWP($i+12,"esi"));
&mov (&DWP($i+0,"edi"),"eax");
&mov (&DWP(32*18+12,"esp"),"ebp") if ($i==0);
&mov ("ebp","eax") if ($i==0);
&or ("ebp","eax") if ($i!=0 && $i<64);
&mov ("ebp","eax") if ($i==64);
&or ("ebp","eax") if ($i>64);
&mov (&DWP($i+4,"edi"),"ebx");
&or ("ebp","ebx") if ($i<64);
&or ("ebp","ebx") if ($i>=64);
&mov (&DWP($i+8,"edi"),"ecx");
&or ("ebp","ecx") if ($i<64);
&or ("ebp","ecx") if ($i>=64);
&mov (&DWP($i+12,"edi"),"edx");
&or ("ebp","edx") if ($i<64);
&or ("ebp","edx") if ($i>=64);
}
&xor ("eax","eax");
&mov ("esi",&wparam(1));
@ -1428,14 +1428,14 @@ for ($i=0;$i<7;$i++) {
&mov ("ecx",&DWP($i+8,"esi"));
&mov ("edx",&DWP($i+12,"esi"));
&mov (&DWP($i+0,"edi"),"eax");
&mov ("ebp","eax") if ($i==0);
&or ("ebp","eax") if ($i!=0 && $i<64);
&mov ("ebp","eax") if ($i==64);
&or ("ebp","eax") if ($i>64);
&mov (&DWP($i+4,"edi"),"ebx");
&or ("ebp","ebx") if ($i<64);
&or ("ebp","ebx") if ($i>=64);
&mov (&DWP($i+8,"edi"),"ecx");
&or ("ebp","ecx") if ($i<64);
&or ("ebp","ecx") if ($i>=64);
&mov (&DWP($i+12,"edi"),"edx");
&or ("ebp","edx") if ($i<64);
&or ("ebp","edx") if ($i>=64);
}
&xor ("eax","eax");
&sub ("eax","ebp");
@ -1684,14 +1684,14 @@ for ($i=0;$i<7;$i++) {
&mov ("edx",&DWP($i+12,"esi"));
&mov (&DWP($i+0,"edi"),"eax");
&mov (&DWP(32*15+8,"esp"),"ebp") if ($i==0);
&mov ("ebp","eax") if ($i==0);
&or ("ebp","eax") if ($i!=0 && $i<64);
&mov ("ebp","eax") if ($i==64);
&or ("ebp","eax") if ($i>64);
&mov (&DWP($i+4,"edi"),"ebx");
&or ("ebp","ebx") if ($i<64);
&or ("ebp","ebx") if ($i>=64);
&mov (&DWP($i+8,"edi"),"ecx");
&or ("ebp","ecx") if ($i<64);
&or ("ebp","ecx") if ($i>=64);
&mov (&DWP($i+12,"edi"),"edx");
&or ("ebp","edx") if ($i<64);
&or ("ebp","edx") if ($i>=64);
}
&xor ("eax","eax");
&mov ("esi",&wparam(2));