From e7169a5835390d20057de8a19785982bd6a9b8c8 Mon Sep 17 00:00:00 2001 From: Emilia Kasper Date: Thu, 28 Aug 2014 15:33:34 +0200 Subject: [PATCH] Constant-time utilities Pull constant-time methods out to a separate header, add tests. Reviewed-by: Bodo Moeller (cherry picked from commit 9a9b0c0401cae443f115ff19921d347b20aa396b) Conflicts: test/Makefile Signed-off-by: Mark Hatle From e517dfd3d1a1e4dcb37689274aead3daaedc98af Mon Sep 17 00:00:00 2001 From: Emilia Kasper Date: Thu, 28 Aug 2014 19:45:55 +0200 Subject: [PATCH] Make the inline const-time functions static. "inline" without static is not correct as the compiler may choose to ignore it and will then either emit an external definition, or expect one. Reviewed-by: Geoff Thorpe (cherry picked from commit 86f50b36e63275a916b147f9d8764e3c0c060fdb) Signed-off-by: Mark Hatle From 0f04b004acb27a705578d5b2cede0a84ba9af0dd Mon Sep 17 00:00:00 2001 From: Emilia Kasper Date: Thu, 28 Aug 2014 19:43:49 +0200 Subject: [PATCH] RT3066: rewrite RSA padding checks to be slightly more constant time. Also tweak s3_cbc.c to use new constant-time methods. Also fix memory leaks from internal errors in RSA_padding_check_PKCS1_OAEP_mgf1 This patch is based on the original RT submission by Adam Langley Conflicts: crypto/rsa/rsa_oaep.c Signed-off-by: Mark Hatle From 8d507aee7c2d91aaa9b9142530d46d82f1a0139b Mon Sep 17 00:00:00 2001 From: Emilia Kasper Date: Thu, 4 Sep 2014 13:04:42 +0200 Subject: [PATCH] RT3067: simplify patch (Original commit adb46dbc6dd7347750df2468c93e8c34bcb93a4b) Use the new constant-time methods consistently in s3_srvr.c Reviewed-by: Kurt Roeckx (cherry picked from commit 455b65dfab0de51c9f67b3c909311770f2b3f801) Signed-off-by: Mark Hatle From b3c1c872647cd3e15ea4eb951999eae3e69784ad Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Tue, 26 Jan 2016 11:34:41 +0100 Subject: [PATCH] bn/bn_exp.c: constant-time MOD_EXP_CTIME_COPY_FROM_PREBUF. --- crypto/bn/bn_exp.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 57 insertions(+), 18 deletions(-) Index: openssl-1.0.1e/crypto/bn/bn_exp.c =================================================================== --- openssl-1.0.1e.orig/crypto/bn/bn_exp.c +++ openssl-1.0.1e/crypto/bn/bn_exp.c @@ -111,6 +111,7 @@ #include "cryptlib.h" +#include "constant_time_locl.h" #include "bn_lcl.h" #include @@ -534,36 +535,73 @@ err: * as cache lines are concerned. The following functions are used to transfer a BIGNUM * from/to that table. */ -static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, int idx, int width) +static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, int idx, int window) { - size_t i, j; + int i, j; + int width = 1 << window; + BN_ULONG *table = (BN_ULONG *)buf; if (top > b->top) top = b->top; /* this works because 'buf' is explicitly zeroed */ - for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width) - { - buf[j] = ((unsigned char*)b->d)[i]; + for (i = 0, j = idx; i < top; i++, j += width) { + table[j] = b->d[i]; } return 1; } -static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width) +static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int window) { - size_t i, j; + int i, j; + int width = 1 << window; + volatile BN_ULONG *table = (volatile BN_ULONG *)buf; if (bn_wexpand(b, top) == NULL) return 0; - for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width) - { - ((unsigned char*)b->d)[i] = buf[j]; + if (window <= 3) { + for (i = 0; i < top; i++, table += width) { + BN_ULONG acc = 0; + + for (j = 0; j < width; j++) { + acc |= table[j] & + ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); + } + + b->d[i] = acc; + } + } else { + int xstride = 1 << (window - 2); + BN_ULONG y0, y1, y2, y3; + + + i = idx >> (window - 2); /* equivalent of idx / xstride */ + idx &= xstride - 1; /* equivalent of idx % xstride */ + + y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1); + y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1); + y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1); + y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1); + + for (i = 0; i < top; i++, table += width) { + BN_ULONG acc = 0; + + for (j = 0; j < xstride; j++) { + acc |= ( (table[j + 0 * xstride] & y0) | + (table[j + 1 * xstride] & y1) | + (table[j + 2 * xstride] & y2) | + (table[j + 3 * xstride] & y3) ) + & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); + } + + b->d[i] = acc; + } } b->top = top; bn_correct_top(b); return 1; - } + } /* Given a pointer value, compute the next address that is a cache line multiple. */ #define MOD_EXP_CTIME_ALIGN(x_) \ @@ -767,8 +805,8 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr else #endif { - if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers)) goto err; - if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers)) goto err; + if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, window)) goto err; + if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, window)) goto err; /* If the window size is greater than 1, then calculate * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) @@ -778,20 +816,20 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr if (window > 1) { if (!BN_mod_mul_montgomery(&tmp,&am,&am,mont,ctx)) goto err; - if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, numPowers)) goto err; + if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, window)) goto err; for (i=3; i=0; i--,bits--) wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); - if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp,top,powerbuf,wvalue,numPowers)) goto err; + if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp,top,powerbuf,wvalue,window)) goto err; /* Scan the exponent one window at a time starting from the most * significant bits. @@ -808,7 +846,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr } /* Fetch the appropriate pre-computed value from the pre-buf */ - if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, numPowers)) goto err; + if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, window)) goto err; /* Multiply the result into the intermediate result */ if (!BN_mod_mul_montgomery(&tmp,&tmp,&am,mont,ctx)) goto err; Index: openssl-1.0.1e/crypto/perlasm/x86_64-xlate.pl =================================================================== --- openssl-1.0.1e.orig/crypto/perlasm/x86_64-xlate.pl +++ openssl-1.0.1e/crypto/perlasm/x86_64-xlate.pl @@ -121,7 +121,7 @@ my %globals; $self->{sz} = ""; } elsif ($self->{op} =~ /^v/) { # VEX $self->{sz} = ""; - } elsif ($self->{op} =~ /movq/ && $line =~ /%xmm/) { + } elsif ($self->{op} =~ /mov[dq]/ && $line =~ /%xmm/) { $self->{sz} = ""; } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) { $self->{op} = $1; Index: openssl-1.0.1e/crypto/bn/asm/x86_64-mont5.pl =================================================================== --- openssl-1.0.1e.orig/crypto/bn/asm/x86_64-mont5.pl +++ openssl-1.0.1e/crypto/bn/asm/x86_64-mont5.pl @@ -66,60 +66,113 @@ bn_mul_mont_gather5: .align 16 .Lmul_enter: mov ${num}d,${num}d - mov `($win64?56:8)`(%rsp),%r10d # load 7th argument + movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument + lea .Linc(%rip),%r10 push %rbx push %rbp push %r12 push %r13 push %r14 push %r15 -___ -$code.=<<___ if ($win64); - lea -0x28(%rsp),%rsp - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) + .Lmul_alloca: -___ -$code.=<<___; mov %rsp,%rax lea 2($num),%r11 neg %r11 - lea (%rsp,%r11,8),%rsp # tp=alloca(8*(num+2)) + lea -264(%rsp,%r11,8),%rsp # tp=alloca(8*(num+2)+256+8) and \$-1024,%rsp # minimize TLB usage mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp .Lmul_body: - mov $bp,%r12 # reassign $bp + lea 128($bp),%r12 # reassign $bp (+size optimization) ___ $bp="%r12"; $STRIDE=2**5*8; # 5 is "window size" $N=$STRIDE/4; # should match cache line size $code.=<<___; - mov %r10,%r11 - shr \$`log($N/8)/log(2)`,%r10 - and \$`$N/8-1`,%r11 - not %r10 - lea .Lmagic_masks(%rip),%rax - and \$`2**5/($N/8)-1`,%r10 # 5 is "window size" - lea 96($bp,%r11,8),$bp # pointer within 1st cache line - movq 0(%rax,%r10,8),%xmm4 # set of masks denoting which - movq 8(%rax,%r10,8),%xmm5 # cache line contains element - movq 16(%rax,%r10,8),%xmm6 # denoted by 7th argument - movq 24(%rax,%r10,8),%xmm7 - - movq `0*$STRIDE/4-96`($bp),%xmm0 - movq `1*$STRIDE/4-96`($bp),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bp),%xmm2 - pand %xmm5,%xmm1 - movq `3*$STRIDE/4-96`($bp),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 + movdqa 0(%r10),%xmm0 # 00000001000000010000000000000000 + movdqa 16(%r10),%xmm1 # 00000002000000020000000200000002 + lea 24-112(%rsp,$num,8),%r10# place the mask after tp[num+3] (+ICache optimization) + and \$-16,%r10 + + pshufd \$0,%xmm5,%xmm5 # broadcast index + movdqa %xmm1,%xmm4 + movdqa %xmm1,%xmm2 +___ +######################################################################## +# calculate mask by comparing 0..31 to index and save result to stack +# +$code.=<<___; + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 # compare to 1,0 + .byte 0x67 + movdqa %xmm4,%xmm3 +___ +for($k=0;$k<$STRIDE/16-4;$k+=4) { +$code.=<<___; + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 # compare to 3,2 + movdqa %xmm0,`16*($k+0)+112`(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 # compare to 5,4 + movdqa %xmm1,`16*($k+1)+112`(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 # compare to 7,6 + movdqa %xmm2,`16*($k+2)+112`(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,`16*($k+3)+112`(%r10) + movdqa %xmm4,%xmm3 +___ +} +$code.=<<___; # last iteration can be optimized + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,`16*($k+0)+112`(%r10) + + paddd %xmm2,%xmm3 + .byte 0x67 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,`16*($k+1)+112`(%r10) + + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,`16*($k+2)+112`(%r10) + pand `16*($k+0)-128`($bp),%xmm0 # while it's still in register + + pand `16*($k+1)-128`($bp),%xmm1 + pand `16*($k+2)-128`($bp),%xmm2 + movdqa %xmm3,`16*($k+3)+112`(%r10) + pand `16*($k+3)-128`($bp),%xmm3 por %xmm2,%xmm0 + por %xmm3,%xmm1 +___ +for($k=0;$k<$STRIDE/16-4;$k+=4) { +$code.=<<___; + movdqa `16*($k+0)-128`($bp),%xmm4 + movdqa `16*($k+1)-128`($bp),%xmm5 + movdqa `16*($k+2)-128`($bp),%xmm2 + pand `16*($k+0)+112`(%r10),%xmm4 + movdqa `16*($k+3)-128`($bp),%xmm3 + pand `16*($k+1)+112`(%r10),%xmm5 + por %xmm4,%xmm0 + pand `16*($k+2)+112`(%r10),%xmm2 + por %xmm5,%xmm1 + pand `16*($k+3)+112`(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 +___ +} +$code.=<<___; + por %xmm1,%xmm0 + pshufd \$0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 lea $STRIDE($bp),$bp - por %xmm3,%xmm0 - movq %xmm0,$m0 # m0=bp[0] mov ($n0),$n0 # pull n0[0] value @@ -128,29 +181,14 @@ $code.=<<___; xor $i,$i # i=0 xor $j,$j # j=0 - movq `0*$STRIDE/4-96`($bp),%xmm0 - movq `1*$STRIDE/4-96`($bp),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bp),%xmm2 - pand %xmm5,%xmm1 - mov $n0,$m1 mulq $m0 # ap[0]*bp[0] mov %rax,$lo0 mov ($np),%rax - movq `3*$STRIDE/4-96`($bp),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 - imulq $lo0,$m1 # "tp[0]"*n0 mov %rdx,$hi0 - por %xmm2,%xmm0 - lea $STRIDE($bp),$bp - por %xmm3,%xmm0 - mulq $m1 # np[0]*m1 add %rax,$lo0 # discarded mov 8($ap),%rax @@ -183,8 +221,6 @@ $code.=<<___; cmp $num,$j jne .L1st - movq %xmm0,$m0 # bp[1] - add %rax,$hi1 mov ($ap),%rax # ap[0] adc \$0,%rdx @@ -204,33 +240,46 @@ $code.=<<___; jmp .Louter .align 16 .Louter: + lea 24+128(%rsp,$num,8),%rdx # where 256-byte mask is (+size optimization) + and \$-16,%rdx + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 +___ +for($k=0;$k<$STRIDE/16;$k+=4) { +$code.=<<___; + movdqa `16*($k+0)-128`($bp),%xmm0 + movdqa `16*($k+1)-128`($bp),%xmm1 + movdqa `16*($k+2)-128`($bp),%xmm2 + movdqa `16*($k+3)-128`($bp),%xmm3 + pand `16*($k+0)-128`(%rdx),%xmm0 + pand `16*($k+1)-128`(%rdx),%xmm1 + por %xmm0,%xmm4 + pand `16*($k+2)-128`(%rdx),%xmm2 + por %xmm1,%xmm5 + pand `16*($k+3)-128`(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 +___ +} +$code.=<<___; + por %xmm5,%xmm4 + pshufd \$0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + lea $STRIDE($bp),$bp + movq %xmm0,$m0 # m0=bp[i] + xor $j,$j # j=0 mov $n0,$m1 mov (%rsp),$lo0 - movq `0*$STRIDE/4-96`($bp),%xmm0 - movq `1*$STRIDE/4-96`($bp),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bp),%xmm2 - pand %xmm5,%xmm1 - mulq $m0 # ap[0]*bp[i] add %rax,$lo0 # ap[0]*bp[i]+tp[0] mov ($np),%rax adc \$0,%rdx - movq `3*$STRIDE/4-96`($bp),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 - imulq $lo0,$m1 # tp[0]*n0 mov %rdx,$hi0 - por %xmm2,%xmm0 - lea $STRIDE($bp),$bp - por %xmm3,%xmm0 - mulq $m1 # np[0]*m1 add %rax,$lo0 # discarded mov 8($ap),%rax @@ -266,8 +315,6 @@ $code.=<<___; cmp $num,$j jne .Linner - movq %xmm0,$m0 # bp[i+1] - add %rax,$hi1 mov ($ap),%rax # ap[0] adc \$0,%rdx @@ -321,13 +368,7 @@ $code.=<<___; mov 8(%rsp,$num,8),%rsi # restore %rsp mov \$1,%rax -___ -$code.=<<___ if ($win64); - movaps (%rsi),%xmm6 - movaps 0x10(%rsi),%xmm7 - lea 0x28(%rsi),%rsi -___ -$code.=<<___; + mov (%rsi),%r15 mov 8(%rsi),%r14 mov 16(%rsi),%r13 @@ -348,91 +389,130 @@ $code.=<<___; bn_mul4x_mont_gather5: .Lmul4x_enter: mov ${num}d,${num}d - mov `($win64?56:8)`(%rsp),%r10d # load 7th argument + movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument + lea .Linc(%rip),%r10 push %rbx push %rbp push %r12 push %r13 push %r14 push %r15 -___ -$code.=<<___ if ($win64); - lea -0x28(%rsp),%rsp - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) + .Lmul4x_alloca: -___ -$code.=<<___; mov %rsp,%rax lea 4($num),%r11 neg %r11 - lea (%rsp,%r11,8),%rsp # tp=alloca(8*(num+4)) + lea -256(%rsp,%r11,8),%rsp # tp=alloca(8*(num+4)+256) and \$-1024,%rsp # minimize TLB usage mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp .Lmul4x_body: mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp - mov %rdx,%r12 # reassign $bp + lea 128(%rdx),%r12 # reassign $bp (+size optimization) ___ $bp="%r12"; $STRIDE=2**5*8; # 5 is "window size" $N=$STRIDE/4; # should match cache line size $code.=<<___; - mov %r10,%r11 - shr \$`log($N/8)/log(2)`,%r10 - and \$`$N/8-1`,%r11 - not %r10 - lea .Lmagic_masks(%rip),%rax - and \$`2**5/($N/8)-1`,%r10 # 5 is "window size" - lea 96($bp,%r11,8),$bp # pointer within 1st cache line - movq 0(%rax,%r10,8),%xmm4 # set of masks denoting which - movq 8(%rax,%r10,8),%xmm5 # cache line contains element - movq 16(%rax,%r10,8),%xmm6 # denoted by 7th argument - movq 24(%rax,%r10,8),%xmm7 - - movq `0*$STRIDE/4-96`($bp),%xmm0 - movq `1*$STRIDE/4-96`($bp),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bp),%xmm2 - pand %xmm5,%xmm1 - movq `3*$STRIDE/4-96`($bp),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 + movdqa 0(%r10),%xmm0 # 00000001000000010000000000000000 + movdqa 16(%r10),%xmm1 # 00000002000000020000000200000002 + lea 32-112(%rsp,$num,8),%r10# place the mask after tp[num+4] (+ICache optimization) + + pshufd \$0,%xmm5,%xmm5 # broadcast index + movdqa %xmm1,%xmm4 + .byte 0x67,0x67 + movdqa %xmm1,%xmm2 +___ +######################################################################## +# calculate mask by comparing 0..31 to index and save result to stack +# +$code.=<<___; + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 # compare to 1,0 + .byte 0x67 + movdqa %xmm4,%xmm3 +___ +for($k=0;$k<$STRIDE/16-4;$k+=4) { +$code.=<<___; + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 # compare to 3,2 + movdqa %xmm0,`16*($k+0)+112`(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 # compare to 5,4 + movdqa %xmm1,`16*($k+1)+112`(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 # compare to 7,6 + movdqa %xmm2,`16*($k+2)+112`(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,`16*($k+3)+112`(%r10) + movdqa %xmm4,%xmm3 +___ +} +$code.=<<___; # last iteration can be optimized + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,`16*($k+0)+112`(%r10) + + paddd %xmm2,%xmm3 + .byte 0x67 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,`16*($k+1)+112`(%r10) + + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,`16*($k+2)+112`(%r10) + pand `16*($k+0)-128`($bp),%xmm0 # while it's still in register + + pand `16*($k+1)-128`($bp),%xmm1 + pand `16*($k+2)-128`($bp),%xmm2 + movdqa %xmm3,`16*($k+3)+112`(%r10) + pand `16*($k+3)-128`($bp),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 +___ +for($k=0;$k<$STRIDE/16-4;$k+=4) { +$code.=<<___; + movdqa `16*($k+0)-128`($bp),%xmm4 + movdqa `16*($k+1)-128`($bp),%xmm5 + movdqa `16*($k+2)-128`($bp),%xmm2 + pand `16*($k+0)+112`(%r10),%xmm4 + movdqa `16*($k+3)-128`($bp),%xmm3 + pand `16*($k+1)+112`(%r10),%xmm5 + por %xmm4,%xmm0 + pand `16*($k+2)+112`(%r10),%xmm2 + por %xmm5,%xmm1 + pand `16*($k+3)+112`(%r10),%xmm3 por %xmm2,%xmm0 + por %xmm3,%xmm1 +___ +} +$code.=<<___; + por %xmm1,%xmm0 + pshufd \$0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 lea $STRIDE($bp),$bp - por %xmm3,%xmm0 - movq %xmm0,$m0 # m0=bp[0] + mov ($n0),$n0 # pull n0[0] value mov ($ap),%rax xor $i,$i # i=0 xor $j,$j # j=0 - movq `0*$STRIDE/4-96`($bp),%xmm0 - movq `1*$STRIDE/4-96`($bp),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bp),%xmm2 - pand %xmm5,%xmm1 - mov $n0,$m1 mulq $m0 # ap[0]*bp[0] mov %rax,$A[0] mov ($np),%rax - movq `3*$STRIDE/4-96`($bp),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 - imulq $A[0],$m1 # "tp[0]"*n0 mov %rdx,$A[1] - por %xmm2,%xmm0 - lea $STRIDE($bp),$bp - por %xmm3,%xmm0 - mulq $m1 # np[0]*m1 add %rax,$A[0] # discarded mov 8($ap),%rax @@ -550,8 +630,6 @@ $code.=<<___; mov $N[1],-16(%rsp,$j,8) # tp[j-1] mov %rdx,$N[0] - movq %xmm0,$m0 # bp[1] - xor $N[1],$N[1] add $A[0],$N[0] adc \$0,$N[1] @@ -561,12 +639,34 @@ $code.=<<___; lea 1($i),$i # i++ .align 4 .Louter4x: + lea 32+128(%rsp,$num,8),%rdx # where 256-byte mask is (+size optimization) + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 +___ +for($k=0;$k<$STRIDE/16;$k+=4) { +$code.=<<___; + movdqa `16*($k+0)-128`($bp),%xmm0 + movdqa `16*($k+1)-128`($bp),%xmm1 + movdqa `16*($k+2)-128`($bp),%xmm2 + movdqa `16*($k+3)-128`($bp),%xmm3 + pand `16*($k+0)-128`(%rdx),%xmm0 + pand `16*($k+1)-128`(%rdx),%xmm1 + por %xmm0,%xmm4 + pand `16*($k+2)-128`(%rdx),%xmm2 + por %xmm1,%xmm5 + pand `16*($k+3)-128`(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 +___ +} +$code.=<<___; + por %xmm5,%xmm4 + pshufd \$0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + lea $STRIDE($bp),$bp + movq %xmm0,$m0 # m0=bp[i] + xor $j,$j # j=0 - movq `0*$STRIDE/4-96`($bp),%xmm0 - movq `1*$STRIDE/4-96`($bp),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bp),%xmm2 - pand %xmm5,%xmm1 mov (%rsp),$A[0] mov $n0,$m1 @@ -575,18 +675,9 @@ $code.=<<___; mov ($np),%rax adc \$0,%rdx - movq `3*$STRIDE/4-96`($bp),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 - imulq $A[0],$m1 # tp[0]*n0 mov %rdx,$A[1] - por %xmm2,%xmm0 - lea $STRIDE($bp),$bp - por %xmm3,%xmm0 - mulq $m1 # np[0]*m1 add %rax,$A[0] # "$N[0]", discarded mov 8($ap),%rax @@ -718,7 +809,6 @@ $code.=<<___; mov $N[0],-24(%rsp,$j,8) # tp[j-1] mov %rdx,$N[0] - movq %xmm0,$m0 # bp[i+1] mov $N[1],-16(%rsp,$j,8) # tp[j-1] xor $N[1],$N[1] @@ -809,13 +899,7 @@ ___ $code.=<<___; mov 8(%rsp,$num,8),%rsi # restore %rsp mov \$1,%rax -___ -$code.=<<___ if ($win64); - movaps (%rsi),%xmm6 - movaps 0x10(%rsi),%xmm7 - lea 0x28(%rsi),%rsi -___ -$code.=<<___; + mov (%rsi),%r15 mov 8(%rsi),%r14 mov 16(%rsi),%r13 @@ -830,8 +914,8 @@ ___ }}} { -my ($inp,$num,$tbl,$idx)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order - ("%rdi","%rsi","%rdx","%rcx"); # Unix order +my ($inp,$num,$tbl,$idx)=$win64?("%rcx","%rdx","%r8", "%r9d") : # Win64 order + ("%rdi","%rsi","%rdx","%ecx"); # Unix order my $out=$inp; my $STRIDE=2**5*8; my $N=$STRIDE/4; @@ -859,53 +943,89 @@ bn_scatter5: .type bn_gather5,\@abi-omnipotent .align 16 bn_gather5: -___ -$code.=<<___ if ($win64); .LSEH_begin_bn_gather5: # I can't trust assembler to use specific encoding:-( - .byte 0x48,0x83,0xec,0x28 #sub \$0x28,%rsp - .byte 0x0f,0x29,0x34,0x24 #movaps %xmm6,(%rsp) - .byte 0x0f,0x29,0x7c,0x24,0x10 #movdqa %xmm7,0x10(%rsp) -___ -$code.=<<___; - mov $idx,%r11 - shr \$`log($N/8)/log(2)`,$idx - and \$`$N/8-1`,%r11 - not $idx - lea .Lmagic_masks(%rip),%rax - and \$`2**5/($N/8)-1`,$idx # 5 is "window size" - lea 96($tbl,%r11,8),$tbl # pointer within 1st cache line - movq 0(%rax,$idx,8),%xmm4 # set of masks denoting which - movq 8(%rax,$idx,8),%xmm5 # cache line contains element - movq 16(%rax,$idx,8),%xmm6 # denoted by 7th argument - movq 24(%rax,$idx,8),%xmm7 + .byte 0x4c,0x8d,0x14,0x24 # lea (%rsp),%r10 + .byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 # sub $0x108,%rsp + lea .Linc(%rip),%rax + and \$-16,%rsp # shouldn't be formally required + + movd $idx,%xmm5 + movdqa 0(%rax),%xmm0 # 00000001000000010000000000000000 + movdqa 16(%rax),%xmm1 # 00000002000000020000000200000002 + lea 128($tbl),%r11 # size optimization + lea 128(%rsp),%rax # size optimization + + pshufd \$0,%xmm5,%xmm5 # broadcast $idx + movdqa %xmm1,%xmm4 + movdqa %xmm1,%xmm2 +___ +######################################################################## +# calculate mask by comparing 0..31 to $idx and save result to stack +# +for($i=0;$i<$STRIDE/16;$i+=4) { +$code.=<<___; + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 # compare to 1,0 +___ +$code.=<<___ if ($i); + movdqa %xmm3,`16*($i-1)-128`(%rax) +___ +$code.=<<___; + movdqa %xmm4,%xmm3 + + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 # compare to 3,2 + movdqa %xmm0,`16*($i+0)-128`(%rax) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 # compare to 5,4 + movdqa %xmm1,`16*($i+1)-128`(%rax) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 # compare to 7,6 + movdqa %xmm2,`16*($i+2)-128`(%rax) + movdqa %xmm4,%xmm2 +___ +} +$code.=<<___; + movdqa %xmm3,`16*($i-1)-128`(%rax) jmp .Lgather -.align 16 -.Lgather: - movq `0*$STRIDE/4-96`($tbl),%xmm0 - movq `1*$STRIDE/4-96`($tbl),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($tbl),%xmm2 - pand %xmm5,%xmm1 - movq `3*$STRIDE/4-96`($tbl),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 - por %xmm2,%xmm0 - lea $STRIDE($tbl),$tbl - por %xmm3,%xmm0 +.align 32 +.Lgather: + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 +___ +for($i=0;$i<$STRIDE/16;$i+=4) { +$code.=<<___; + movdqa `16*($i+0)-128`(%r11),%xmm0 + movdqa `16*($i+1)-128`(%r11),%xmm1 + movdqa `16*($i+2)-128`(%r11),%xmm2 + pand `16*($i+0)-128`(%rax),%xmm0 + movdqa `16*($i+3)-128`(%r11),%xmm3 + pand `16*($i+1)-128`(%rax),%xmm1 + por %xmm0,%xmm4 + pand `16*($i+2)-128`(%rax),%xmm2 + por %xmm1,%xmm5 + pand `16*($i+3)-128`(%rax),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 +___ +} +$code.=<<___; + por %xmm5,%xmm4 + lea $STRIDE(%r11),%r11 + pshufd \$0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 movq %xmm0,($out) # m0=bp[0] lea 8($out),$out sub \$1,$num jnz .Lgather -___ -$code.=<<___ if ($win64); - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) - lea 0x28(%rsp),%rsp -___ -$code.=<<___; + + lea (%r10),%rsp ret .LSEH_end_bn_gather5: .size bn_gather5,.-bn_gather5 @@ -913,9 +1033,9 @@ ___ } $code.=<<___; .align 64 -.Lmagic_masks: - .long 0,0, 0,0, 0,0, -1,-1 - .long 0,0, 0,0, 0,0, 0,0 +.Linc: + .long 0,0, 1,1 + .long 2,2, 2,2 .asciz "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by " ___ @@ -954,7 +1074,7 @@ mul_handler: cmp %r10,%rbx # context->RipR13 mov %r14,232($context) # restore context->R14 mov %r15,240($context) # restore context->R15 - movups %xmm0,512($context) # restore context->Xmm6 - movups %xmm1,528($context) # restore context->Xmm7 .Lcommon_seh_tail: mov 8(%rax),%rdi @@ -1057,10 +1173,9 @@ mul_handler: .rva .Lmul4x_alloca,.Lmul4x_body,.Lmul4x_epilogue # HandlerData[] .align 8 .LSEH_info_bn_gather5: - .byte 0x01,0x0d,0x05,0x00 - .byte 0x0d,0x78,0x01,0x00 #movaps 0x10(rsp),xmm7 - .byte 0x08,0x68,0x00,0x00 #movaps (rsp),xmm6 - .byte 0x04,0x42,0x00,0x00 #sub rsp,0x28 + .byte 0x01,0x0b,0x03,0x0a + .byte 0x0b,0x01,0x21,0x00 # sub rsp,0x108 + .byte 0x04,0xa3,0x00,0x00 # lea r10,(rsp), set_frame r10 .align 8 ___ } Index: openssl-1.0.1e/crypto/Makefile =================================================================== --- openssl-1.0.1e.orig/crypto/Makefile +++ openssl-1.0.1e/crypto/Makefile @@ -31,6 +31,7 @@ CPUID_OBJ=mem_clr.o LIBS= GENERAL=Makefile README crypto-lib.com install.com +TEST=constant_time_test.c LIB= $(TOP)/libcrypto.a SHARED_LIB= libcrypto$(SHLIB_EXT) Index: openssl-1.0.1e/crypto/constant_time_locl.h =================================================================== --- /dev/null +++ openssl-1.0.1e/crypto/constant_time_locl.h @@ -0,0 +1,216 @@ +/* crypto/constant_time_locl.h */ +/* + * Utilities for constant-time cryptography. + * + * Author: Emilia Kasper (emilia@openssl.org) + * Based on previous work by Bodo Moeller, Emilia Kasper, Adam Langley + * (Google). + * ==================================================================== + * Copyright (c) 2014 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +#ifndef HEADER_CONSTANT_TIME_LOCL_H +#define HEADER_CONSTANT_TIME_LOCL_H + +#include "e_os.h" /* For 'inline' */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The boolean methods return a bitmask of all ones (0xff...f) for true + * and 0 for false. This is useful for choosing a value based on the result + * of a conditional in constant time. For example, + * + * if (a < b) { + * c = a; + * } else { + * c = b; + * } + * + * can be written as + * + * unsigned int lt = constant_time_lt(a, b); + * c = constant_time_select(lt, a, b); + */ + +/* + * Returns the given value with the MSB copied to all the other + * bits. Uses the fact that arithmetic shift shifts-in the sign bit. + * However, this is not ensured by the C standard so you may need to + * replace this with something else on odd CPUs. + */ +static inline unsigned int constant_time_msb(unsigned int a); + +/* + * Returns 0xff..f if a < b and 0 otherwise. + */ +static inline unsigned int constant_time_lt(unsigned int a, unsigned int b); +/* Convenience method for getting an 8-bit mask. */ +static inline unsigned char constant_time_lt_8(unsigned int a, unsigned int b); + +/* + * Returns 0xff..f if a >= b and 0 otherwise. + */ +static inline unsigned int constant_time_ge(unsigned int a, unsigned int b); +/* Convenience method for getting an 8-bit mask. */ +static inline unsigned char constant_time_ge_8(unsigned int a, unsigned int b); + +/* + * Returns 0xff..f if a == 0 and 0 otherwise. + */ +static inline unsigned int constant_time_is_zero(unsigned int a); +/* Convenience method for getting an 8-bit mask. */ +static inline unsigned char constant_time_is_zero_8(unsigned int a); + + +/* + * Returns 0xff..f if a == b and 0 otherwise. + */ +static inline unsigned int constant_time_eq(unsigned int a, unsigned int b); +/* Convenience method for getting an 8-bit mask. */ +static inline unsigned char constant_time_eq_8(unsigned int a, unsigned int b); +/* Signed integers. */ +static inline unsigned int constant_time_eq_int(int a, int b); +/* Convenience method for getting an 8-bit mask. */ +static inline unsigned char constant_time_eq_int_8(int a, int b); + + +/* + * Returns (mask & a) | (~mask & b). + * + * When |mask| is all 1s or all 0s (as returned by the methods above), + * the select methods return either |a| (if |mask| is nonzero) or |b| + * (if |mask| is zero). + */ +static inline unsigned int constant_time_select(unsigned int mask, + unsigned int a, unsigned int b); +/* Convenience method for unsigned chars. */ +static inline unsigned char constant_time_select_8(unsigned char mask, + unsigned char a, unsigned char b); +/* Convenience method for signed integers. */ +static inline int constant_time_select_int(unsigned int mask, int a, int b); + +static inline unsigned int constant_time_msb(unsigned int a) + { + return (unsigned int)((int)(a) >> (sizeof(int) * 8 - 1)); + } + +static inline unsigned int constant_time_lt(unsigned int a, unsigned int b) + { + unsigned int lt; + /* Case 1: msb(a) == msb(b). a < b iff the MSB of a - b is set.*/ + lt = ~(a ^ b) & (a - b); + /* Case 2: msb(a) != msb(b). a < b iff the MSB of b is set. */ + lt |= ~a & b; + return constant_time_msb(lt); + } + +static inline unsigned char constant_time_lt_8(unsigned int a, unsigned int b) + { + return (unsigned char)(constant_time_lt(a, b)); + } + +static inline unsigned int constant_time_ge(unsigned int a, unsigned int b) + { + unsigned int ge; + /* Case 1: msb(a) == msb(b). a >= b iff the MSB of a - b is not set.*/ + ge = ~((a ^ b) | (a - b)); + /* Case 2: msb(a) != msb(b). a >= b iff the MSB of a is set. */ + ge |= a & ~b; + return constant_time_msb(ge); + } + +static inline unsigned char constant_time_ge_8(unsigned int a, unsigned int b) + { + return (unsigned char)(constant_time_ge(a, b)); + } + +static inline unsigned int constant_time_is_zero(unsigned int a) + { + return constant_time_msb(~a & (a - 1)); + } + +static inline unsigned char constant_time_is_zero_8(unsigned int a) + { + return (unsigned char)(constant_time_is_zero(a)); + } + +static inline unsigned int constant_time_eq(unsigned int a, unsigned int b) + { + return constant_time_is_zero(a ^ b); + } + +static inline unsigned char constant_time_eq_8(unsigned int a, unsigned int b) + { + return (unsigned char)(constant_time_eq(a, b)); + } + +static inline unsigned int constant_time_eq_int(int a, int b) + { + return constant_time_eq((unsigned)(a), (unsigned)(b)); + } + +static inline unsigned char constant_time_eq_int_8(int a, int b) + { + return constant_time_eq_8((unsigned)(a), (unsigned)(b)); + } + +static inline unsigned int constant_time_select(unsigned int mask, + unsigned int a, unsigned int b) + { + return (mask & a) | (~mask & b); + } + +static inline unsigned char constant_time_select_8(unsigned char mask, + unsigned char a, unsigned char b) + { + return (unsigned char)(constant_time_select(mask, a, b)); + } + +inline int constant_time_select_int(unsigned int mask, int a, int b) + { + return (int)(constant_time_select(mask, (unsigned)(a), (unsigned)(b))); + } + +#ifdef __cplusplus +} +#endif + +#endif /* HEADER_CONSTANT_TIME_LOCL_H */ Index: openssl-1.0.1e/crypto/constant_time_test.c =================================================================== --- /dev/null +++ openssl-1.0.1e/crypto/constant_time_test.c @@ -0,0 +1,328 @@ +/* crypto/constant_time_test.c */ +/* + * Utilities for constant-time cryptography. + * + * Author: Emilia Kasper (emilia@openssl.org) + * Based on previous work by Bodo Moeller, Emilia Kasper, Adam Langley + * (Google). + * ==================================================================== + * Copyright (c) 2014 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +#include "../crypto/constant_time_locl.h" + +#include +#include +#include + +static const unsigned int CONSTTIME_TRUE = (unsigned)(~0); +static const unsigned int CONSTTIME_FALSE = 0; +static const unsigned char CONSTTIME_TRUE_8 = 0xff; +static const unsigned char CONSTTIME_FALSE_8 = 0; + +static int test_binary_op(unsigned int (*op)(unsigned int a, unsigned int b), + const char* op_name, unsigned int a, unsigned int b, int is_true) + { + unsigned c = op(a, b); + if (is_true && c != CONSTTIME_TRUE) + { + fprintf(stderr, "Test failed for %s(%du, %du): expected %du " + "(TRUE), got %du\n", op_name, a, b, CONSTTIME_TRUE, c); + return 1; + } + else if (!is_true && c != CONSTTIME_FALSE) + { + fprintf(stderr, "Test failed for %s(%du, %du): expected %du " + "(FALSE), got %du\n", op_name, a, b, CONSTTIME_FALSE, + c); + return 1; + } + return 0; + } + +static int test_binary_op_8(unsigned char (*op)(unsigned int a, unsigned int b), + const char* op_name, unsigned int a, unsigned int b, int is_true) + { + unsigned char c = op(a, b); + if (is_true && c != CONSTTIME_TRUE_8) + { + fprintf(stderr, "Test failed for %s(%du, %du): expected %u " + "(TRUE), got %u\n", op_name, a, b, CONSTTIME_TRUE_8, c); + return 1; + } + else if (!is_true && c != CONSTTIME_FALSE_8) + { + fprintf(stderr, "Test failed for %s(%du, %du): expected %u " + "(FALSE), got %u\n", op_name, a, b, CONSTTIME_FALSE_8, + c); + return 1; + } + return 0; + } + +static int test_is_zero(unsigned int a) + { + unsigned int c = constant_time_is_zero(a); + if (a == 0 && c != CONSTTIME_TRUE) + { + fprintf(stderr, "Test failed for constant_time_is_zero(%du): " + "expected %du (TRUE), got %du\n", a, CONSTTIME_TRUE, c); + return 1; + } + else if (a != 0 && c != CONSTTIME_FALSE) + { + fprintf(stderr, "Test failed for constant_time_is_zero(%du): " + "expected %du (FALSE), got %du\n", a, CONSTTIME_FALSE, + c); + return 1; + } + return 0; + } + +static int test_is_zero_8(unsigned int a) + { + unsigned char c = constant_time_is_zero_8(a); + if (a == 0 && c != CONSTTIME_TRUE_8) + { + fprintf(stderr, "Test failed for constant_time_is_zero(%du): " + "expected %u (TRUE), got %u\n", a, CONSTTIME_TRUE_8, c); + return 1; + } + else if (a != 0 && c != CONSTTIME_FALSE) + { + fprintf(stderr, "Test failed for constant_time_is_zero(%du): " + "expected %u (FALSE), got %u\n", a, CONSTTIME_FALSE_8, + c); + return 1; + } + return 0; + } + +static int test_select(unsigned int a, unsigned int b) + { + unsigned int selected = constant_time_select(CONSTTIME_TRUE, a, b); + if (selected != a) + { + fprintf(stderr, "Test failed for constant_time_select(%du, %du," + "%du): expected %du(first value), got %du\n", + CONSTTIME_TRUE, a, b, a, selected); + return 1; + } + selected = constant_time_select(CONSTTIME_FALSE, a, b); + if (selected != b) + { + fprintf(stderr, "Test failed for constant_time_select(%du, %du," + "%du): expected %du(second value), got %du\n", + CONSTTIME_FALSE, a, b, b, selected); + return 1; + } + return 0; + } + +static int test_select_8(unsigned char a, unsigned char b) + { + unsigned char selected = constant_time_select_8(CONSTTIME_TRUE_8, a, b); + if (selected != a) + { + fprintf(stderr, "Test failed for constant_time_select(%u, %u," + "%u): expected %u(first value), got %u\n", + CONSTTIME_TRUE, a, b, a, selected); + return 1; + } + selected = constant_time_select_8(CONSTTIME_FALSE_8, a, b); + if (selected != b) + { + fprintf(stderr, "Test failed for constant_time_select(%u, %u," + "%u): expected %u(second value), got %u\n", + CONSTTIME_FALSE, a, b, b, selected); + return 1; + } + return 0; + } + +static int test_select_int(int a, int b) + { + int selected = constant_time_select_int(CONSTTIME_TRUE, a, b); + if (selected != a) + { + fprintf(stderr, "Test failed for constant_time_select(%du, %d," + "%d): expected %d(first value), got %d\n", + CONSTTIME_TRUE, a, b, a, selected); + return 1; + } + selected = constant_time_select_int(CONSTTIME_FALSE, a, b); + if (selected != b) + { + fprintf(stderr, "Test failed for constant_time_select(%du, %d," + "%d): expected %d(second value), got %d\n", + CONSTTIME_FALSE, a, b, b, selected); + return 1; + } + return 0; + } + +static int test_eq_int(int a, int b) + { + unsigned int equal = constant_time_eq_int(a, b); + if (a == b && equal != CONSTTIME_TRUE) + { + fprintf(stderr, "Test failed for constant_time_select(%d, %d): " + "expected %du(TRUE), got %du\n", + a, b, CONSTTIME_TRUE, equal); + return 1; + } + else if (a != b && equal != CONSTTIME_FALSE) + { + fprintf(stderr, "Test failed for constant_time_select(%d, %d): " + "expected %du(FALSE), got %du\n", + a, b, CONSTTIME_FALSE, equal); + return 1; + } + return 0; + } + +static int test_eq_int_8(int a, int b) + { + unsigned char equal = constant_time_eq_int_8(a, b); + if (a == b && equal != CONSTTIME_TRUE_8) + { + fprintf(stderr, "Test failed for constant_time_select(%d, %d): " + "expected %u(TRUE), got %u\n", + a, b, CONSTTIME_TRUE_8, equal); + return 1; + } + else if (a != b && equal != CONSTTIME_FALSE_8) + { + fprintf(stderr, "Test failed for constant_time_select(%d, %d): " + "expected %u(FALSE), got %u\n", + a, b, CONSTTIME_FALSE_8, equal); + return 1; + } + return 0; + } + +static unsigned int test_values[] = {0, 1, 1024, 12345, 32000, UINT_MAX/2-1, + UINT_MAX/2, UINT_MAX/2+1, UINT_MAX-1, + UINT_MAX}; + +static unsigned char test_values_8[] = {0, 1, 2, 20, 32, 127, 128, 129, 255}; + +static int signed_test_values[] = {0, 1, -1, 1024, -1024, 12345, -12345, + 32000, -32000, INT_MAX, INT_MIN, INT_MAX-1, + INT_MIN+1}; + + +int main(int argc, char *argv[]) + { + unsigned int a, b, i, j; + int c, d; + unsigned char e, f; + int num_failed = 0, num_all = 0; + fprintf(stdout, "Testing constant time operations...\n"); + + for (i = 0; i < sizeof(test_values)/sizeof(int); ++i) + { + a = test_values[i]; + num_failed += test_is_zero(a); + num_failed += test_is_zero_8(a); + num_all += 2; + for (j = 0; j < sizeof(test_values)/sizeof(int); ++j) + { + b = test_values[j]; + num_failed += test_binary_op(&constant_time_lt, + "constant_time_lt", a, b, a < b); + num_failed += test_binary_op_8(&constant_time_lt_8, + "constant_time_lt_8", a, b, a < b); + num_failed += test_binary_op(&constant_time_lt, + "constant_time_lt_8", b, a, b < a); + num_failed += test_binary_op_8(&constant_time_lt_8, + "constant_time_lt_8", b, a, b < a); + num_failed += test_binary_op(&constant_time_ge, + "constant_time_ge", a, b, a >= b); + num_failed += test_binary_op_8(&constant_time_ge_8, + "constant_time_ge_8", a, b, a >= b); + num_failed += test_binary_op(&constant_time_ge, + "constant_time_ge", b, a, b >= a); + num_failed += test_binary_op_8(&constant_time_ge_8, + "constant_time_ge_8", b, a, b >= a); + num_failed += test_binary_op(&constant_time_eq, + "constant_time_eq", a, b, a == b); + num_failed += test_binary_op_8(&constant_time_eq_8, + "constant_time_eq_8", a, b, a == b); + num_failed += test_binary_op(&constant_time_eq, + "constant_time_eq", b, a, b == a); + num_failed += test_binary_op_8(&constant_time_eq_8, + "constant_time_eq_8", b, a, b == a); + num_failed += test_select(a, b); + num_all += 13; + } + } + + for (i = 0; i < sizeof(signed_test_values)/sizeof(int); ++i) + { + c = signed_test_values[i]; + for (j = 0; j < sizeof(signed_test_values)/sizeof(int); ++j) + { + d = signed_test_values[j]; + num_failed += test_select_int(c, d); + num_all += 1; + } + } + + for (i = 0; i < sizeof(test_values_8); ++i) + { + e = test_values_8[i]; + for (j = 0; j < sizeof(test_values_8); ++j) + { + f = test_values_8[j]; + num_failed += test_select_8(e, f); + num_all += 1; + } + } + + if (!num_failed) + { + fprintf(stdout, "ok (ran %d tests)\n", num_all); + return EXIT_SUCCESS; + } + else + { + fprintf(stdout, "%d of %d tests failed!\n", num_failed, num_all); + return EXIT_FAILURE; + } + } Index: openssl-1.0.1e/crypto/rsa/Makefile =================================================================== --- openssl-1.0.1e.orig/crypto/rsa/Makefile +++ openssl-1.0.1e/crypto/rsa/Makefile @@ -212,7 +212,7 @@ rsa_oaep.o: ../../include/openssl/openss rsa_oaep.o: ../../include/openssl/rand.h ../../include/openssl/rsa.h rsa_oaep.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h rsa_oaep.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -rsa_oaep.o: ../cryptlib.h rsa_oaep.c +rsa_oaep.o: ../constant_time_locl.h ../cryptlib.h rsa_oaep.c rsa_pk1.o: ../../e_os.h ../../include/openssl/asn1.h rsa_pk1.o: ../../include/openssl/bio.h ../../include/openssl/bn.h rsa_pk1.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h @@ -221,7 +221,8 @@ rsa_pk1.o: ../../include/openssl/lhash.h rsa_pk1.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h rsa_pk1.o: ../../include/openssl/rand.h ../../include/openssl/rsa.h rsa_pk1.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -rsa_pk1.o: ../../include/openssl/symhacks.h ../cryptlib.h rsa_pk1.c +rsa_pk1.o: ../../include/openssl/symhacks.h ../constant_time_locl.h +rsa_pk1.o: ../cryptlib.h rsa_pk1.c rsa_pmeth.o: ../../e_os.h ../../include/openssl/asn1.h rsa_pmeth.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h rsa_pmeth.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h Index: openssl-1.0.1e/crypto/rsa/rsa.h =================================================================== --- openssl-1.0.1e.orig/crypto/rsa/rsa.h +++ openssl-1.0.1e/crypto/rsa/rsa.h @@ -559,6 +559,7 @@ void ERR_load_RSA_strings(void); #define RSA_R_OPERATION_NOT_ALLOWED_IN_FIPS_MODE 158 #define RSA_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE 148 #define RSA_R_PADDING_CHECK_FAILED 114 +#define RSA_R_PKCS_DECODING_ERROR 159 #define RSA_R_P_NOT_PRIME 128 #define RSA_R_Q_NOT_PRIME 129 #define RSA_R_RSA_OPERATIONS_NOT_SUPPORTED 130 Index: openssl-1.0.1e/crypto/rsa/rsa_err.c =================================================================== --- openssl-1.0.1e.orig/crypto/rsa/rsa_err.c +++ openssl-1.0.1e/crypto/rsa/rsa_err.c @@ -175,6 +175,7 @@ static ERR_STRING_DATA RSA_str_reasons[] {ERR_REASON(RSA_R_OPERATION_NOT_ALLOWED_IN_FIPS_MODE),"operation not allowed in fips mode"}, {ERR_REASON(RSA_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE),"operation not supported for this keytype"}, {ERR_REASON(RSA_R_PADDING_CHECK_FAILED) ,"padding check failed"}, +{ERR_REASON(RSA_R_PKCS_DECODING_ERROR) ,"pkcs decoding error"}, {ERR_REASON(RSA_R_P_NOT_PRIME) ,"p not prime"}, {ERR_REASON(RSA_R_Q_NOT_PRIME) ,"q not prime"}, {ERR_REASON(RSA_R_RSA_OPERATIONS_NOT_SUPPORTED),"rsa operations not supported"}, Index: openssl-1.0.1e/crypto/rsa/rsa_oaep.c =================================================================== --- openssl-1.0.1e.orig/crypto/rsa/rsa_oaep.c +++ openssl-1.0.1e/crypto/rsa/rsa_oaep.c @@ -18,6 +18,7 @@ * an equivalent notion. */ +#include "../constant_time_locl.h" #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA1) #include @@ -95,92 +96,117 @@ int RSA_padding_check_PKCS1_OAEP(unsigne const unsigned char *from, int flen, int num, const unsigned char *param, int plen) { - int i, dblen, mlen = -1; - const unsigned char *maskeddb; - int lzero; - unsigned char *db = NULL, seed[SHA_DIGEST_LENGTH], phash[SHA_DIGEST_LENGTH]; - unsigned char *padded_from; - int bad = 0; - - if (--num < 2 * SHA_DIGEST_LENGTH + 1) - /* 'num' is the length of the modulus, i.e. does not depend on the - * particular ciphertext. */ + int i, dblen, mlen = -1, one_index = 0, msg_index; + unsigned int good, found_one_byte; + const unsigned char *maskedseed, *maskeddb; + /* |em| is the encoded message, zero-padded to exactly |num| bytes: + * em = Y || maskedSeed || maskedDB */ + unsigned char *db = NULL, *em = NULL, seed[EVP_MAX_MD_SIZE], + phash[EVP_MAX_MD_SIZE]; + + if (tlen <= 0 || flen <= 0) + return -1; + + /* + * |num| is the length of the modulus; |flen| is the length of the + * encoded message. Therefore, for any |from| that was obtained by + * decrypting a ciphertext, we must have |flen| <= |num|. Similarly, + * num < 2 * SHA_DIGEST_LENGTH + 2 must hold for the modulus + * irrespective of the ciphertext, see PKCS #1 v2.2, section 7.1.2. + * This does not leak any side-channel information. + */ + if (num < flen || num < 2 * SHA_DIGEST_LENGTH + 2) goto decoding_err; - lzero = num - flen; - if (lzero < 0) - { - /* signalling this error immediately after detection might allow - * for side-channel attacks (e.g. timing if 'plen' is huge - * -- cf. James H. Manger, "A Chosen Ciphertext Attack on RSA Optimal - * Asymmetric Encryption Padding (OAEP) [...]", CRYPTO 2001), - * so we use a 'bad' flag */ - bad = 1; - lzero = 0; - flen = num; /* don't overflow the memcpy to padded_from */ - } - - dblen = num - SHA_DIGEST_LENGTH; - db = OPENSSL_malloc(dblen + num); - if (db == NULL) + dblen = num - SHA_DIGEST_LENGTH - 1; + db = OPENSSL_malloc(dblen); + em = OPENSSL_malloc(num); + if (db == NULL || em == NULL) { RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP, ERR_R_MALLOC_FAILURE); - return -1; + goto cleanup; } - /* Always do this zero-padding copy (even when lzero == 0) - * to avoid leaking timing info about the value of lzero. */ - padded_from = db + dblen; - memset(padded_from, 0, lzero); - memcpy(padded_from + lzero, from, flen); + /* + * Always do this zero-padding copy (even when num == flen) to avoid + * leaking that information. The copy still leaks some side-channel + * information, but it's impossible to have a fixed memory access + * pattern since we can't read out of the bounds of |from|. + * + * TODO(emilia): Consider porting BN_bn2bin_padded from BoringSSL. + */ + memset(em, 0, num); + memcpy(em + num - flen, from, flen); + + /* + * The first byte must be zero, however we must not leak if this is + * true. See James H. Manger, "A Chosen Ciphertext Attack on RSA + * Optimal Asymmetric Encryption Padding (OAEP) [...]", CRYPTO 2001). + */ + good = constant_time_is_zero(em[0]); - maskeddb = padded_from + SHA_DIGEST_LENGTH; + maskedseed = em + 1; + maskeddb = em + 1 + SHA_DIGEST_LENGTH; if (MGF1(seed, SHA_DIGEST_LENGTH, maskeddb, dblen)) - return -1; + goto cleanup; for (i = 0; i < SHA_DIGEST_LENGTH; i++) - seed[i] ^= padded_from[i]; - + seed[i] ^= maskedseed[i]; + if (MGF1(db, dblen, seed, SHA_DIGEST_LENGTH)) - return -1; + goto cleanup; for (i = 0; i < dblen; i++) db[i] ^= maskeddb[i]; if (!EVP_Digest((void *)param, plen, phash, NULL, EVP_sha1(), NULL)) - return -1; + goto cleanup; - if (CRYPTO_memcmp(db, phash, SHA_DIGEST_LENGTH) != 0 || bad) + good &= constant_time_is_zero(CRYPTO_memcmp(db, phash, SHA_DIGEST_LENGTH)); + + found_one_byte = 0; + for (i = SHA_DIGEST_LENGTH; i < dblen; i++) + { + /* Padding consists of a number of 0-bytes, followed by a 1. */ + unsigned int equals1 = constant_time_eq(db[i], 1); + unsigned int equals0 = constant_time_is_zero(db[i]); + one_index = constant_time_select_int(~found_one_byte & equals1, + i, one_index); + found_one_byte |= equals1; + good &= (found_one_byte | equals0); + } + + good &= found_one_byte; + + /* + * At this point |good| is zero unless the plaintext was valid, + * so plaintext-awareness ensures timing side-channels are no longer a + * concern. + */ + if (!good) goto decoding_err; + + msg_index = one_index + 1; + mlen = dblen - msg_index; + + if (tlen < mlen) + { + RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP, RSA_R_DATA_TOO_LARGE); + mlen = -1; + } else { - for (i = SHA_DIGEST_LENGTH; i < dblen; i++) - if (db[i] != 0x00) - break; - if (i == dblen || db[i] != 0x01) - goto decoding_err; - else - { - /* everything looks OK */ - - mlen = dblen - ++i; - if (tlen < mlen) - { - RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP, RSA_R_DATA_TOO_LARGE); - mlen = -1; - } - else - memcpy(to, db + i, mlen); - } + memcpy(to, db + msg_index, mlen); + goto cleanup; } - OPENSSL_free(db); - return mlen; decoding_err: - /* to avoid chosen ciphertext attacks, the error message should not reveal - * which kind of decoding error happened */ + /* To avoid chosen ciphertext attacks, the error message should not reveal + * which kind of decoding error happened. */ RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP, RSA_R_OAEP_DECODING_ERROR); +cleanup: if (db != NULL) OPENSSL_free(db); - return -1; + if (em != NULL) OPENSSL_free(em); + return mlen; } int PKCS1_MGF1(unsigned char *mask, long len, Index: openssl-1.0.1e/crypto/rsa/rsa_pk1.c =================================================================== --- openssl-1.0.1e.orig/crypto/rsa/rsa_pk1.c +++ openssl-1.0.1e/crypto/rsa/rsa_pk1.c @@ -56,6 +56,8 @@ * [including the GNU Public Licence.] */ +#include "../constant_time_locl.h" + #include #include "cryptlib.h" #include @@ -181,44 +183,87 @@ int RSA_padding_add_PKCS1_type_2(unsigne int RSA_padding_check_PKCS1_type_2(unsigned char *to, int tlen, const unsigned char *from, int flen, int num) { - int i,j; - const unsigned char *p; - - p=from; - if ((num != (flen+1)) || (*(p++) != 02)) - { - RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2,RSA_R_BLOCK_TYPE_IS_NOT_02); - return(-1); - } -#ifdef PKCS1_CHECK - return(num-11); -#endif - - /* scan over padding data */ - j=flen-1; /* one for type. */ - for (i=0; i tlen) - { - RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2,RSA_R_DATA_TOO_LARGE); - return(-1); - } - memcpy(to,p,(unsigned int)j); - - return(j); + int i; + /* |em| is the encoded message, zero-padded to exactly |num| bytes */ + unsigned char *em = NULL; + unsigned int good, found_zero_byte; + int zero_index = 0, msg_index, mlen = -1; + + if (tlen < 0 || flen < 0) + return -1; + + /* PKCS#1 v1.5 decryption. See "PKCS #1 v2.2: RSA Cryptography + * Standard", section 7.2.2. */ + + if (flen > num) + goto err; + + if (num < 11) + goto err; + + em = OPENSSL_malloc(num); + if (em == NULL) + { + RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2, ERR_R_MALLOC_FAILURE); + return -1; + } + memset(em, 0, num); + /* + * Always do this zero-padding copy (even when num == flen) to avoid + * leaking that information. The copy still leaks some side-channel + * information, but it's impossible to have a fixed memory access + * pattern since we can't read out of the bounds of |from|. + * + * TODO(emilia): Consider porting BN_bn2bin_padded from BoringSSL. + */ + memcpy(em + num - flen, from, flen); + + good = constant_time_is_zero(em[0]); + good &= constant_time_eq(em[1], 2); + + found_zero_byte = 0; + for (i = 2; i < num; i++) + { + unsigned int equals0 = constant_time_is_zero(em[i]); + zero_index = constant_time_select_int(~found_zero_byte & equals0, i, zero_index); + found_zero_byte |= equals0; + } + + /* + * PS must be at least 8 bytes long, and it starts two bytes into |em|. + * If we never found a 0-byte, then |zero_index| is 0 and the check + * also fails. + */ + good &= constant_time_ge((unsigned int)(zero_index), 2 + 8); + + /* Skip the zero byte. This is incorrect if we never found a zero-byte + * but in this case we also do not copy the message out. */ + msg_index = zero_index + 1; + mlen = num - msg_index; + + /* For good measure, do this check in constant time as well; it could + * leak something if |tlen| was assuming valid padding. */ + good &= constant_time_ge((unsigned int)(tlen), (unsigned int)(mlen)); + + /* + * We can't continue in constant-time because we need to copy the result + * and we cannot fake its length. This unavoidably leaks timing + * information at the API boundary. + * TODO(emilia): this could be addressed at the call site, + * see BoringSSL commit 0aa0767340baf925bda4804882aab0cb974b2d26. + */ + if (!good) + { + mlen = -1; + goto err; + } + + memcpy(to, em + msg_index, mlen); + +err: + if (em != NULL) + OPENSSL_free(em); + if (mlen == -1) + RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2, RSA_R_PKCS_DECODING_ERROR); + return mlen; } - Index: openssl-1.0.1e/ssl/Makefile =================================================================== --- openssl-1.0.1e.orig/ssl/Makefile +++ openssl-1.0.1e/ssl/Makefile @@ -547,26 +547,27 @@ s3_both.o: ../include/openssl/ssl23.h .. s3_both.o: ../include/openssl/stack.h ../include/openssl/symhacks.h s3_both.o: ../include/openssl/tls1.h ../include/openssl/x509.h s3_both.o: ../include/openssl/x509_vfy.h s3_both.c ssl_locl.h -s3_cbc.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h -s3_cbc.o: ../include/openssl/buffer.h ../include/openssl/comp.h -s3_cbc.o: ../include/openssl/crypto.h ../include/openssl/dsa.h -s3_cbc.o: ../include/openssl/dtls1.h ../include/openssl/e_os2.h -s3_cbc.o: ../include/openssl/ec.h ../include/openssl/ecdh.h -s3_cbc.o: ../include/openssl/ecdsa.h ../include/openssl/err.h -s3_cbc.o: ../include/openssl/evp.h ../include/openssl/hmac.h -s3_cbc.o: ../include/openssl/kssl.h ../include/openssl/lhash.h -s3_cbc.o: ../include/openssl/md5.h ../include/openssl/obj_mac.h -s3_cbc.o: ../include/openssl/objects.h ../include/openssl/opensslconf.h -s3_cbc.o: ../include/openssl/opensslv.h ../include/openssl/ossl_typ.h -s3_cbc.o: ../include/openssl/pem.h ../include/openssl/pem2.h -s3_cbc.o: ../include/openssl/pkcs7.h ../include/openssl/pqueue.h -s3_cbc.o: ../include/openssl/rsa.h ../include/openssl/safestack.h -s3_cbc.o: ../include/openssl/sha.h ../include/openssl/srtp.h -s3_cbc.o: ../include/openssl/ssl.h ../include/openssl/ssl2.h -s3_cbc.o: ../include/openssl/ssl23.h ../include/openssl/ssl3.h -s3_cbc.o: ../include/openssl/stack.h ../include/openssl/symhacks.h -s3_cbc.o: ../include/openssl/tls1.h ../include/openssl/x509.h -s3_cbc.o: ../include/openssl/x509_vfy.h s3_cbc.c ssl_locl.h +s3_cbc.o: ../crypto/constant_time_locl.h ../e_os.h ../include/openssl/asn1.h +s3_cbc.o: ../include/openssl/bio.h ../include/openssl/buffer.h +s3_cbc.o: ../include/openssl/comp.h ../include/openssl/crypto.h +s3_cbc.o: ../include/openssl/dsa.h ../include/openssl/dtls1.h +s3_cbc.o: ../include/openssl/e_os2.h ../include/openssl/ec.h +s3_cbc.o: ../include/openssl/ecdh.h ../include/openssl/ecdsa.h +s3_cbc.o: ../include/openssl/err.h ../include/openssl/evp.h +s3_cbc.o: ../include/openssl/hmac.h ../include/openssl/kssl.h +s3_cbc.o: ../include/openssl/lhash.h ../include/openssl/md5.h +s3_cbc.o: ../include/openssl/obj_mac.h ../include/openssl/objects.h +s3_cbc.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h +s3_cbc.o: ../include/openssl/ossl_typ.h ../include/openssl/pem.h +s3_cbc.o: ../include/openssl/pem2.h ../include/openssl/pkcs7.h +s3_cbc.o: ../include/openssl/pqueue.h ../include/openssl/rsa.h +s3_cbc.o: ../include/openssl/safestack.h ../include/openssl/sha.h +s3_cbc.o: ../include/openssl/srtp.h ../include/openssl/ssl.h +s3_cbc.o: ../include/openssl/ssl2.h ../include/openssl/ssl23.h +s3_cbc.o: ../include/openssl/ssl3.h ../include/openssl/stack.h +s3_cbc.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h +s3_cbc.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h s3_cbc.c +s3_cbc.o: ssl_locl.h s3_clnt.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h s3_clnt.o: ../include/openssl/bn.h ../include/openssl/buffer.h s3_clnt.o: ../include/openssl/comp.h ../include/openssl/crypto.h Index: openssl-1.0.1e/ssl/s3_cbc.c =================================================================== --- openssl-1.0.1e.orig/ssl/s3_cbc.c +++ openssl-1.0.1e/ssl/s3_cbc.c @@ -53,6 +53,7 @@ * */ +#include "../crypto/constant_time_locl.h" #include "ssl_locl.h" #include @@ -67,37 +68,6 @@ * supported by TLS.) */ #define MAX_HASH_BLOCK_SIZE 128 -/* Some utility functions are needed: - * - * These macros return the given value with the MSB copied to all the other - * bits. They use the fact that arithmetic shift shifts-in the sign bit. - * However, this is not ensured by the C standard so you may need to replace - * them with something else on odd CPUs. */ -#define DUPLICATE_MSB_TO_ALL(x) ( (unsigned)( (int)(x) >> (sizeof(int)*8-1) ) ) -#define DUPLICATE_MSB_TO_ALL_8(x) ((unsigned char)(DUPLICATE_MSB_TO_ALL(x))) - -/* constant_time_lt returns 0xff if a=b and 0x00 otherwise. */ -static unsigned constant_time_ge(unsigned a, unsigned b) - { - a -= b; - return DUPLICATE_MSB_TO_ALL(~a); - } - -/* constant_time_eq_8 returns 0xff if a==b and 0x00 otherwise. */ -static unsigned char constant_time_eq_8(unsigned a, unsigned b) - { - unsigned c = a ^ b; - c--; - return DUPLICATE_MSB_TO_ALL_8(c); - } - /* ssl3_cbc_remove_padding removes padding from the decrypted, SSLv3, CBC * record in |rec| by updating |rec->length| in constant time. * @@ -126,8 +96,8 @@ int ssl3_cbc_remove_padding(const SSL* s padding_length = good & (padding_length+1); rec->length -= padding_length; rec->type |= padding_length<<8; /* kludge: pass padding length */ - return (int)((good & 1) | (~good & -1)); -} + return constant_time_select_int(good, 1, -1); + } /* tls1_cbc_remove_padding removes the CBC padding from the decrypted, TLS, CBC * record in |rec| in constant time and returns 1 if the padding is valid and @@ -208,7 +178,7 @@ int tls1_cbc_remove_padding(const SSL* s for (i = 0; i < to_check; i++) { - unsigned char mask = constant_time_ge(padding_length, i); + unsigned char mask = constant_time_ge_8(padding_length, i); unsigned char b = rec->data[rec->length-1-i]; /* The final |padding_length+1| bytes should all have the value * |padding_length|. Therefore the XOR should be zero. */ @@ -216,20 +186,14 @@ int tls1_cbc_remove_padding(const SSL* s } /* If any of the final |padding_length+1| bytes had the wrong value, - * one or more of the lower eight bits of |good| will be cleared. We - * AND the bottom 8 bits together and duplicate the result to all the - * bits. */ - good &= good >> 4; - good &= good >> 2; - good &= good >> 1; - good <<= sizeof(good)*8-1; - good = DUPLICATE_MSB_TO_ALL(good); - + * one or more of the lower eight bits of |good| will be cleared. + */ + good = constant_time_eq(0xff, good & 0xff); padding_length = good & (padding_length+1); rec->length -= padding_length; rec->type |= padding_length<<8; /* kludge: pass padding length */ - return (int)((good & 1) | (~good & -1)); + return constant_time_select_int(good, 1, -1); } /* ssl3_cbc_copy_mac copies |md_size| bytes from the end of |rec| to |out| in @@ -296,8 +260,8 @@ void ssl3_cbc_copy_mac(unsigned char* ou memset(rotated_mac, 0, md_size); for (i = scan_start, j = 0; i < orig_len; i++) { - unsigned char mac_started = constant_time_ge(i, mac_start); - unsigned char mac_ended = constant_time_ge(i, mac_end); + unsigned char mac_started = constant_time_ge_8(i, mac_start); + unsigned char mac_ended = constant_time_ge_8(i, mac_end); unsigned char b = rec->data[i]; rotated_mac[j++] |= b & mac_started & ~mac_ended; j &= constant_time_lt(j,md_size); @@ -683,12 +647,12 @@ void ssl3_cbc_digest_record( b = data[k-header_length]; k++; - is_past_c = is_block_a & constant_time_ge(j, c); - is_past_cp1 = is_block_a & constant_time_ge(j, c+1); + is_past_c = is_block_a & constant_time_ge_8(j, c); + is_past_cp1 = is_block_a & constant_time_ge_8(j, c+1); /* If this is the block containing the end of the * application data, and we are at the offset for the * 0x80 value, then overwrite b with 0x80. */ - b = (b&~is_past_c) | (0x80&is_past_c); + b = constant_time_select_8(is_past_c, 0x80, b); /* If this the the block containing the end of the * application data and we're past the 0x80 value then * just write zero. */ @@ -704,7 +668,8 @@ void ssl3_cbc_digest_record( if (j >= md_block_size - md_length_size) { /* If this is index_b, write a length byte. */ - b = (b&~is_block_b) | (is_block_b&length_bytes[j-(md_block_size-md_length_size)]); + b = constant_time_select_8( + is_block_b, length_bytes[j-(md_block_size-md_length_size)], b); } block[j] = b; }