From 5706e920a465d187ba3fe428417b195276aa4da0 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Tue, 25 Aug 2020 19:28:13 +0200 Subject: [PATCH 1/2] Remove a useless zeroization Remove the zeroization of a pointer variable in the AES block functions. The code was valid but spurious and misleading since it looked like a mistaken attempt to zeroize the pointed-to buffer. Reported by Antonio de la Piedra, CEA Leti, France. Note that we do not zeroize the buffer here because these are the round keys, and they need to stay until all the blocks are processed. They will be zeroized in mbedtls_aes_free(). Signed-off-by: Gilles Peskine --- ChangeLog.d/aes-zeroize-pointer.txt | 5 +++++ library/aes.c | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 ChangeLog.d/aes-zeroize-pointer.txt diff --git a/ChangeLog.d/aes-zeroize-pointer.txt b/ChangeLog.d/aes-zeroize-pointer.txt new file mode 100644 index 0000000000..ccc6dc1596 --- /dev/null +++ b/ChangeLog.d/aes-zeroize-pointer.txt @@ -0,0 +1,5 @@ +Changes + * Remove the zeroization of a pointer variable in AES rounds. It was valid + but spurious and misleading since it looked like a mistaken attempt to + zeroize the pointed-to buffer. Reported by Antonio de la Piedra, CEA + Leti, France. diff --git a/library/aes.c b/library/aes.c index e0c7a11c38..7b5e2c6635 100644 --- a/library/aes.c +++ b/library/aes.c @@ -796,8 +796,6 @@ int mbedtls_internal_aes_encrypt( mbedtls_aes_context *ctx, mbedtls_zeroize( &Y2, sizeof( Y2 ) ); mbedtls_zeroize( &Y3, sizeof( Y3 ) ); - mbedtls_zeroize( &RK, sizeof( RK ) ); - return( 0 ); } #endif /* !MBEDTLS_AES_ENCRYPT_ALT */ @@ -876,8 +874,6 @@ int mbedtls_internal_aes_decrypt( mbedtls_aes_context *ctx, mbedtls_zeroize( &Y2, sizeof( Y2 ) ); mbedtls_zeroize( &Y3, sizeof( Y3 ) ); - mbedtls_zeroize( &RK, sizeof( RK ) ); - return( 0 ); } #endif /* !MBEDTLS_AES_DECRYPT_ALT */ From acbf9eccb55489800f2ca85f37419569af53cb03 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Wed, 26 Aug 2020 17:03:24 +0200 Subject: [PATCH 2/2] Put local variables in a struct This way we can have a single call to mbedtls_zeroize, which saves a few bytes of code size. Additionally, on my PC, I notice a significant speed improvement (x86_64 build with MBEDTLS_AESNI_C disabled, gcc 5.4.0 -O3). I don't have an explanation for that (I expected no measurable difference). Signed-off-by: Gilles Peskine Signed-off-by: Ronald Cron --- library/aes.c | 162 +++++++++++++++++++++++--------------------------- 1 file changed, 76 insertions(+), 86 deletions(-) diff --git a/library/aes.c b/library/aes.c index 7b5e2c6635..b8aee9077b 100644 --- a/library/aes.c +++ b/library/aes.c @@ -740,61 +740,56 @@ int mbedtls_internal_aes_encrypt( mbedtls_aes_context *ctx, unsigned char output[16] ) { int i; - uint32_t *RK, X0, X1, X2, X3, Y0, Y1, Y2, Y3; + uint32_t *RK = ctx->rk; + struct + { + uint32_t X[4]; + uint32_t Y[4]; + } t; - RK = ctx->rk; - - GET_UINT32_LE( X0, input, 0 ); X0 ^= *RK++; - GET_UINT32_LE( X1, input, 4 ); X1 ^= *RK++; - GET_UINT32_LE( X2, input, 8 ); X2 ^= *RK++; - GET_UINT32_LE( X3, input, 12 ); X3 ^= *RK++; + GET_UINT32_LE( t.X[0], input, 0 ); t.X[0] ^= *RK++; + GET_UINT32_LE( t.X[1], input, 4 ); t.X[1] ^= *RK++; + GET_UINT32_LE( t.X[2], input, 8 ); t.X[2] ^= *RK++; + GET_UINT32_LE( t.X[3], input, 12 ); t.X[3] ^= *RK++; for( i = ( ctx->nr >> 1 ) - 1; i > 0; i-- ) { - AES_FROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 ); - AES_FROUND( X0, X1, X2, X3, Y0, Y1, Y2, Y3 ); + AES_FROUND( t.Y[0], t.Y[1], t.Y[2], t.Y[3], t.X[0], t.X[1], t.X[2], t.X[3] ); + AES_FROUND( t.X[0], t.X[1], t.X[2], t.X[3], t.Y[0], t.Y[1], t.Y[2], t.Y[3] ); } - AES_FROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 ); + AES_FROUND( t.Y[0], t.Y[1], t.Y[2], t.Y[3], t.X[0], t.X[1], t.X[2], t.X[3] ); - X0 = *RK++ ^ \ - ( (uint32_t) FSb[ ( Y0 ) & 0xFF ] ) ^ - ( (uint32_t) FSb[ ( Y1 >> 8 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) FSb[ ( Y2 >> 16 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) FSb[ ( Y3 >> 24 ) & 0xFF ] << 24 ); + t.X[0] = *RK++ ^ \ + ( (uint32_t) FSb[ ( t.Y[0] ) & 0xFF ] ) ^ + ( (uint32_t) FSb[ ( t.Y[1] >> 8 ) & 0xFF ] << 8 ) ^ + ( (uint32_t) FSb[ ( t.Y[2] >> 16 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) FSb[ ( t.Y[3] >> 24 ) & 0xFF ] << 24 ); - X1 = *RK++ ^ \ - ( (uint32_t) FSb[ ( Y1 ) & 0xFF ] ) ^ - ( (uint32_t) FSb[ ( Y2 >> 8 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) FSb[ ( Y3 >> 16 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) FSb[ ( Y0 >> 24 ) & 0xFF ] << 24 ); + t.X[1] = *RK++ ^ \ + ( (uint32_t) FSb[ ( t.Y[1] ) & 0xFF ] ) ^ + ( (uint32_t) FSb[ ( t.Y[2] >> 8 ) & 0xFF ] << 8 ) ^ + ( (uint32_t) FSb[ ( t.Y[3] >> 16 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) FSb[ ( t.Y[0] >> 24 ) & 0xFF ] << 24 ); - X2 = *RK++ ^ \ - ( (uint32_t) FSb[ ( Y2 ) & 0xFF ] ) ^ - ( (uint32_t) FSb[ ( Y3 >> 8 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) FSb[ ( Y0 >> 16 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) FSb[ ( Y1 >> 24 ) & 0xFF ] << 24 ); + t.X[2] = *RK++ ^ \ + ( (uint32_t) FSb[ ( t.Y[2] ) & 0xFF ] ) ^ + ( (uint32_t) FSb[ ( t.Y[3] >> 8 ) & 0xFF ] << 8 ) ^ + ( (uint32_t) FSb[ ( t.Y[0] >> 16 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) FSb[ ( t.Y[1] >> 24 ) & 0xFF ] << 24 ); - X3 = *RK++ ^ \ - ( (uint32_t) FSb[ ( Y3 ) & 0xFF ] ) ^ - ( (uint32_t) FSb[ ( Y0 >> 8 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) FSb[ ( Y1 >> 16 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) FSb[ ( Y2 >> 24 ) & 0xFF ] << 24 ); + t.X[3] = *RK++ ^ \ + ( (uint32_t) FSb[ ( t.Y[3] ) & 0xFF ] ) ^ + ( (uint32_t) FSb[ ( t.Y[0] >> 8 ) & 0xFF ] << 8 ) ^ + ( (uint32_t) FSb[ ( t.Y[1] >> 16 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) FSb[ ( t.Y[2] >> 24 ) & 0xFF ] << 24 ); - PUT_UINT32_LE( X0, output, 0 ); - PUT_UINT32_LE( X1, output, 4 ); - PUT_UINT32_LE( X2, output, 8 ); - PUT_UINT32_LE( X3, output, 12 ); + PUT_UINT32_LE( t.X[0], output, 0 ); + PUT_UINT32_LE( t.X[1], output, 4 ); + PUT_UINT32_LE( t.X[2], output, 8 ); + PUT_UINT32_LE( t.X[3], output, 12 ); - mbedtls_zeroize( &X0, sizeof( X0 ) ); - mbedtls_zeroize( &X1, sizeof( X1 ) ); - mbedtls_zeroize( &X2, sizeof( X2 ) ); - mbedtls_zeroize( &X3, sizeof( X3 ) ); - - mbedtls_zeroize( &Y0, sizeof( Y0 ) ); - mbedtls_zeroize( &Y1, sizeof( Y1 ) ); - mbedtls_zeroize( &Y2, sizeof( Y2 ) ); - mbedtls_zeroize( &Y3, sizeof( Y3 ) ); + mbedtls_zeroize( &t, sizeof( t ) ); return( 0 ); } @@ -818,61 +813,56 @@ int mbedtls_internal_aes_decrypt( mbedtls_aes_context *ctx, unsigned char output[16] ) { int i; - uint32_t *RK, X0, X1, X2, X3, Y0, Y1, Y2, Y3; + uint32_t *RK = ctx->rk; + struct + { + uint32_t X[4]; + uint32_t Y[4]; + } t; - RK = ctx->rk; - - GET_UINT32_LE( X0, input, 0 ); X0 ^= *RK++; - GET_UINT32_LE( X1, input, 4 ); X1 ^= *RK++; - GET_UINT32_LE( X2, input, 8 ); X2 ^= *RK++; - GET_UINT32_LE( X3, input, 12 ); X3 ^= *RK++; + GET_UINT32_LE( t.X[0], input, 0 ); t.X[0] ^= *RK++; + GET_UINT32_LE( t.X[1], input, 4 ); t.X[1] ^= *RK++; + GET_UINT32_LE( t.X[2], input, 8 ); t.X[2] ^= *RK++; + GET_UINT32_LE( t.X[3], input, 12 ); t.X[3] ^= *RK++; for( i = ( ctx->nr >> 1 ) - 1; i > 0; i-- ) { - AES_RROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 ); - AES_RROUND( X0, X1, X2, X3, Y0, Y1, Y2, Y3 ); + AES_RROUND( t.Y[0], t.Y[1], t.Y[2], t.Y[3], t.X[0], t.X[1], t.X[2], t.X[3] ); + AES_RROUND( t.X[0], t.X[1], t.X[2], t.X[3], t.Y[0], t.Y[1], t.Y[2], t.Y[3] ); } - AES_RROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 ); + AES_RROUND( t.Y[0], t.Y[1], t.Y[2], t.Y[3], t.X[0], t.X[1], t.X[2], t.X[3] ); - X0 = *RK++ ^ \ - ( (uint32_t) RSb[ ( Y0 ) & 0xFF ] ) ^ - ( (uint32_t) RSb[ ( Y3 >> 8 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) RSb[ ( Y2 >> 16 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) RSb[ ( Y1 >> 24 ) & 0xFF ] << 24 ); + t.X[0] = *RK++ ^ \ + ( (uint32_t) RSb[ ( t.Y[0] ) & 0xFF ] ) ^ + ( (uint32_t) RSb[ ( t.Y[3] >> 8 ) & 0xFF ] << 8 ) ^ + ( (uint32_t) RSb[ ( t.Y[2] >> 16 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) RSb[ ( t.Y[1] >> 24 ) & 0xFF ] << 24 ); - X1 = *RK++ ^ \ - ( (uint32_t) RSb[ ( Y1 ) & 0xFF ] ) ^ - ( (uint32_t) RSb[ ( Y0 >> 8 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) RSb[ ( Y3 >> 16 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) RSb[ ( Y2 >> 24 ) & 0xFF ] << 24 ); + t.X[1] = *RK++ ^ \ + ( (uint32_t) RSb[ ( t.Y[1] ) & 0xFF ] ) ^ + ( (uint32_t) RSb[ ( t.Y[0] >> 8 ) & 0xFF ] << 8 ) ^ + ( (uint32_t) RSb[ ( t.Y[3] >> 16 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) RSb[ ( t.Y[2] >> 24 ) & 0xFF ] << 24 ); - X2 = *RK++ ^ \ - ( (uint32_t) RSb[ ( Y2 ) & 0xFF ] ) ^ - ( (uint32_t) RSb[ ( Y1 >> 8 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) RSb[ ( Y0 >> 16 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) RSb[ ( Y3 >> 24 ) & 0xFF ] << 24 ); + t.X[2] = *RK++ ^ \ + ( (uint32_t) RSb[ ( t.Y[2] ) & 0xFF ] ) ^ + ( (uint32_t) RSb[ ( t.Y[1] >> 8 ) & 0xFF ] << 8 ) ^ + ( (uint32_t) RSb[ ( t.Y[0] >> 16 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) RSb[ ( t.Y[3] >> 24 ) & 0xFF ] << 24 ); - X3 = *RK++ ^ \ - ( (uint32_t) RSb[ ( Y3 ) & 0xFF ] ) ^ - ( (uint32_t) RSb[ ( Y2 >> 8 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) RSb[ ( Y1 >> 16 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) RSb[ ( Y0 >> 24 ) & 0xFF ] << 24 ); + t.X[3] = *RK++ ^ \ + ( (uint32_t) RSb[ ( t.Y[3] ) & 0xFF ] ) ^ + ( (uint32_t) RSb[ ( t.Y[2] >> 8 ) & 0xFF ] << 8 ) ^ + ( (uint32_t) RSb[ ( t.Y[1] >> 16 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) RSb[ ( t.Y[0] >> 24 ) & 0xFF ] << 24 ); - PUT_UINT32_LE( X0, output, 0 ); - PUT_UINT32_LE( X1, output, 4 ); - PUT_UINT32_LE( X2, output, 8 ); - PUT_UINT32_LE( X3, output, 12 ); + PUT_UINT32_LE( t.X[0], output, 0 ); + PUT_UINT32_LE( t.X[1], output, 4 ); + PUT_UINT32_LE( t.X[2], output, 8 ); + PUT_UINT32_LE( t.X[3], output, 12 ); - mbedtls_zeroize( &X0, sizeof( X0 ) ); - mbedtls_zeroize( &X1, sizeof( X1 ) ); - mbedtls_zeroize( &X2, sizeof( X2 ) ); - mbedtls_zeroize( &X3, sizeof( X3 ) ); - - mbedtls_zeroize( &Y0, sizeof( Y0 ) ); - mbedtls_zeroize( &Y1, sizeof( Y1 ) ); - mbedtls_zeroize( &Y2, sizeof( Y2 ) ); - mbedtls_zeroize( &Y3, sizeof( Y3 ) ); + mbedtls_zeroize( &t, sizeof( t ) ); return( 0 ); }