lilliput-ae-reference-implementation

Implementations of Lilliput-AE submitted to the NIST LWC standardization process
git clone https://git.kevinlegouguec.net/lilliput-ae-reference-implementation
Log | Files | Refs | README

commit 35f637aa76a6370aff4f6812d9bfeb9c0c371ec7
parent ec5c219519c4ebeb83e43725f9de3162bf7d5552
Author: Kévin Le Gouguec <kevin.legouguec@airbus.com>
Date:   Wed, 13 Feb 2019 17:54:06 +0100

Merge branch 'alpha-reformulation' into 'master'

Reformulations des considérations d'implémentation sur le tweakey schedule

See merge request paclido/sp3!11
Diffstat:
M nist/make-package.sh | 2 +-
D src/add_tweakeyloop/cipher.c | 2 --
D src/add_tweakeyloop/cipher.h | 2 --
D src/add_tweakeyloop/constants.h | 2 --
D src/add_tweakeyloop/lilliput-ae-i.c | 2 --
D src/add_tweakeyloop/lilliput-ae-ii.c | 2 --
D src/add_tweakeyloop/lilliput-ae-utils.h | 2 --
D src/add_tweakeyloop/lilliput-ae.h | 2 --
D src/add_tweakeyloop/tweakey.c | 146 -------------------------------------------------------------------------------
D src/add_tweakeyloop/tweakey.h | 2 --
A src/add_tweakeyunrolled/cipher.c | 2 ++
A src/add_tweakeyunrolled/cipher.h | 2 ++
A src/add_tweakeyunrolled/constants.h | 2 ++
A src/add_tweakeyunrolled/lilliput-ae-i.c | 2 ++
A src/add_tweakeyunrolled/lilliput-ae-ii.c | 2 ++
A src/add_tweakeyunrolled/lilliput-ae-utils.h | 2 ++
A src/add_tweakeyunrolled/lilliput-ae.h | 2 ++
A src/add_tweakeyunrolled/tweakey.c | 182 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/add_tweakeyunrolled/tweakey.h | 2 ++
M src/ref/tweakey.c | 70 +++++++++++++++++-----------------------------------------------------
M traces/traces-tbc.patch | 2 +-
21 files changed, 217 insertions(+), 217 deletions(-)

diff --git a/nist/make-package.sh b/nist/make-package.sh @@ -52,7 +52,7 @@ add-variant () implementations=( ref - add_tweakeyloop + add_tweakeyunrolled add_tweakeysequences ) diff --git a/src/add_tweakeyloop/cipher.c b/src/add_tweakeyloop/cipher.c @@ -1 +0,0 @@ -../ref/cipher.c -\ No newline at end of file diff --git a/src/add_tweakeyloop/cipher.h b/src/add_tweakeyloop/cipher.h @@ -1 +0,0 @@ -../ref/cipher.h -\ No newline at end of file diff --git a/src/add_tweakeyloop/constants.h b/src/add_tweakeyloop/constants.h @@ -1 +0,0 @@ -../ref/constants.h -\ No newline at end of file diff --git a/src/add_tweakeyloop/lilliput-ae-i.c b/src/add_tweakeyloop/lilliput-ae-i.c @@ -1 +0,0 @@ -../ref/lilliput-ae-i.c -\ No newline at end of file diff --git a/src/add_tweakeyloop/lilliput-ae-ii.c b/src/add_tweakeyloop/lilliput-ae-ii.c @@ -1 +0,0 @@ -../ref/lilliput-ae-ii.c -\ No newline at end of file diff --git a/src/add_tweakeyloop/lilliput-ae-utils.h b/src/add_tweakeyloop/lilliput-ae-utils.h @@ -1 +0,0 @@ -../ref/lilliput-ae-utils.h -\ No newline at end of file diff --git a/src/add_tweakeyloop/lilliput-ae.h b/src/add_tweakeyloop/lilliput-ae.h @@ -1 +0,0 @@ -../ref/lilliput-ae.h -\ No newline at end of file diff --git a/src/add_tweakeyloop/tweakey.c b/src/add_tweakeyloop/tweakey.c @@ -1,146 +0,0 @@ -/* -Implementation of the Lilliput-AE tweakable block cipher. - -Author: Kévin Le Gouguec, 2019. - -For more information, feedback or questions, refer to our website: -https://paclido.fr/lilliput-ae - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ - ---- - -This file provides an implementation of Lilliput-TBC's tweakey schedule, -where multiplications by matrices M and M_R to the power n are performed -by applying functions for M and M_R n times. 
-*/ - -#include <stdint.h> -#include <string.h> - -#include "constants.h" -#include "tweakey.h" - - -#define LANE_BITS 64 -#define LANE_BYTES (LANE_BITS/8) -#define LANES_NB (TWEAKEY_BYTES/LANE_BYTES) - - -void tweakey_state_init( - uint8_t TK[TWEAKEY_BYTES], - const uint8_t key[KEY_BYTES], - const uint8_t tweak[TWEAK_BYTES] -) -{ - memcpy(TK, tweak, TWEAK_BYTES); - memcpy(TK+TWEAK_BYTES, key, KEY_BYTES); -} - - -void tweakey_state_extract( - const uint8_t TK[TWEAKEY_BYTES], - uint8_t round_constant, - uint8_t round_tweakey[ROUND_TWEAKEY_BYTES] -) -{ - memset(round_tweakey, 0, ROUND_TWEAKEY_BYTES); - - for (size_t j=0; j<LANES_NB; j++) - { - const uint8_t *TKj = TK + j*LANE_BYTES; - - for (size_t k=0; k<LANE_BYTES; k++) - { - round_tweakey[k] ^= TKj[k]; - } - } - - round_tweakey[0] ^= round_constant; -} - - -static void _multiply_M(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) -{ - Y[7] = X[6]; - Y[6] = X[5]; - Y[5] = X[5]<<3 ^ X[4]; - Y[4] = X[4]>>3 ^ X[3]; - Y[3] = X[2]; - Y[2] = X[6]<<2 ^ X[1]; - Y[1] = X[0]; - Y[0] = X[7]; -} - -static void _multiply_M2(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) -{ - uint8_t M_X[LANE_BYTES]; - _multiply_M(X, M_X); - _multiply_M(M_X, Y); -} - -static void _multiply_M3(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) -{ - uint8_t M_X[LANE_BYTES]; - uint8_t M2_X[LANE_BYTES]; - _multiply_M(X, M_X); - _multiply_M(M_X, M2_X); - _multiply_M(M2_X, Y); -} - -static void _multiply_MR(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) -{ - Y[0] = X[1]; - Y[1] = X[2]; - Y[2] = X[3] ^ X[4]>>3; - Y[3] = X[4]; - Y[4] = X[5] ^ X[6]<<3; - Y[5] = X[3]<<2 ^ X[6]; - Y[6] = X[7]; - Y[7] = X[0]; -} - -static void _multiply_MR2(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) -{ - uint8_t MR_X[LANE_BYTES]; - _multiply_MR(X, MR_X); - _multiply_MR(MR_X, Y); -} - -static void _multiply_MR3(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) -{ - uint8_t MR_X[LANE_BYTES]; - uint8_t MR2_X[LANE_BYTES]; - _multiply_MR(X, MR_X); - 
_multiply_MR(MR_X, MR2_X); - _multiply_MR(MR2_X, Y); -} - -typedef void (*matrix_multiplication)(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]); - -static const matrix_multiplication ALPHAS[6] = { - _multiply_M, - _multiply_M2, - _multiply_M3, - _multiply_MR, - _multiply_MR2, - _multiply_MR3 -}; - - -void tweakey_state_update(uint8_t TK[TWEAKEY_BYTES]) -{ - /* Skip lane 0, as it is multiplied by the identity matrix. */ - - for (size_t j=1; j<LANES_NB; j++) - { - uint8_t *TKj = TK + j*LANE_BYTES; - - uint8_t TKj_old[LANE_BYTES]; - memcpy(TKj_old, TKj, LANE_BYTES); - - ALPHAS[j-1](TKj_old, TKj); - } -} diff --git a/src/add_tweakeyloop/tweakey.h b/src/add_tweakeyloop/tweakey.h @@ -1 +0,0 @@ -../ref/tweakey.h -\ No newline at end of file diff --git a/src/add_tweakeyunrolled/cipher.c b/src/add_tweakeyunrolled/cipher.c @@ -0,0 +1 @@ +../ref/cipher.c +\ No newline at end of file diff --git a/src/add_tweakeyunrolled/cipher.h b/src/add_tweakeyunrolled/cipher.h @@ -0,0 +1 @@ +../ref/cipher.h +\ No newline at end of file diff --git a/src/add_tweakeyunrolled/constants.h b/src/add_tweakeyunrolled/constants.h @@ -0,0 +1 @@ +../ref/constants.h +\ No newline at end of file diff --git a/src/add_tweakeyunrolled/lilliput-ae-i.c b/src/add_tweakeyunrolled/lilliput-ae-i.c @@ -0,0 +1 @@ +../ref/lilliput-ae-i.c +\ No newline at end of file diff --git a/src/add_tweakeyunrolled/lilliput-ae-ii.c b/src/add_tweakeyunrolled/lilliput-ae-ii.c @@ -0,0 +1 @@ +../ref/lilliput-ae-ii.c +\ No newline at end of file diff --git a/src/add_tweakeyunrolled/lilliput-ae-utils.h b/src/add_tweakeyunrolled/lilliput-ae-utils.h @@ -0,0 +1 @@ +../ref/lilliput-ae-utils.h +\ No newline at end of file diff --git a/src/add_tweakeyunrolled/lilliput-ae.h b/src/add_tweakeyunrolled/lilliput-ae.h @@ -0,0 +1 @@ +../ref/lilliput-ae.h +\ No newline at end of file diff --git a/src/add_tweakeyunrolled/tweakey.c b/src/add_tweakeyunrolled/tweakey.c @@ -0,0 +1,182 @@ +/* +Implementation of the Lilliput-AE tweakable block 
cipher. + +Author: Kévin Le Gouguec, 2019. + +For more information, feedback or questions, refer to our website: +https://paclido.fr/lilliput-ae + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ + +--- + +This file provides an implementation of Lilliput-TBC's tweakey schedule, +where multiplications by matrices M and M_R to the power n are performed +by functions expressing the exponentiated matrices with shifts and XORs. +*/ + +#include <stdint.h> +#include <string.h> + +#include "constants.h" +#include "tweakey.h" + + +#define LANE_BITS 64 +#define LANE_BYTES (LANE_BITS/8) +#define LANES_NB (TWEAKEY_BYTES/LANE_BYTES) + + +void tweakey_state_init( + uint8_t TK[TWEAKEY_BYTES], + const uint8_t key[KEY_BYTES], + const uint8_t tweak[TWEAK_BYTES] +) +{ + memcpy(TK, tweak, TWEAK_BYTES); + memcpy(TK+TWEAK_BYTES, key, KEY_BYTES); +} + + +void tweakey_state_extract( + const uint8_t TK[TWEAKEY_BYTES], + uint8_t round_constant, + uint8_t round_tweakey[ROUND_TWEAKEY_BYTES] +) +{ + memset(round_tweakey, 0, ROUND_TWEAKEY_BYTES); + + for (size_t j=0; j<LANES_NB; j++) + { + const uint8_t *TKj = TK + j*LANE_BYTES; + + for (size_t k=0; k<LANE_BYTES; k++) + { + round_tweakey[k] ^= TKj[k]; + } + } + + round_tweakey[0] ^= round_constant; +} + + +static uint8_t _M1(uint8_t x) +{ + return x<<3 ^ x>>3; +} + +static uint8_t _M2(uint8_t x) +{ + return x<<6 ^ (x>>3)<<3 ^ x>>6; +} + +static uint8_t _M3(uint8_t x) +{ + return (uint8_t)(x<<3) >> 3; +} + +static uint8_t _M4(uint8_t x) +{ + return (uint8_t)(x<<2) >> 3; +} + +static void _multiply_M(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) +{ + Y[7] = X[6]; + Y[6] = X[5]; + Y[5] = X[5]<<3 ^ X[4]; + Y[4] = X[4]>>3 ^ X[3]; + Y[3] = X[2]; + Y[2] = X[6]<<2 ^ X[1]; + Y[1] = X[0]; + Y[0] = X[7]; +} + +static void _multiply_M2(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) +{ + Y[7] = X[5]; + 
Y[6] = X[5]<<3 ^ X[4]; + Y[5] = X[5]<<6 ^ _M1(X[4]) ^ X[3]; + Y[4] = X[4]>>6 ^ X[3]>>3 ^ X[2]; + Y[3] = X[6]<<2 ^ X[1]; + Y[2] = X[5]<<2 ^ X[0]; + Y[1] = X[7]; + Y[0] = X[6]; +} + +static void _multiply_M3(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) +{ + Y[7] = X[5]<<3 ^ X[4]; + Y[6] = X[5]<<6 ^ _M1(X[4]) ^ X[3]; + Y[5] = _M2(X[4]) ^ _M1(X[3]) ^ X[2]; + Y[4] = X[6]<<2 ^ X[3]>>6 ^ X[2]>>3 ^ X[1]; + Y[3] = X[5]<<2 ^ X[0]; + Y[2] = X[7] ^ X[5]<<5 ^ X[4]<<2; + Y[1] = X[6]; + Y[0] = X[5]; +} + +static void _multiply_MR(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) +{ + Y[0] = X[1]; + Y[1] = X[2]; + Y[2] = X[3] ^ X[4]>>3; + Y[3] = X[4]; + Y[4] = X[5] ^ X[6]<<3; + Y[5] = X[3]<<2 ^ X[6]; + Y[6] = X[7]; + Y[7] = X[0]; +} + +static void _multiply_MR2(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) +{ + Y[0] = X[2]; + Y[1] = X[3] ^ X[4]>>3; + Y[2] = X[4] ^ X[5]>>3 ^ _M3(X[6]); + Y[3] = X[5] ^ X[6]<<3; + Y[4] = X[3]<<2 ^ X[6] ^ X[7]<<3; + Y[5] = X[4]<<2 ^ X[7]; + Y[6] = X[0]; + Y[7] = X[1]; +} + +static void _multiply_MR3(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) +{ + Y[0] = X[3] ^ X[4]>>3; + Y[1] = X[4] ^ X[5]>>3 ^ _M3(X[6]); + Y[2] = _M4(X[3]) ^ X[5] ^ _M1(X[6]) ^ _M3(X[7]); + Y[3] = X[3]<<2 ^ X[6] ^ X[7]<<3; + Y[4] = X[0]<<3 ^ X[4]<<2 ^ X[7]; + Y[5] = X[0] ^ X[5]<<2 ^ X[6]<<5; + Y[6] = X[1]; + Y[7] = X[2]; +} + +typedef void (*matrix_multiplication)(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]); + +static const matrix_multiplication ALPHAS[6] = { + _multiply_M, + _multiply_M2, + _multiply_M3, + _multiply_MR, + _multiply_MR2, + _multiply_MR3 +}; + + +void tweakey_state_update(uint8_t TK[TWEAKEY_BYTES]) +{ + /* Skip lane 0, as it is multiplied by the identity matrix. 
*/ + + for (size_t j=1; j<LANES_NB; j++) + { + uint8_t *TKj = TK + j*LANE_BYTES; + + uint8_t TKj_old[LANE_BYTES]; + memcpy(TKj_old, TKj, LANE_BYTES); + + ALPHAS[j-1](TKj_old, TKj); + } +} diff --git a/src/add_tweakeyunrolled/tweakey.h b/src/add_tweakeyunrolled/tweakey.h @@ -0,0 +1 @@ +../ref/tweakey.h +\ No newline at end of file diff --git a/src/ref/tweakey.c b/src/ref/tweakey.c @@ -14,7 +14,7 @@ http://creativecommons.org/publicdomain/zero/1.0/ This file provides an implementation of Lilliput-TBC's tweakey schedule, where multiplications by matrices M and M_R to the power n are performed -by functions expressing the exponentiated matrices with shifts and XORs. +by applying functions for M and M_R n times. */ #include <stdint.h> @@ -62,26 +62,6 @@ void tweakey_state_extract( } -static uint8_t _M1(uint8_t x) -{ - return x<<3 ^ x>>3; -} - -static uint8_t _M2(uint8_t x) -{ - return x<<6 ^ (x & 0xf8) ^ x>>6; -} - -static uint8_t _M3(uint8_t x) -{ - return (uint8_t)(x<<3) >> 3; -} - -static uint8_t _M4(uint8_t x) -{ - return (uint8_t)(x<<2) >> 3; -} - static void _multiply_M(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) { Y[7] = X[6]; @@ -96,26 +76,18 @@ static void _multiply_M(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) static void _multiply_M2(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) { - Y[7] = X[5]; - Y[6] = X[5]<<3 ^ X[4]; - Y[5] = X[5]<<6 ^ _M1(X[4]) ^ X[3]; - Y[4] = X[4]>>6 ^ X[3]>>3 ^ X[2]; - Y[3] = X[6]<<2 ^ X[1]; - Y[2] = X[5]<<2 ^ X[0]; - Y[1] = X[7]; - Y[0] = X[6]; + uint8_t M_X[LANE_BYTES]; + _multiply_M(X, M_X); + _multiply_M(M_X, Y); } static void _multiply_M3(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) { - Y[7] = X[5]<<3 ^ X[4]; - Y[6] = X[5]<<6 ^ _M1(X[4]) ^ X[3]; - Y[5] = _M2(X[4]) ^ _M1(X[3]) ^ X[2]; - Y[4] = X[6]<<2 ^ X[3]>>6 ^ X[2]>>3 ^ X[1]; - Y[3] = X[5]<<2 ^ X[0]; - Y[2] = X[7] ^ X[5]<<5 ^ X[4]<<2; - Y[1] = X[6]; - Y[0] = X[5]; + uint8_t M_X[LANE_BYTES]; + uint8_t M2_X[LANE_BYTES]; + _multiply_M(X, M_X); + 
_multiply_M(M_X, M2_X); + _multiply_M(M2_X, Y); } static void _multiply_MR(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) @@ -132,26 +104,18 @@ static void _multiply_MR(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) static void _multiply_MR2(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) { - Y[0] = X[2]; - Y[1] = X[3] ^ X[4]>>3; - Y[2] = X[4] ^ X[5]>>3 ^ _M3(X[6]); - Y[3] = X[5] ^ X[6]<<3; - Y[4] = X[3]<<2 ^ X[6] ^ X[7]<<3; - Y[5] = X[4]<<2 ^ X[7]; - Y[6] = X[0]; - Y[7] = X[1]; + uint8_t MR_X[LANE_BYTES]; + _multiply_MR(X, MR_X); + _multiply_MR(MR_X, Y); } static void _multiply_MR3(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]) { - Y[0] = X[3] ^ X[4]>>3; - Y[1] = X[4] ^ X[5]>>3 ^ _M3(X[6]); - Y[2] = _M4(X[3]) ^ X[5] ^ _M1(X[6]) ^ _M3(X[7]); - Y[3] = X[3]<<2 ^ X[6] ^ X[7]<<3; - Y[4] = X[0]<<3 ^ X[4]<<2 ^ X[7]; - Y[5] = X[0] ^ X[5]<<2 ^ X[6]<<5; - Y[6] = X[1]; - Y[7] = X[2]; + uint8_t MR_X[LANE_BYTES]; + uint8_t MR2_X[LANE_BYTES]; + _multiply_MR(X, MR_X); + _multiply_MR(MR_X, MR2_X); + _multiply_MR(MR2_X, Y); } typedef void (*matrix_multiplication)(const uint8_t X[LANE_BYTES], uint8_t Y[LANE_BYTES]); diff --git a/traces/traces-tbc.patch b/traces/traces-tbc.patch @@ -121,7 +121,7 @@ index 39e5980..4cdcf2a 100644 --- a/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/ref/tweakey.c +++ b/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/ref/tweakey.c @@ -17,6 +17,8 @@ where multiplications by matrices M and M_R to the power n is performed by - functions expressing the exponentiated matrices with shifts and XORs. + by applying functions for M and M_R n times. */ +#include "debug.h"