From d26dfcef1bca5d86ce9042b78605a399b6d74423 Mon Sep 17 00:00:00 2001 From: Kévin Le Gouguec Date: Wed, 20 Mar 2019 15:46:12 +0100 Subject: Ajout de l'implémentation "FELICS" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Semblable en tout point à l'implémentation de référence, sauf pour des optimisations manuelles dans tweakey.c. Les gains sont significatifs même si surprenants : Lilliput-I-128 on AVR (vref with -O3) code_size: -3.21% (7420 ↘ 7182) code_ram: -2.08% (530 ↘ 519) code_time: -26.13% (176922 ↘ 130701) Lilliput-I-192 on AVR (vref with -O3) code_size: -3.66% (7550 ↘ 7274) code_ram: -1.90% (578 ↘ 567) code_time: -28.34% (228210 ↘ 163530) Lilliput-I-256 on AVR (vref with -O3) code_size: -4.05% (7698 ↘ 7386) code_ram: -1.71% (642 ↘ 631) code_time: -29.87% (301863 ↘ 211704) Lilliput-II-128 on AVR (vref with -O3) code_size: -3.04% (6704 ↘ 6500) code_ram: -2.94% (511 ↘ 496) code_time: -25.97% (181884 ↘ 134648) Lilliput-II-192 on AVR (vref with -O3) code_size: -3.56% (6682 ↘ 6444) code_ram: -1.97% (559 ↘ 548) code_time: -26.30% (264608 ↘ 195028) Lilliput-II-256 on AVR (vref with -O3) code_size: -4.06% (6804 ↘ 6528) code_ram: -1.77% (623 ↘ 612) code_time: -28.47% (354220 ↘ 253368) Lilliput-I-128 on MSP (vref with -O3) code_time: -17.72% (153285 ↘ 126129) Lilliput-I-192 on MSP (vref with -O3) code_size: -1.02% (8466 ↘ 8380) code_time: -19.77% (199203 ↘ 159828) Lilliput-I-256 on MSP (vref with -O3) code_time: -20.90% (268416 ↘ 212328) Lilliput-II-128 on MSP (vref with -O3) code_size: -2.49% (6336 ↘ 6178) code_time: -13.25% (172179 ↘ 149363) Lilliput-II-192 on MSP (vref with -O3) code_size: -1.22% (6406 ↘ 6328) code_time: -17.93% (227943 ↘ 187063) Lilliput-II-256 on MSP (vref with -O3) code_size: -1.30% (6600 ↘ 6514) code_time: -19.98% (307751 ↘ 246251) Lilliput-I-128 on ARM (vref with -O3) code_time: -16.94% (104944 ↘ 87170) Lilliput-I-192 on ARM (vref with -O3) code_time: -18.41% (132736 ↘ 108295) Lilliput-I-256 on ARM 
(vref with -O3) code_time: -18.74% (175979 ↘ 143001) Lilliput-II-128 on ARM (vref with -O3) code_time: -17.63% (114004 ↘ 93907) Lilliput-II-192 on ARM (vref with -O3) code_time: -17.55% (157405 ↘ 129780) Lilliput-II-256 on ARM (vref with -O3) code_time: -18.44% (206440 ↘ 168382) Lilliput-I-128 on PC (vref with -O3) code_time: -11.43% (11744 ↘ 10402) Lilliput-I-192 on PC (vref with -O3) code_time: -10.54% (14593 ↘ 13055) Lilliput-I-256 on PC (vref with -O3) code_time: -11.80% (18856 ↘ 16631) Lilliput-II-128 on PC (vref with -O3) code_size: -1.02% (7421 ↘ 7345) code_time: -9.11% (13080 ↘ 11889) Lilliput-II-192 on PC (vref with -O3) code_time: -10.51% (16809 ↘ 15043) Lilliput-II-256 on PC (vref with -O3) code_time: -10.96% (21970 ↘ 19561) --- src/add_felicsref/cipher.c | 1 + src/add_felicsref/cipher.h | 1 + src/add_felicsref/constants.h | 1 + src/add_felicsref/implem.mk | 1 + src/add_felicsref/lilliput-ae-utils.h | 1 + src/add_felicsref/lilliput-ae.h | 1 + src/add_felicsref/lilliput-i.c | 1 + src/add_felicsref/lilliput-ii.c | 1 + src/add_felicsref/tweakey.c | 213 ++++++++++++++++++++++++++++++++++ src/add_felicsref/tweakey.h | 1 + 10 files changed, 222 insertions(+) create mode 120000 src/add_felicsref/cipher.c create mode 120000 src/add_felicsref/cipher.h create mode 120000 src/add_felicsref/constants.h create mode 120000 src/add_felicsref/implem.mk create mode 120000 src/add_felicsref/lilliput-ae-utils.h create mode 120000 src/add_felicsref/lilliput-ae.h create mode 120000 src/add_felicsref/lilliput-i.c create mode 120000 src/add_felicsref/lilliput-ii.c create mode 100644 src/add_felicsref/tweakey.c create mode 120000 src/add_felicsref/tweakey.h (limited to 'src') diff --git a/src/add_felicsref/cipher.c b/src/add_felicsref/cipher.c new file mode 120000 index 0000000..a2ac6a3 --- /dev/null +++ b/src/add_felicsref/cipher.c @@ -0,0 +1 @@ +../ref/cipher.c \ No newline at end of file diff --git a/src/add_felicsref/cipher.h b/src/add_felicsref/cipher.h new file mode 
120000 index 0000000..eab258b --- /dev/null +++ b/src/add_felicsref/cipher.h @@ -0,0 +1 @@ +../ref/cipher.h \ No newline at end of file diff --git a/src/add_felicsref/constants.h b/src/add_felicsref/constants.h new file mode 120000 index 0000000..67df0f3 --- /dev/null +++ b/src/add_felicsref/constants.h @@ -0,0 +1 @@ +../ref/constants.h \ No newline at end of file diff --git a/src/add_felicsref/implem.mk b/src/add_felicsref/implem.mk new file mode 120000 index 0000000..eb789fb --- /dev/null +++ b/src/add_felicsref/implem.mk @@ -0,0 +1 @@ +../ref/implem.mk \ No newline at end of file diff --git a/src/add_felicsref/lilliput-ae-utils.h b/src/add_felicsref/lilliput-ae-utils.h new file mode 120000 index 0000000..b46625b --- /dev/null +++ b/src/add_felicsref/lilliput-ae-utils.h @@ -0,0 +1 @@ +../ref/lilliput-ae-utils.h \ No newline at end of file diff --git a/src/add_felicsref/lilliput-ae.h b/src/add_felicsref/lilliput-ae.h new file mode 120000 index 0000000..66c8314 --- /dev/null +++ b/src/add_felicsref/lilliput-ae.h @@ -0,0 +1 @@ +../ref/lilliput-ae.h \ No newline at end of file diff --git a/src/add_felicsref/lilliput-i.c b/src/add_felicsref/lilliput-i.c new file mode 120000 index 0000000..46688d4 --- /dev/null +++ b/src/add_felicsref/lilliput-i.c @@ -0,0 +1 @@ +../ref/lilliput-i.c \ No newline at end of file diff --git a/src/add_felicsref/lilliput-ii.c b/src/add_felicsref/lilliput-ii.c new file mode 120000 index 0000000..09abf10 --- /dev/null +++ b/src/add_felicsref/lilliput-ii.c @@ -0,0 +1 @@ +../ref/lilliput-ii.c \ No newline at end of file diff --git a/src/add_felicsref/tweakey.c b/src/add_felicsref/tweakey.c new file mode 100644 index 0000000..635c179 --- /dev/null +++ b/src/add_felicsref/tweakey.c @@ -0,0 +1,213 @@ +/* +Implementation of the Lilliput-AE tweakable block cipher. + +Authors, hereby denoted as "the implementer": + Kévin Le Gouguec, + 2019. 
+ +For more information, feedback or questions, refer to our website: +https://paclido.fr/lilliput-ae + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ + +--- + +This file provides an implementation of Lilliput-TBC's tweakey schedule, +similar to the reference implementation save for a few manual optimizations: + +- unused multiplication functions were removed using preprocessor + conditionals based on the number of lanes; + +- the loop over an array of function pointers was unrolled. + +These handmade optimizations have been found to significantly decrease code +size and execution time on GCC versions used in the FELICS framework. + +This suggests that the compiler does not detect dead code nor does it +recognize unrolling opportunities, despite the multiplication functions +being static and thus limited in scope to the compilation unit. +*/ + +#include <stdint.h> +#include <string.h> + +#include "constants.h" +#include "tweakey.h" + + +#define LANE_BITS 64 +#define LANE_BYTES (LANE_BITS/8) +#define LANES_NB (TWEAKEY_BYTES/LANE_BYTES) + + +void tweakey_state_init( + uint8_t TK[TWEAKEY_BYTES], + const uint8_t key[KEY_BYTES], + const uint8_t tweak[TWEAK_BYTES] +) +{ + memcpy(TK, tweak, TWEAK_BYTES); + memcpy(TK+TWEAK_BYTES, key, KEY_BYTES); +} + + +void tweakey_state_extract( + const uint8_t TK[TWEAKEY_BYTES], + uint8_t round_constant, + uint8_t round_tweakey[ROUND_TWEAKEY_BYTES] +) +{ + memset(round_tweakey, 0, ROUND_TWEAKEY_BYTES); + + for (size_t j=0; j<LANES_NB; j++) + { + const uint8_t *TKj = TK + j*LANE_BYTES; + + for (size_t k=0; k<LANE_BYTES; k++) + { + round_tweakey[k] ^= TKj[k]; + } + } + + round_tweakey[0] ^= round_constant; +} + + +static void _multiply_M(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]) +{ + y[7] = x[6]; + y[6] = x[5]; + y[5] = x[5]<<3 ^ x[4]; + y[4] = x[4]>>3 ^ x[3]; + y[3] = x[2]; + y[2] = x[6]<<2 ^ x[1]; + y[1] = x[0]; + y[0] = x[7]; +} + +static void _multiply_M2(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]) +{ + uint8_t x_M_5 = x[5]<<3 ^ x[4]; + uint8_t x_M_4 = x[4]>>3 ^ x[3]; + + y[7] = x[5]; + y[6] = x_M_5; + y[5] = x_M_5<<3 ^ x_M_4; + y[4] = x_M_4>>3 ^ x[2]; + y[3] = x[6]<<2 ^ x[1]; + y[2] = x[5]<<2 ^ x[0]; + y[1] = x[7]; + y[0] = 
x[6]; +} + +static void _multiply_M3(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]) +{ + uint8_t x_M_5 = x[5]<<3 ^ x[4]; + uint8_t x_M_4 = x[4]>>3 ^ x[3]; + uint8_t x_M2_5 = x_M_5<<3 ^ x_M_4; + uint8_t x_M2_4 = x_M_4>>3 ^ x[2]; + + y[7] = x_M_5; + y[6] = x_M2_5; + y[5] = x_M2_5<<3 ^ x_M2_4; + y[4] = x_M2_4>>3 ^ x[6]<<2 ^ x[1]; + y[3] = x[5]<<2 ^ x[0]; + y[2] = x_M_5<<2 ^ x[7]; + y[1] = x[6]; + y[0] = x[5]; +} + +#if LANES_NB >= 5 +static void _multiply_MR(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]) +{ + y[0] = x[1]; + y[1] = x[2]; + y[2] = x[3] ^ x[4]>>3; + y[3] = x[4]; + y[4] = x[5] ^ x[6]<<3; + y[5] = x[3]<<2 ^ x[6]; + y[6] = x[7]; + y[7] = x[0]; +} + +#if LANES_NB >= 6 +static void _multiply_MR2(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]) +{ + uint8_t x_MR_4 = x[5] ^ x[6]<<3; + + y[0] = x[2]; + y[1] = x[3] ^ x[4]>>3; + y[2] = x[4] ^ x_MR_4>>3; + y[3] = x_MR_4; + y[4] = x[3]<<2 ^ x[6] ^ x[7]<<3; + y[5] = x[4]<<2 ^ x[7]; + y[6] = x[0]; + y[7] = x[1]; +} + +#if LANES_NB >= 7 +static void _multiply_MR3(const uint8_t x[LANE_BYTES], uint8_t y[LANE_BYTES]) +{ + uint8_t x_MR_4 = x[5] ^ x[6]<<3; + uint8_t x_MR2_4 = x[3]<<2 ^ x[6] ^ x[7]<<3; + + y[0] = x[3] ^ x[4]>>3; + y[1] = x[4] ^ x_MR_4>>3; + y[2] = x_MR_4 ^ x_MR2_4>>3; + y[3] = x_MR2_4; + y[4] = x[0]<<3 ^ x[4]<<2 ^ x[7]; + y[5] = x_MR_4<<2 ^ x[0]; + y[6] = x[1]; + y[7] = x[2]; +} +#endif +#endif +#endif + + +void tweakey_state_update(uint8_t TK[TWEAKEY_BYTES]) +{ + /* Skip lane 0, as it is multiplied by the identity matrix. 
*/ + + size_t j; + uint8_t *TKj; + uint8_t TKj_old[LANE_BYTES]; + + j = 1; + TKj = TK + j*LANE_BYTES; + memcpy(TKj_old, TKj, LANE_BYTES); + _multiply_M(TKj_old, TKj); + + j = 2; + TKj = TK + j*LANE_BYTES; + memcpy(TKj_old, TKj, LANE_BYTES); + _multiply_M2(TKj_old, TKj); + + j = 3; + TKj = TK + j*LANE_BYTES; + memcpy(TKj_old, TKj, LANE_BYTES); + _multiply_M3(TKj_old, TKj); + +#if LANES_NB >= 5 + j = 4; + TKj = TK + j*LANE_BYTES; + memcpy(TKj_old, TKj, LANE_BYTES); + _multiply_MR(TKj_old, TKj); + +#if LANES_NB >= 6 + j = 5; + TKj = TK + j*LANE_BYTES; + memcpy(TKj_old, TKj, LANE_BYTES); + _multiply_MR2(TKj_old, TKj); + +#if LANES_NB >= 7 + j = 6; + TKj = TK + j*LANE_BYTES; + memcpy(TKj_old, TKj, LANE_BYTES); + _multiply_MR3(TKj_old, TKj); +#endif +#endif +#endif +} diff --git a/src/add_felicsref/tweakey.h b/src/add_felicsref/tweakey.h new file mode 120000 index 0000000..7f2415f --- /dev/null +++ b/src/add_felicsref/tweakey.h @@ -0,0 +1 @@ +../ref/tweakey.h \ No newline at end of file -- cgit v1.2.3