summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kévin Le Gouguec <kevin.legouguec@airbus.com> 2019-03-20 15:46:12 +0100
committer Kévin Le Gouguec <kevin.legouguec@airbus.com> 2019-03-20 15:46:12 +0100
commit d26dfcef1bca5d86ce9042b78605a399b6d74423 (patch)
tree 71cc45dfb0f3925fe1c56b8a2f40dc27004b6037
parent 3ffe28672860a63fd95ed9e97601f30258ea2bdb (diff)
download lilliput-ae-implem-d26dfcef1bca5d86ce9042b78605a399b6d74423.tar.xz
Ajout de l'implémentation "FELICS"
Semblable en tout point à l'implémentation de référence, sauf pour des optimisations manuelles dans tweakey.c. Les gains sont significatifs même si surprenants :

Lilliput-I-128 on AVR (vref with -O3) code_size: -3.21% (7420 ↘ 7182) code_ram: -2.08% (530 ↘ 519) code_time: -26.13% (176922 ↘ 130701)
Lilliput-I-192 on AVR (vref with -O3) code_size: -3.66% (7550 ↘ 7274) code_ram: -1.90% (578 ↘ 567) code_time: -28.34% (228210 ↘ 163530)
Lilliput-I-256 on AVR (vref with -O3) code_size: -4.05% (7698 ↘ 7386) code_ram: -1.71% (642 ↘ 631) code_time: -29.87% (301863 ↘ 211704)
Lilliput-II-128 on AVR (vref with -O3) code_size: -3.04% (6704 ↘ 6500) code_ram: -2.94% (511 ↘ 496) code_time: -25.97% (181884 ↘ 134648)
Lilliput-II-192 on AVR (vref with -O3) code_size: -3.56% (6682 ↘ 6444) code_ram: -1.97% (559 ↘ 548) code_time: -26.30% (264608 ↘ 195028)
Lilliput-II-256 on AVR (vref with -O3) code_size: -4.06% (6804 ↘ 6528) code_ram: -1.77% (623 ↘ 612) code_time: -28.47% (354220 ↘ 253368)

Lilliput-I-128 on MSP (vref with -O3) code_time: -17.72% (153285 ↘ 126129)
Lilliput-I-192 on MSP (vref with -O3) code_size: -1.02% (8466 ↘ 8380) code_time: -19.77% (199203 ↘ 159828)
Lilliput-I-256 on MSP (vref with -O3) code_time: -20.90% (268416 ↘ 212328)
Lilliput-II-128 on MSP (vref with -O3) code_size: -2.49% (6336 ↘ 6178) code_time: -13.25% (172179 ↘ 149363)
Lilliput-II-192 on MSP (vref with -O3) code_size: -1.22% (6406 ↘ 6328) code_time: -17.93% (227943 ↘ 187063)
Lilliput-II-256 on MSP (vref with -O3) code_size: -1.30% (6600 ↘ 6514) code_time: -19.98% (307751 ↘ 246251)

Lilliput-I-128 on ARM (vref with -O3) code_time: -16.94% (104944 ↘ 87170)
Lilliput-I-192 on ARM (vref with -O3) code_time: -18.41% (132736 ↘ 108295)
Lilliput-I-256 on ARM (vref with -O3) code_time: -18.74% (175979 ↘ 143001)
Lilliput-II-128 on ARM (vref with -O3) code_time: -17.63% (114004 ↘ 93907)
Lilliput-II-192 on ARM (vref with -O3) code_time: -17.55% (157405 ↘ 129780)
Lilliput-II-256 on ARM (vref with -O3) code_time: -18.44% (206440 ↘ 168382)

Lilliput-I-128 on PC (vref with -O3) code_time: -11.43% (11744 ↘ 10402)
Lilliput-I-192 on PC (vref with -O3) code_time: -10.54% (14593 ↘ 13055)
Lilliput-I-256 on PC (vref with -O3) code_time: -11.80% (18856 ↘ 16631)
Lilliput-II-128 on PC (vref with -O3) code_size: -1.02% (7421 ↘ 7345) code_time: -9.11% (13080 ↘ 11889)
Lilliput-II-192 on PC (vref with -O3) code_time: -10.51% (16809 ↘ 15043)
Lilliput-II-256 on PC (vref with -O3) code_time: -10.96% (21970 ↘ 19561)
-rwxr-xr-x  nist/make-package.sh  1
l---------  src/add_felicsref/cipher.c  1
l---------  src/add_felicsref/cipher.h  1
l---------  src/add_felicsref/constants.h  1
l---------  src/add_felicsref/implem.mk  1
l---------  src/add_felicsref/lilliput-ae-utils.h  1
l---------  src/add_felicsref/lilliput-ae.h  1
l---------  src/add_felicsref/lilliput-i.c  1
l---------  src/add_felicsref/lilliput-ii.c  1
-rw-r--r--  src/add_felicsref/tweakey.c  213
l---------  src/add_felicsref/tweakey.h  1
11 files changed, 223 insertions, 0 deletions
diff --git a/nist/make-package.sh b/nist/make-package.sh
index 124da4b..234532a 100755
--- a/nist/make-package.sh
+++ b/nist/make-package.sh
@@ -65,6 +65,7 @@ add-variant ()
implementations=(
ref
+ add_felicsref
add_threshold
add_tweakeyloop
)
diff --git a/src/add_felicsref/cipher.c b/src/add_felicsref/cipher.c
new file mode 120000
index 0000000..a2ac6a3
--- /dev/null
+++ b/src/add_felicsref/cipher.c
@@ -0,0 +1 @@
+../ref/cipher.c \ No newline at end of file
diff --git a/src/add_felicsref/cipher.h b/src/add_felicsref/cipher.h
new file mode 120000
index 0000000..eab258b
--- /dev/null
+++ b/src/add_felicsref/cipher.h
@@ -0,0 +1 @@
+../ref/cipher.h \ No newline at end of file
diff --git a/src/add_felicsref/constants.h b/src/add_felicsref/constants.h
new file mode 120000
index 0000000..67df0f3
--- /dev/null
+++ b/src/add_felicsref/constants.h
@@ -0,0 +1 @@
+../ref/constants.h \ No newline at end of file
diff --git a/src/add_felicsref/implem.mk b/src/add_felicsref/implem.mk
new file mode 120000
index 0000000..eb789fb
--- /dev/null
+++ b/src/add_felicsref/implem.mk
@@ -0,0 +1 @@
+../ref/implem.mk \ No newline at end of file
diff --git a/src/add_felicsref/lilliput-ae-utils.h b/src/add_felicsref/lilliput-ae-utils.h
new file mode 120000
index 0000000..b46625b
--- /dev/null
+++ b/src/add_felicsref/lilliput-ae-utils.h
@@ -0,0 +1 @@
+../ref/lilliput-ae-utils.h \ No newline at end of file
diff --git a/src/add_felicsref/lilliput-ae.h b/src/add_felicsref/lilliput-ae.h
new file mode 120000
index 0000000..66c8314
--- /dev/null
+++ b/src/add_felicsref/lilliput-ae.h
@@ -0,0 +1 @@
+../ref/lilliput-ae.h \ No newline at end of file
diff --git a/src/add_felicsref/lilliput-i.c b/src/add_felicsref/lilliput-i.c
new file mode 120000
index 0000000..46688d4
--- /dev/null
+++ b/src/add_felicsref/lilliput-i.c
@@ -0,0 +1 @@
+../ref/lilliput-i.c \ No newline at end of file
diff --git a/src/add_felicsref/lilliput-ii.c b/src/add_felicsref/lilliput-ii.c
new file mode 120000
index 0000000..09abf10
--- /dev/null
+++ b/src/add_felicsref/lilliput-ii.c
@@ -0,0 +1 @@
+../ref/lilliput-ii.c \ No newline at end of file
diff --git a/src/add_felicsref/tweakey.c b/src/add_felicsref/tweakey.c
new file mode 100644
index 0000000..635c179
--- /dev/null
+++ b/src/add_felicsref/tweakey.c
@@ -0,0 +1,213 @@
+/*
+Implementation of the Lilliput-AE tweakable block cipher.
+
+Authors, hereby denoted as "the implementer":
+ Kévin Le Gouguec,
+ 2019.
+
+For more information, feedback or questions, refer to our website:
+https://paclido.fr/lilliput-ae
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+
+---
+
+This file provides an implementation of Lilliput-TBC's tweakey schedule,
+similar to the reference implementation save for a few manual optimizations:
+
+- unused multiplication functions were removed using preprocessor
+ conditionals based on the number of lanes;
+
+- the loop over an array of function pointers was unrolled.
+
+These handmade optimizations have been found to significantly decrease code
+size and execution time on GCC versions used in the FELICS framework.
+
+This suggests that the compiler does not detect dead code nor does it
+recognize unrolling opportunities, despite the multiplication functions
+being static and thus limited in scope to the compilation unit.
+*/
+
+#include <stdint.h>
+#include <string.h>
+
+#include "constants.h"
+#include "tweakey.h"
+
+
+#define LANE_BITS 64 /* width of one tweakey lane, in bits */
+#define LANE_BYTES (LANE_BITS/8) /* width of one tweakey lane, in bytes */
+#define LANES_NB (TWEAKEY_BYTES/LANE_BYTES) /* number of lanes in the tweakey state */
+
+
+/*
+ * Build the initial tweakey state.
+ *
+ * The first TWEAK_BYTES bytes of TK receive the tweak; the KEY_BYTES bytes
+ * that follow receive the key.
+ *
+ *   TK     output, tweakey state to fill
+ *   key    input, KEY_BYTES bytes
+ *   tweak  input, TWEAK_BYTES bytes
+ */
+void tweakey_state_init(
+    uint8_t TK[TWEAKEY_BYTES],
+    const uint8_t key[KEY_BYTES],
+    const uint8_t tweak[TWEAK_BYTES]
+)
+{
+    uint8_t *tweak_part = &TK[0];
+    uint8_t *key_part = &TK[TWEAK_BYTES];
+
+    memcpy(tweak_part, tweak, TWEAK_BYTES);
+    memcpy(key_part, key, KEY_BYTES);
+}
+
+
+/*
+ * Derive a round tweakey from the tweakey state.
+ *
+ * round_tweakey is first cleared over ROUND_TWEAKEY_BYTES bytes; its first
+ * LANE_BYTES bytes then accumulate the XOR of all LANES_NB lanes of TK, and
+ * finally round_constant is XORed into byte 0.
+ *
+ *   TK              input, tweakey state (not modified)
+ *   round_constant  value folded into the first output byte
+ *   round_tweakey   output, ROUND_TWEAKEY_BYTES bytes
+ */
+void tweakey_state_extract(
+    const uint8_t TK[TWEAKEY_BYTES],
+    uint8_t round_constant,
+    uint8_t round_tweakey[ROUND_TWEAKEY_BYTES]
+)
+{
+    size_t lane;
+    size_t byte;
+
+    memset(round_tweakey, 0, ROUND_TWEAKEY_BYTES);
+
+    /* Lane-major traversal: fold each lane into the accumulator in turn. */
+    for (lane = 0; lane < LANES_NB; lane++)
+    {
+        for (byte = 0; byte < LANE_BYTES; byte++)
+        {
+            round_tweakey[byte] ^= TK[lane*LANE_BYTES + byte];
+        }
+    }
+
+    round_tweakey[0] ^= round_constant;
+}
+
+
+/*
+ * Multiply one lane by M: reads the LANE_BYTES bytes of src and writes the
+ * product to dst.
+ *
+ * dst is written while src is still being read, so the two buffers must not
+ * overlap.
+ */
+static void _multiply_M(const uint8_t src[LANE_BYTES], uint8_t dst[LANE_BYTES])
+{
+    dst[7] = src[6];
+    dst[6] = src[5];
+    dst[5] = (src[5] << 3) ^ src[4];
+    dst[4] = (src[4] >> 3) ^ src[3];
+    dst[3] = src[2];
+    dst[2] = (src[6] << 2) ^ src[1];
+    dst[1] = src[0];
+    dst[0] = src[7];
+}
+
+/*
+ * Multiply one lane by M^2 in a single pass: the result equals applying
+ * _multiply_M twice to src.
+ *
+ * dst is written while src is still being read, so the two buffers must not
+ * overlap.
+ */
+static void _multiply_M2(const uint8_t src[LANE_BYTES], uint8_t dst[LANE_BYTES])
+{
+    /* Bytes 5 and 4 of M*src, kept as uint8_t so the high bits are dropped
+       before they are shifted again. */
+    const uint8_t m1_5 = (src[5] << 3) ^ src[4];
+    const uint8_t m1_4 = (src[4] >> 3) ^ src[3];
+
+    dst[7] = src[5];
+    dst[6] = m1_5;
+    dst[5] = (m1_5 << 3) ^ m1_4;
+    dst[4] = (m1_4 >> 3) ^ src[2];
+    dst[3] = (src[6] << 2) ^ src[1];
+    dst[2] = (src[5] << 2) ^ src[0];
+    dst[1] = src[7];
+    dst[0] = src[6];
+}
+
+/*
+ * Multiply one lane by M^3 in a single pass: the result equals applying
+ * _multiply_M three times to src.
+ *
+ * dst is written while src is still being read, so the two buffers must not
+ * overlap.
+ */
+static void _multiply_M3(const uint8_t src[LANE_BYTES], uint8_t dst[LANE_BYTES])
+{
+    /* Bytes 5 and 4 of M*src, then of M^2*src; each is truncated to 8 bits
+       before being reused. */
+    const uint8_t m1_5 = (src[5] << 3) ^ src[4];
+    const uint8_t m1_4 = (src[4] >> 3) ^ src[3];
+    const uint8_t m2_5 = (m1_5 << 3) ^ m1_4;
+    const uint8_t m2_4 = (m1_4 >> 3) ^ src[2];
+
+    dst[7] = m1_5;
+    dst[6] = m2_5;
+    dst[5] = (m2_5 << 3) ^ m2_4;
+    dst[4] = (m2_4 >> 3) ^ (src[6] << 2) ^ src[1];
+    dst[3] = (src[5] << 2) ^ src[0];
+    dst[2] = (m1_5 << 2) ^ src[7];
+    dst[1] = src[6];
+    dst[0] = src[5];
+}
+
+/*
+ * Multiplications by MR, MR^2 and MR^3. Each one is compiled only when the
+ * tweakey state has enough lanes for it to be used (5, 6 and 7 lanes
+ * respectively).
+ *
+ * In all three, dst is written while src is still being read, so the two
+ * buffers must not overlap.
+ */
+
+#if LANES_NB >= 5
+/* Multiply one lane by MR. */
+static void _multiply_MR(const uint8_t src[LANE_BYTES], uint8_t dst[LANE_BYTES])
+{
+    dst[0] = src[1];
+    dst[1] = src[2];
+    dst[2] = src[3] ^ (src[4] >> 3);
+    dst[3] = src[4];
+    dst[4] = src[5] ^ (src[6] << 3);
+    dst[5] = (src[3] << 2) ^ src[6];
+    dst[6] = src[7];
+    dst[7] = src[0];
+}
+#endif
+
+#if LANES_NB >= 6
+/* Multiply one lane by MR^2; equals applying _multiply_MR twice. */
+static void _multiply_MR2(const uint8_t src[LANE_BYTES], uint8_t dst[LANE_BYTES])
+{
+    /* Byte 4 of MR*src, truncated to 8 bits before being shifted right. */
+    const uint8_t r1_4 = src[5] ^ (src[6] << 3);
+
+    dst[0] = src[2];
+    dst[1] = src[3] ^ (src[4] >> 3);
+    dst[2] = src[4] ^ (r1_4 >> 3);
+    dst[3] = r1_4;
+    dst[4] = (src[3] << 2) ^ src[6] ^ (src[7] << 3);
+    dst[5] = (src[4] << 2) ^ src[7];
+    dst[6] = src[0];
+    dst[7] = src[1];
+}
+#endif
+
+#if LANES_NB >= 7
+/* Multiply one lane by MR^3; equals applying _multiply_MR three times. */
+static void _multiply_MR3(const uint8_t src[LANE_BYTES], uint8_t dst[LANE_BYTES])
+{
+    /* Byte 4 of MR*src and of MR^2*src, each truncated to 8 bits. */
+    const uint8_t r1_4 = src[5] ^ (src[6] << 3);
+    const uint8_t r2_4 = (src[3] << 2) ^ src[6] ^ (src[7] << 3);
+
+    dst[0] = src[3] ^ (src[4] >> 3);
+    dst[1] = src[4] ^ (r1_4 >> 3);
+    dst[2] = r1_4 ^ (r2_4 >> 3);
+    dst[3] = r2_4;
+    dst[4] = (src[0] << 3) ^ (src[4] << 2) ^ src[7];
+    dst[5] = (r1_4 << 2) ^ src[0];
+    dst[6] = src[1];
+    dst[7] = src[2];
+}
+#endif
+
+
+/*
+ * Advance the tweakey state in place by one step.
+ *
+ * Lane 0 is left untouched, as it is multiplied by the identity matrix.
+ * Lanes 1 to 3 are multiplied by M, M^2 and M^3; lanes 4 to 6, when the
+ * state has that many lanes, are multiplied by MR, MR^2 and MR^3.
+ *
+ * The per-lane steps are deliberately written out one by one rather than
+ * looping over a table of function pointers.
+ */
+void tweakey_state_update(uint8_t TK[TWEAKEY_BYTES])
+{
+    /* Copy of the lane being updated: the multiplication routines read
+       their input while writing their output, so they need distinct
+       buffers. */
+    uint8_t previous[LANE_BYTES];
+    uint8_t *lane;
+
+    lane = TK + 1*LANE_BYTES;
+    memcpy(previous, lane, LANE_BYTES);
+    _multiply_M(previous, lane);
+
+    lane = TK + 2*LANE_BYTES;
+    memcpy(previous, lane, LANE_BYTES);
+    _multiply_M2(previous, lane);
+
+    lane = TK + 3*LANE_BYTES;
+    memcpy(previous, lane, LANE_BYTES);
+    _multiply_M3(previous, lane);
+
+#if LANES_NB >= 5
+    lane = TK + 4*LANE_BYTES;
+    memcpy(previous, lane, LANE_BYTES);
+    _multiply_MR(previous, lane);
+#endif
+
+#if LANES_NB >= 6
+    lane = TK + 5*LANE_BYTES;
+    memcpy(previous, lane, LANE_BYTES);
+    _multiply_MR2(previous, lane);
+#endif
+
+#if LANES_NB >= 7
+    lane = TK + 6*LANE_BYTES;
+    memcpy(previous, lane, LANE_BYTES);
+    _multiply_MR3(previous, lane);
+#endif
+}
diff --git a/src/add_felicsref/tweakey.h b/src/add_felicsref/tweakey.h
new file mode 120000
index 0000000..7f2415f
--- /dev/null
+++ b/src/add_felicsref/tweakey.h
@@ -0,0 +1 @@
+../ref/tweakey.h \ No newline at end of file