1 files changed, 310 insertions, 0 deletions
diff --git a/traces/add_threshold/traces-tbc.patch b/traces/add_threshold/traces-tbc.patch
new file mode 100644
index 0000000..69efdf1
--- /dev/null
+++ b/traces/add_threshold/traces-tbc.patch
@@ -0,0 +1,310 @@
+diff --git a/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/add_threshold/cipher.c b/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/add_threshold/cipher.c
+index db1ec04..5c2db14 100644
+--- a/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/add_threshold/cipher.c
++++ b/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/add_threshold/cipher.c
+@@ -25,6 +25,8 @@ throughout the entire round function in order to avoid extra randomness
+ generation to switch from 2 shares to 3 shares and vice versa.
+ */
+ 
++#include "debug.h"
++
+ #include <stdint.h>
+ #include <string.h>
+ 
+@@ -100,6 +102,8 @@ static void _state_init(
+     uint8_t SHARES_1[BLOCK_BYTES];
+     randombytes(sizeof(SHARES_0), SHARES_0);
+     randombytes(sizeof(SHARES_1), SHARES_1);
++    debug_dump_buffer("SHARES_0", sizeof(SHARES_0), SHARES_0, 8);
++    debug_dump_buffer("SHARES_1", sizeof(SHARES_1), SHARES_1, 8);
+ 
+     memcpy(X, SHARES_0, BLOCK_BYTES);
+     memcpy(Y, SHARES_1, BLOCK_BYTES);
+@@ -117,15 +121,25 @@ static void _compute_round_tweakeys(
+     uint8_t RTK_Y[ROUNDS][ROUND_TWEAKEY_BYTES]
+ )
+ {
++    fprintf(DUMP, "computing %zu round sub-tweakeys\n", (size_t)ROUNDS);
++
+     uint8_t TK_X[TWEAKEY_BYTES];
+     uint8_t TK_Y[TWEAKEY_BYTES];
+     tweakey_state_init(TK_X, TK_Y, key, tweak);
+     tweakey_state_extract(TK_X, TK_Y, 0, RTK_X[0], RTK_Y[0]);
+ 
++    fprintf(DUMP, "    0\n");
++    debug_dump_buffer("RTK_X", ROUND_TWEAKEY_BYTES, RTK_X[0], 8);
++    debug_dump_buffer("RTK_Y", ROUND_TWEAKEY_BYTES, RTK_Y[0], 8);
++
+     for (uint8_t i=1; i<ROUNDS; i++)
+     {
+         tweakey_state_update(TK_X, TK_Y);
++        debug_dump_buffer("TK_X", TWEAKEY_BYTES, TK_X, 8);
++        debug_dump_buffer("TK_Y", TWEAKEY_BYTES, TK_Y, 8);
+         tweakey_state_extract(TK_X, TK_Y, i, RTK_X[i], RTK_Y[i]);
++        debug_dump_buffer("RTK_X", ROUND_TWEAKEY_BYTES, RTK_X[i], 8);
++        debug_dump_buffer("RTK_Y", ROUND_TWEAKEY_BYTES, RTK_Y[i], 8);
+     }
+ }
+ 
+@@ -138,6 +152,12 @@ static void _nonlinear_layer(
+     const uint8_t RTK_Y[ROUND_TWEAKEY_BYTES]
+ )
+ {
++    fprintf(DUMP, "        nonlinear layer\n");
++
++    debug_dump_buffer("X", BLOCK_BYTES, X, 12);
++    debug_dump_buffer("Y", BLOCK_BYTES, Y, 12);
++    debug_dump_buffer("Z", BLOCK_BYTES, Z, 12);
++
+     uint8_t x_hi, y_hi, z_hi;   // High nibbles for the Feistel network
+     uint8_t x_lo, y_lo, z_lo;   // Low nibbles for the Feistel network
+     uint8_t tmp0, tmp1, tmp2;
+@@ -152,9 +172,14 @@ static void _nonlinear_layer(
+         TMP_Y[j] = Y[j] ^ RTK_Y[j];
+     }
+ 
++    debug_dump_buffer("Xj XOR RTK_Xj", sizeof(TMP_X), TMP_X, 12);
++    debug_dump_buffer("Yj XOR RTK_Yj", sizeof(TMP_Y), TMP_Y, 12);
++
+     // Threshold Implementation of the 8-bit S-box
+     for (size_t j=0; j<ROUND_TWEAKEY_BYTES; j++)
+     {
++        fprintf(DUMP, "        S-box (%zu/%zu)\n", j+1, (size_t)ROUND_TWEAKEY_BYTES);
++
+         // Decomposition into nibbles
+         x_hi = TMP_X[j] >> 4;
+         x_lo = TMP_X[j] & 0xf;
+@@ -162,14 +187,34 @@ static void _nonlinear_layer(
+         y_lo = TMP_Y[j] & 0xf;
+         z_hi = Z[j] >> 4;
+         z_lo = Z[j] & 0xf;
++
++        fprintf(DUMP, "            x_hi: %u\n", x_hi);
++        fprintf(DUMP, "            x_lo: %u\n", x_lo);
++        fprintf(DUMP, "            y_hi: %u\n", y_hi);
++        fprintf(DUMP, "            y_lo: %u\n", y_lo);
++        fprintf(DUMP, "            z_hi: %u\n", z_hi);
++        fprintf(DUMP, "            z_lo: %u\n", z_lo);
++
+         // First 4-bit S-box
++        fprintf(DUMP, "            First 4-bit S-box\n");
++
+         tmp0 = G[(y_lo&7)>>1][z_lo];
+         tmp1 = G[(z_lo&7)>>1][x_lo];
+         tmp2 = G[(x_lo&7)>>1][y_lo];
+         x_hi ^= F[tmp1][tmp2];
+         y_hi ^= F[tmp2][tmp0];
+         z_hi ^= F[tmp0][tmp1];
++
++        fprintf(DUMP, "            tmp0: %u\n", tmp0);
++        fprintf(DUMP, "            tmp1: %u\n", tmp1);
++        fprintf(DUMP, "            tmp2: %u\n", tmp2);
++        fprintf(DUMP, "            x_hi: %u\n", x_hi);
++        fprintf(DUMP, "            y_hi: %u\n", y_hi);
++        fprintf(DUMP, "            z_hi: %u\n", z_hi);
++
+         // Second 4-bit S-box
++        fprintf(DUMP, "            Second 4-bit S-box\n");
++
+         tmp0 = P[Q[y_hi&3 ^ (y_hi&8)>>1][z_hi]];
+         tmp1 = P[Q[z_hi&3 ^ (z_hi&8)>>1][x_hi]];
+         tmp2 = P[Q[x_hi&3 ^ (x_hi&8)>>1][y_hi]];
+@@ -183,12 +228,28 @@ static void _nonlinear_layer(
+         x_hi ^= F[tmp1][tmp2];
+         y_hi ^= F[tmp2][tmp0];
+         z_hi ^= F[tmp0][tmp1];
++
++        fprintf(DUMP, "            tmp0: %u\n", tmp0);
++        fprintf(DUMP, "            tmp1: %u\n", tmp1);
++        fprintf(DUMP, "            tmp2: %u\n", tmp2);
++        fprintf(DUMP, "            x_hi: %u\n", x_hi);
++        fprintf(DUMP, "            y_hi: %u\n", y_hi);
++        fprintf(DUMP, "            z_hi: %u\n", z_hi);
++
+         // Build bytes from nibbles
+         TMP_X[j] = (x_hi << 4 | x_lo);
+         TMP_Y[j] = (y_hi << 4 | y_lo);
+         TMP_Z[j] = (z_hi << 4 | z_lo);
++
++        debug_dump_buffer("TMP_X", sizeof(TMP_X), TMP_X, 12);
++        debug_dump_buffer("TMP_Y", sizeof(TMP_Y), TMP_Y, 12);
++        debug_dump_buffer("TMP_Z", sizeof(TMP_Z), TMP_Z, 12);
+     }
+ 
++    debug_dump_buffer("TMP_X (post-S-box)", sizeof(TMP_X), TMP_X, 12);
++    debug_dump_buffer("TMP_Y (post-S-box)", sizeof(TMP_Y), TMP_Y, 12);
++    debug_dump_buffer("TMP_Z (post-S-box)", sizeof(TMP_Z), TMP_Z, 12);
++
+     for (size_t j=0; j<8; j++)
+     {
+         size_t dest_j = 15-j;
+@@ -196,10 +257,16 @@ static void _nonlinear_layer(
+         Y[dest_j] ^= TMP_Y[j];
+         Z[dest_j] ^= TMP_Z[j];
+     }
++
++    debug_dump_buffer("X (post-XOR)", BLOCK_BYTES, X, 12);
++    debug_dump_buffer("Y (post-XOR)", BLOCK_BYTES, Y, 12);
++    debug_dump_buffer("Z (post-XOR)", BLOCK_BYTES, Z, 12);
+ }
+ 
+ static void _linear_layer(uint8_t X[BLOCK_BYTES])
+ {
++    fprintf(DUMP, "        linear layer\n");
++
+     X[15] ^= X[1];
+     X[15] ^= X[2];
+     X[15] ^= X[3];
+@@ -214,6 +281,8 @@ static void _linear_layer(uint8_t X[BLOCK_BYTES])
+     X[11] ^= X[7];
+     X[10] ^= X[7];
+     X[9]  ^= X[7];
++
++    debug_dump_buffer("X", BLOCK_BYTES, X, 12);
+ }
+ 
+ static void _permutation_layer(uint8_t X[BLOCK_BYTES], permutation p)
+@@ -223,6 +292,8 @@ static void _permutation_layer(uint8_t X[BLOCK_BYTES], permutation p)
+         return;
+     }
+ 
++    fprintf(DUMP, "        permutation layer\n");
++
+     uint8_t X_old[BLOCK_BYTES];
+     memcpy(X_old, X, BLOCK_BYTES);
+ 
+@@ -232,6 +303,8 @@ static void _permutation_layer(uint8_t X[BLOCK_BYTES], permutation p)
+     {
+         X[pi[j]] = X_old[j];
+     }
++
++    debug_dump_buffer("X", BLOCK_BYTES, X, 12);
+ }
+ 
+ static void _one_round_egfn(
+@@ -270,11 +343,15 @@ void lilliput_tbc_encrypt(
+     _compute_round_tweakeys(key, tweak, RTK_X, RTK_Y);
+ 
+ 
++    fprintf(DUMP, "running EGFN %zu times\n", (size_t)ROUNDS);
++
+     for (size_t i=0; i<ROUNDS-1; i++)
+     {
++        fprintf(DUMP, "    round %zu\n", (size_t)i);
+         _one_round_egfn(X, Y, Z, RTK_X[i], RTK_Y[i], PERMUTATION_ENCRYPTION);
+     }
+ 
++    fprintf(DUMP, "    round %zu\n", (size_t)(ROUNDS-1));
+     _one_round_egfn(X, Y, Z, RTK_X[ROUNDS-1], RTK_Y[ROUNDS-1], PERMUTATION_NONE);
+ 
+ 
+diff --git a/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/add_threshold/random.c b/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/add_threshold/random.c
+index a966a8e..8d5f2cc 100644
+--- a/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/add_threshold/random.c
++++ b/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/add_threshold/random.c
+@@ -21,6 +21,8 @@ This file provides a system-specific function to generate random bytes.
+ 
+ #define _GNU_SOURCE
+ 
++#include "debug.h"
++
+ #include <stddef.h>
+ #include <stdint.h>
+ 
+@@ -32,5 +34,6 @@ This file provides a system-specific function to generate random bytes.
+ 
+ void randombytes(size_t nb, uint8_t out[nb])
+ {
+-    syscall(SYS_getrandom, out, nb, 0);
++    for (size_t i=0; i<nb; i++)  // fixed 0,1,2,... fill replacing the getrandom call; NOTE(review): presumably so traces are reproducible - not random
++        out[i] = i;  // index is implicitly narrowed from size_t to uint8_t
+ }
+diff --git a/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/add_threshold/tweakey.c b/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/add_threshold/tweakey.c
+index e228a69..b1aadc6 100644
+--- a/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/add_threshold/tweakey.c
++++ b/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/add_threshold/tweakey.c
+@@ -20,6 +20,8 @@ This file provides a first-order threshold implementation of Lilliput-TBC's
+ tweakey schedule, where the tweak and the key are split into two shares.
+ */
+ 
++#include "debug.h"
++
+ #include <stdint.h>
+ #include <string.h>
+ 
+@@ -42,6 +44,7 @@ void tweakey_state_init(
+ {
+     uint8_t SHARES_0[KEY_BYTES];
+     randombytes(sizeof(SHARES_0), SHARES_0);
++    debug_dump_buffer("SHARES_0", sizeof(SHARES_0), SHARES_0, 8);
+ 
+     memcpy(TK_Y, SHARES_0, KEY_BYTES);
+     memcpy(TK_X, tweak, TWEAK_BYTES);
+@@ -67,10 +70,16 @@ void tweakey_state_extract(
+     {
+         const uint8_t *TKj_X = TK_X + j*LANE_BYTES;
+ 
++        fprintf(DUMP, "        XORing lane %zu/%zu (RTK_X)\n", 1+j, (size_t)LANES_NB);
++        debug_dump_buffer("RTK_X", ROUND_TWEAKEY_BYTES, round_tweakey_X, 12);
++        debug_dump_buffer("lane[j]", LANE_BYTES, TKj_X, 12);
++
+         for (size_t k=0; k<LANE_BYTES; k++)
+         {
+             round_tweakey_X[k] ^= TKj_X[k];
+         }
++
++        debug_dump_buffer("=> RTK_X", ROUND_TWEAKEY_BYTES, round_tweakey_X, 12);
+     }
+ 
+ 
+@@ -78,10 +87,16 @@ void tweakey_state_extract(
+     {
+         const uint8_t *TKj_Y = TK_Y + j*LANE_BYTES;
+ 
++        fprintf(DUMP, "        XORing lane %zu/%zu (RTK_Y)\n", 1+j, (size_t)LANES_NB);
++        debug_dump_buffer("RTK_Y", ROUND_TWEAKEY_BYTES, round_tweakey_Y, 12);
++        debug_dump_buffer("lane[j]", LANE_BYTES, TKj_Y, 12);
++
+         for (size_t k=0; k<LANE_BYTES; k++)
+         {
+             round_tweakey_Y[k] ^= TKj_Y[k];
+         }
++
++        debug_dump_buffer("=> RTK_Y", ROUND_TWEAKEY_BYTES, round_tweakey_Y, 12);
+     }
+ 
+     round_tweakey_X[0] ^= round_constant;
+@@ -184,6 +199,10 @@ static const matrix_multiplication ALPHAS[6] = {
+     _multiply_MR3
+ };
+ 
++static char const * const ALPHAS_STR[6] = {  // labels printed to DUMP; looked up with the same index as the ALPHAS entry being applied
++    "M", "M²", "M³", "MR", "MR²", "MR³"
++};
++
+ 
+ void tweakey_state_update(uint8_t TK_X[TWEAKEY_BYTES], uint8_t TK_Y[KEY_BYTES])
+ {
+@@ -197,6 +216,10 @@ void tweakey_state_update(uint8_t TK_X[TWEAKEY_BYTES], uint8_t TK_Y[KEY_BYTES])
+         memcpy(TKj_old_X, TKj_X, LANE_BYTES);
+ 
+         ALPHAS[j-1](TKj_old_X, TKj_X);
++
++        fprintf(DUMP, "        multiplying lane %zu/%zu by %s\n", 1+j, (size_t)LANES_NB, ALPHAS_STR[j-1]);
++        debug_dump_buffer("TK_j_X^i-1", LANE_BYTES, TKj_old_X, 12);
++        debug_dump_buffer("TK_j_X^i", LANE_BYTES, TKj_X, 12);
+     }
+ 
+     for (size_t j=0; j<(KEY_BYTES/LANE_BYTES); j++)
+@@ -211,5 +234,11 @@ void tweakey_state_update(uint8_t TK_X[TWEAKEY_BYTES], uint8_t TK_Y[KEY_BYTES])
+ 
+         ALPHAS[j-1 + (TWEAK_BYTES/LANE_BYTES)](TKj_X_old, TKj_X);
+         ALPHAS[j-1 + (TWEAK_BYTES/LANE_BYTES)](TKj_Y_old, TKj_Y);
++
++        fprintf(DUMP, "        multiplying lane %zu/%zu by %s\n", 1+j + (TWEAK_BYTES/LANE_BYTES), (size_t)LANES_NB, ALPHAS_STR[j-1 + (TWEAK_BYTES/LANE_BYTES)]);
++        debug_dump_buffer("TK_j_X^i-1", LANE_BYTES, TKj_X_old, 12);
++        debug_dump_buffer("TK_j_X^i", LANE_BYTES, TKj_X, 12);
++        debug_dump_buffer("TK_j_Y^i-1", LANE_BYTES, TKj_Y_old, 12);
++        debug_dump_buffer("TK_j_Y^i", LANE_BYTES, TKj_Y, 12);
+     }
+ }