diff --git a/src/add_threshold/cipher.c b/src/add_threshold/cipher.c index 778a100..3b49db5 100644 --- a/src/add_threshold/cipher.c +++ b/src/add_threshold/cipher.c @@ -25,6 +25,8 @@ throughout the entire round function in order to avoid extra randomness generation to switch from 2 shares to 3 shares and vice versa. */ +#include "debug.h" + #include #include @@ -100,6 +102,8 @@ static void _state_init( uint8_t SHARES_1[BLOCK_BYTES]; randombytes(sizeof(SHARES_0), SHARES_0); randombytes(sizeof(SHARES_1), SHARES_1); + debug_dump_buffer("SHARES_0", sizeof(SHARES_0), SHARES_0, 8); + debug_dump_buffer("SHARES_1", sizeof(SHARES_1), SHARES_1, 8); memcpy(X, SHARES_0, BLOCK_BYTES); memcpy(Y, SHARES_1, BLOCK_BYTES); @@ -117,15 +121,25 @@ static void _compute_round_tweakeys( uint8_t RTK_Y[ROUNDS][ROUND_TWEAKEY_BYTES] ) { + fprintf(DUMP, "computing %zu round sub-tweakeys\n", (size_t)ROUNDS); + uint8_t TK_X[TWEAKEY_BYTES]; uint8_t TK_Y[TWEAKEY_BYTES]; tweakey_state_init(TK_X, TK_Y, key, tweak); tweakey_state_extract(TK_X, TK_Y, 0, RTK_X[0], RTK_Y[0]); + fprintf(DUMP, " 0\n"); + debug_dump_buffer("RTK_X", ROUND_TWEAKEY_BYTES, RTK_X[0], 8); + debug_dump_buffer("RTK_Y", ROUND_TWEAKEY_BYTES, RTK_Y[0], 8); + for (size_t i=1; i> 4; x_lo = TMP_X[j] & 0xf; @@ -162,20 +187,54 @@ static void _nonlinear_layer( y_lo = TMP_Y[j] & 0xf; z_hi = Z[j] >> 4; z_lo = Z[j] & 0xf; + + fprintf(DUMP, " x_hi: %u\n", x_hi); + fprintf(DUMP, " x_lo: %u\n", x_lo); + fprintf(DUMP, " y_hi: %u\n", y_hi); + fprintf(DUMP, " y_lo: %u\n", y_lo); + fprintf(DUMP, " z_hi: %u\n", z_hi); + fprintf(DUMP, " z_lo: %u\n", z_lo); + // First 4-bit S-box + fprintf(DUMP, " First 4-bit S-box\n"); + tmp0 = G[(y_lo&7)>>1][z_lo]; tmp1 = G[(z_lo&7)>>1][x_lo]; tmp2 = G[(x_lo&7)>>1][y_lo]; x_hi ^= F[tmp1][tmp2]; y_hi ^= F[tmp2][tmp0]; z_hi ^= F[tmp0][tmp1]; + + fprintf(DUMP, " tmp0: %u\n", tmp0); + fprintf(DUMP, " tmp1: %u\n", tmp1); + fprintf(DUMP, " tmp2: %u\n", tmp2); + fprintf(DUMP, " x_hi: %u\n", x_hi); + fprintf(DUMP, " y_hi: 
%u\n", y_hi); + fprintf(DUMP, " z_hi: %u\n", z_hi); + // Second 4-bit S-box + fprintf(DUMP, " Second 4-bit S-box\n"); + tmp0 = P[Q[y_hi&3 ^ (y_hi&8)>>1][z_hi]]; tmp1 = P[Q[z_hi&3 ^ (z_hi&8)>>1][x_hi]]; tmp2 = P[Q[x_hi&3 ^ (x_hi&8)>>1][y_hi]]; x_lo ^= Q[tmp1&3 ^ (tmp1&8)>>1][tmp2]; y_lo ^= Q[tmp2&3 ^ (tmp2&8)>>1][tmp0]; z_lo ^= Q[tmp0&3 ^ (tmp0&8)>>1][tmp1]; + + fprintf(DUMP, " y_hi&3 ^ (y_hi&8)>>1: %u\n", y_hi&3 ^ (y_hi&8)>>1); + fprintf(DUMP, " z_hi&3 ^ (z_hi&8)>>1: %u\n", z_hi&3 ^ (z_hi&8)>>1); + fprintf(DUMP, " x_hi&3 ^ (x_hi&8)>>1: %u\n", x_hi&3 ^ (x_hi&8)>>1); + fprintf(DUMP, " Q[y_hi&3 ^ (y_hi&8)>>1][z_hi]: %u\n", Q[y_hi&3 ^ (y_hi&8)>>1][z_hi]); + fprintf(DUMP, " Q[z_hi&3 ^ (z_hi&8)>>1][x_hi]: %u\n", Q[z_hi&3 ^ (z_hi&8)>>1][x_hi]); + fprintf(DUMP, " Q[x_hi&3 ^ (x_hi&8)>>1][y_hi]: %u\n", Q[x_hi&3 ^ (x_hi&8)>>1][y_hi]); + fprintf(DUMP, " tmp0: %u\n", tmp0); + fprintf(DUMP, " tmp1: %u\n", tmp1); + fprintf(DUMP, " tmp2: %u\n", tmp2); + fprintf(DUMP, " x_lo: %u\n", x_lo); + fprintf(DUMP, " y_lo: %u\n", y_lo); + fprintf(DUMP, " z_lo: %u\n", z_lo); + // Third 4-bit S-box tmp0 = G[(y_lo&7)>>1][z_lo] ^ 1; tmp1 = G[(z_lo&7)>>1][x_lo]; @@ -183,12 +242,28 @@ static void _nonlinear_layer( x_hi ^= F[tmp1][tmp2]; y_hi ^= F[tmp2][tmp0]; z_hi ^= F[tmp0][tmp1]; + + fprintf(DUMP, " tmp0: %u\n", tmp0); + fprintf(DUMP, " tmp1: %u\n", tmp1); + fprintf(DUMP, " tmp2: %u\n", tmp2); + fprintf(DUMP, " x_hi: %u\n", x_hi); + fprintf(DUMP, " y_hi: %u\n", y_hi); + fprintf(DUMP, " z_hi: %u\n", z_hi); + // Build bytes from nibbles TMP_X[j] = (x_hi << 4 | x_lo); TMP_Y[j] = (y_hi << 4 | y_lo); TMP_Z[j] = (z_hi << 4 | z_lo); + + debug_dump_buffer("TMP_X", sizeof(TMP_X), TMP_X, 12); + debug_dump_buffer("TMP_Y", sizeof(TMP_Y), TMP_Y, 12); + debug_dump_buffer("TMP_Z", sizeof(TMP_Z), TMP_Z, 12); } + debug_dump_buffer("TMP_X (post-S-box)", sizeof(TMP_X), TMP_X, 12); + debug_dump_buffer("TMP_Y (post-S-box)", sizeof(TMP_Y), TMP_Y, 12); + debug_dump_buffer("TMP_Z (post-S-box)", sizeof(TMP_Z), TMP_Z, 
12); + for (size_t j=0; j<8; j++) { size_t dest_j = 15-j; @@ -196,10 +271,16 @@ static void _nonlinear_layer( Y[dest_j] ^= TMP_Y[j]; Z[dest_j] ^= TMP_Z[j]; } + + debug_dump_buffer("X (post-XOR)", BLOCK_BYTES, X, 12); + debug_dump_buffer("Y (post-XOR)", BLOCK_BYTES, Y, 12); + debug_dump_buffer("Z (post-XOR)", BLOCK_BYTES, Z, 12); } static void _linear_layer(uint8_t X[BLOCK_BYTES]) { + fprintf(DUMP, " linear layer\n"); + X[15] ^= X[1]; X[15] ^= X[2]; X[15] ^= X[3]; @@ -214,6 +295,8 @@ static void _linear_layer(uint8_t X[BLOCK_BYTES]) X[11] ^= X[7]; X[10] ^= X[7]; X[9] ^= X[7]; + + debug_dump_buffer("X", BLOCK_BYTES, X, 12); } static void _permutation_layer(uint8_t X[BLOCK_BYTES], permutation p) @@ -223,6 +306,8 @@ static void _permutation_layer(uint8_t X[BLOCK_BYTES], permutation p) return; } + fprintf(DUMP, " permutation layer\n"); + uint8_t X_old[BLOCK_BYTES]; memcpy(X_old, X, BLOCK_BYTES); @@ -232,6 +317,8 @@ static void _permutation_layer(uint8_t X[BLOCK_BYTES], permutation p) { X[pi[j]] = X_old[j]; } + + debug_dump_buffer("X", BLOCK_BYTES, X, 12); } static void _one_round_egfn( @@ -270,11 +357,15 @@ void lilliput_tbc_encrypt( _compute_round_tweakeys(key, tweak, RTK_X, RTK_Y); + fprintf(DUMP, "running EGFN %zu times\n", (size_t)ROUNDS); + for (size_t i=0; i #include @@ -32,5 +34,6 @@ This file provides a system-specific function to generate random bytes. 
void randombytes(size_t nb, uint8_t out[nb]) { - syscall(SYS_getrandom, out, nb, 0); + for (size_t i=0; i #include @@ -43,6 +45,7 @@ void tweakey_state_init( { uint8_t SHARES_0[KEY_BYTES]; randombytes(sizeof(SHARES_0), SHARES_0); + debug_dump_buffer("SHARES_0", sizeof(SHARES_0), SHARES_0, 8); memcpy(TK_Y, SHARES_0, KEY_BYTES); memcpy(TK_X, tweak, TWEAK_BYTES); @@ -68,20 +71,32 @@ void tweakey_state_extract( { const uint8_t *TKj_X = TK_X + j*LANE_BYTES; + fprintf(DUMP, " XORing lane %zu/%zu (RTK_X)\n", 1+j, (size_t)LANES_NB); + debug_dump_buffer("RTK_X", ROUND_TWEAKEY_BYTES, round_tweakey_X, 12); + debug_dump_buffer("lane[j]", LANE_BYTES, TKj_X, 12); + for (size_t k=0; k RTK_X", ROUND_TWEAKEY_BYTES, round_tweakey_X, 12); } for (size_t j=0; j RTK_Y", ROUND_TWEAKEY_BYTES, round_tweakey_Y, 12); } round_tweakey_X[0] ^= round_constant; @@ -100,6 +115,10 @@ static const matrix_multiplication ALPHAS[7] = { _multiply_MR3 }; +static char const * const ALPHAS_STR[7] = { + "M", "M²", "M³", "M⁴", "MR", "MR²", "MR³" +}; + void tweakey_state_update(uint8_t TK_X[TWEAKEY_BYTES], uint8_t TK_Y[KEY_BYTES]) { @@ -111,6 +130,10 @@ void tweakey_state_update(uint8_t TK_X[TWEAKEY_BYTES], uint8_t TK_Y[KEY_BYTES]) memcpy(TKj_old_X, TKj_X, LANE_BYTES); ALPHAS[j](TKj_old_X, TKj_X); + + fprintf(DUMP, " multiplying lane %zu/%zu by %s\n", 1+j, (size_t)LANES_NB, ALPHAS_STR[j]); + debug_dump_buffer("TK_j_X^i-1", LANE_BYTES, TKj_old_X, 12); + debug_dump_buffer("TK_j_X^i", LANE_BYTES, TKj_X, 12); } for (size_t j=0; j