traces-tbc.patch (11917B)
1 diff --git a/src/add_threshold/cipher.c b/src/add_threshold/cipher.c 2 index 778a100..3b49db5 100644 3 --- a/src/add_threshold/cipher.c 4 +++ b/src/add_threshold/cipher.c 5 @@ -25,6 +25,8 @@ throughout the entire round function in order to avoid extra randomness 6 generation to switch from 2 shares to 3 shares and vice versa. 7 */ 8 9 +#include "debug.h" 10 + 11 #include <stdint.h> 12 #include <string.h> 13 14 @@ -100,6 +102,8 @@ static void _state_init( 15 uint8_t SHARES_1[BLOCK_BYTES]; 16 randombytes(sizeof(SHARES_0), SHARES_0); 17 randombytes(sizeof(SHARES_1), SHARES_1); 18 + debug_dump_buffer("SHARES_0", sizeof(SHARES_0), SHARES_0, 8); 19 + debug_dump_buffer("SHARES_1", sizeof(SHARES_1), SHARES_1, 8); 20 21 memcpy(X, SHARES_0, BLOCK_BYTES); 22 memcpy(Y, SHARES_1, BLOCK_BYTES); 23 @@ -117,15 +121,25 @@ static void _compute_round_tweakeys( 24 uint8_t RTK_Y[ROUNDS][ROUND_TWEAKEY_BYTES] 25 ) 26 { 27 + fprintf(DUMP, "computing %zu round sub-tweakeys\n", (size_t)ROUNDS); 28 + 29 uint8_t TK_X[TWEAKEY_BYTES]; 30 uint8_t TK_Y[TWEAKEY_BYTES]; 31 tweakey_state_init(TK_X, TK_Y, key, tweak); 32 tweakey_state_extract(TK_X, TK_Y, 0, RTK_X[0], RTK_Y[0]); 33 34 + fprintf(DUMP, " 0\n"); 35 + debug_dump_buffer("RTK_X", ROUND_TWEAKEY_BYTES, RTK_X[0], 8); 36 + debug_dump_buffer("RTK_Y", ROUND_TWEAKEY_BYTES, RTK_Y[0], 8); 37 + 38 for (size_t i=1; i<ROUNDS; i++) 39 { 40 tweakey_state_update(TK_X, TK_Y); 41 + debug_dump_buffer("TK_X", TWEAKEY_BYTES, TK_X, 8); 42 + debug_dump_buffer("TK_Y", TWEAKEY_BYTES, TK_Y, 8); 43 tweakey_state_extract(TK_X, TK_Y, i, RTK_X[i], RTK_Y[i]); 44 + debug_dump_buffer("RTK_X", ROUND_TWEAKEY_BYTES, RTK_X[i], 8); 45 + debug_dump_buffer("RTK_Y", ROUND_TWEAKEY_BYTES, RTK_Y[i], 8); 46 } 47 } 48 49 @@ -138,6 +152,12 @@ static void _nonlinear_layer( 50 const uint8_t RTK_Y[ROUND_TWEAKEY_BYTES] 51 ) 52 { 53 + fprintf(DUMP, " nonlinear layer\n"); 54 + 55 + debug_dump_buffer("X", BLOCK_BYTES, X, 12); 56 + debug_dump_buffer("Y", BLOCK_BYTES, Y, 12); 57 + 
debug_dump_buffer("Z", BLOCK_BYTES, Z, 12); 58 + 59 uint8_t x_hi, y_hi, z_hi; // High nibbles for the Feistel network 60 uint8_t x_lo, y_lo, z_lo; // Low nibbles for the Feistel network 61 uint8_t tmp0, tmp1, tmp2; 62 @@ -152,9 +172,14 @@ static void _nonlinear_layer( 63 TMP_Y[j] = Y[j] ^ RTK_Y[j]; 64 } 65 66 + debug_dump_buffer("Xj XOR RTK_Xj", sizeof(TMP_X), TMP_X, 12); 67 + debug_dump_buffer("Yj XOR RTK_Yj", sizeof(TMP_Y), TMP_Y, 12); 68 + 69 // Threshold Implementation of the 8-bit S-box 70 for (size_t j=0; j<ROUND_TWEAKEY_BYTES; j++) 71 { 72 + fprintf(DUMP, " S-box (%zu/%zu)\n", j+1, (size_t)ROUND_TWEAKEY_BYTES); 73 + 74 // Decomposition into nibbles 75 x_hi = TMP_X[j] >> 4; 76 x_lo = TMP_X[j] & 0xf; 77 @@ -162,20 +187,54 @@ static void _nonlinear_layer( 78 y_lo = TMP_Y[j] & 0xf; 79 z_hi = Z[j] >> 4; 80 z_lo = Z[j] & 0xf; 81 + 82 + fprintf(DUMP, " x_hi: %u\n", x_hi); 83 + fprintf(DUMP, " x_lo: %u\n", x_lo); 84 + fprintf(DUMP, " y_hi: %u\n", y_hi); 85 + fprintf(DUMP, " y_lo: %u\n", y_lo); 86 + fprintf(DUMP, " z_hi: %u\n", z_hi); 87 + fprintf(DUMP, " z_lo: %u\n", z_lo); 88 + 89 // First 4-bit S-box 90 + fprintf(DUMP, " First 4-bit S-box\n"); 91 + 92 tmp0 = G[(y_lo&7)>>1][z_lo]; 93 tmp1 = G[(z_lo&7)>>1][x_lo]; 94 tmp2 = G[(x_lo&7)>>1][y_lo]; 95 x_hi ^= F[tmp1][tmp2]; 96 y_hi ^= F[tmp2][tmp0]; 97 z_hi ^= F[tmp0][tmp1]; 98 + 99 + fprintf(DUMP, " tmp0: %u\n", tmp0); 100 + fprintf(DUMP, " tmp1: %u\n", tmp1); 101 + fprintf(DUMP, " tmp2: %u\n", tmp2); 102 + fprintf(DUMP, " x_hi: %u\n", x_hi); 103 + fprintf(DUMP, " y_hi: %u\n", y_hi); 104 + fprintf(DUMP, " z_hi: %u\n", z_hi); 105 + 106 // Second 4-bit S-box 107 + fprintf(DUMP, " Second 4-bit S-box\n"); 108 + 109 tmp0 = P[Q[y_hi&3 ^ (y_hi&8)>>1][z_hi]]; 110 tmp1 = P[Q[z_hi&3 ^ (z_hi&8)>>1][x_hi]]; 111 tmp2 = P[Q[x_hi&3 ^ (x_hi&8)>>1][y_hi]]; 112 x_lo ^= Q[tmp1&3 ^ (tmp1&8)>>1][tmp2]; 113 y_lo ^= Q[tmp2&3 ^ (tmp2&8)>>1][tmp0]; 114 z_lo ^= Q[tmp0&3 ^ (tmp0&8)>>1][tmp1]; 115 + 116 + fprintf(DUMP, " y_hi&3 ^ (y_hi&8)>>1:
%u\n", y_hi&3 ^ (y_hi&8)>>1); 117 + fprintf(DUMP, " z_hi&3 ^ (z_hi&8)>>1: %u\n", z_hi&3 ^ (z_hi&8)>>1); 118 + fprintf(DUMP, " x_hi&3 ^ (x_hi&8)>>1: %u\n", x_hi&3 ^ (x_hi&8)>>1); 119 + fprintf(DUMP, " Q[y_hi&3 ^ (y_hi&8)>>1][z_hi]: %u\n", Q[y_hi&3 ^ (y_hi&8)>>1][z_hi]); 120 + fprintf(DUMP, " Q[z_hi&3 ^ (z_hi&8)>>1][x_hi]: %u\n", Q[z_hi&3 ^ (z_hi&8)>>1][x_hi]); 121 + fprintf(DUMP, " Q[x_hi&3 ^ (x_hi&8)>>1][y_hi]: %u\n", Q[x_hi&3 ^ (x_hi&8)>>1][y_hi]); 122 + fprintf(DUMP, " tmp0: %u\n", tmp0); 123 + fprintf(DUMP, " tmp1: %u\n", tmp1); 124 + fprintf(DUMP, " tmp2: %u\n", tmp2); 125 + fprintf(DUMP, " x_lo: %u\n", x_lo); 126 + fprintf(DUMP, " y_lo: %u\n", y_lo); 127 + fprintf(DUMP, " z_lo: %u\n", z_lo); 128 + 129 // Third 4-bit S-box 130 tmp0 = G[(y_lo&7)>>1][z_lo] ^ 1; 131 tmp1 = G[(z_lo&7)>>1][x_lo]; 132 @@ -183,12 +242,28 @@ static void _nonlinear_layer( 133 x_hi ^= F[tmp1][tmp2]; 134 y_hi ^= F[tmp2][tmp0]; 135 z_hi ^= F[tmp0][tmp1]; 136 + 137 + fprintf(DUMP, " tmp0: %u\n", tmp0); 138 + fprintf(DUMP, " tmp1: %u\n", tmp1); 139 + fprintf(DUMP, " tmp2: %u\n", tmp2); 140 + fprintf(DUMP, " x_hi: %u\n", x_hi); 141 + fprintf(DUMP, " y_hi: %u\n", y_hi); 142 + fprintf(DUMP, " z_hi: %u\n", z_hi); 143 + 144 // Build bytes from nibbles 145 TMP_X[j] = (x_hi << 4 | x_lo); 146 TMP_Y[j] = (y_hi << 4 | y_lo); 147 TMP_Z[j] = (z_hi << 4 | z_lo); 148 + 149 + debug_dump_buffer("TMP_X", sizeof(TMP_X), TMP_X, 12); 150 + debug_dump_buffer("TMP_Y", sizeof(TMP_Y), TMP_Y, 12); 151 + debug_dump_buffer("TMP_Z", sizeof(TMP_Z), TMP_Z, 12); 152 } 153 154 + debug_dump_buffer("TMP_X (post-S-box)", sizeof(TMP_X), TMP_X, 12); 155 + debug_dump_buffer("TMP_Y (post-S-box)", sizeof(TMP_Y), TMP_Y, 12); 156 + debug_dump_buffer("TMP_Z (post-S-box)", sizeof(TMP_Z), TMP_Z, 12); 157 + 158 for (size_t j=0; j<8; j++) 159 { 160 size_t dest_j = 15-j; 161 @@ -196,10 +271,16 @@ static void _nonlinear_layer( 162 Y[dest_j] ^= TMP_Y[j]; 163 Z[dest_j] ^= TMP_Z[j]; 164 } 165 + 166 + debug_dump_buffer("X (post-XOR)", 
BLOCK_BYTES, X, 12); 167 + debug_dump_buffer("Y (post-XOR)", BLOCK_BYTES, Y, 12); 168 + debug_dump_buffer("Z (post-XOR)", BLOCK_BYTES, Z, 12); 169 } 170 171 static void _linear_layer(uint8_t X[BLOCK_BYTES]) 172 { 173 + fprintf(DUMP, " linear layer\n"); 174 + 175 X[15] ^= X[1]; 176 X[15] ^= X[2]; 177 X[15] ^= X[3]; 178 @@ -214,6 +295,8 @@ static void _linear_layer(uint8_t X[BLOCK_BYTES]) 179 X[11] ^= X[7]; 180 X[10] ^= X[7]; 181 X[9] ^= X[7]; 182 + 183 + debug_dump_buffer("X", BLOCK_BYTES, X, 12); 184 } 185 186 static void _permutation_layer(uint8_t X[BLOCK_BYTES], permutation p) 187 @@ -223,6 +306,8 @@ static void _permutation_layer(uint8_t X[BLOCK_BYTES], permutation p) 188 return; 189 } 190 191 + fprintf(DUMP, " permutation layer\n"); 192 + 193 uint8_t X_old[BLOCK_BYTES]; 194 memcpy(X_old, X, BLOCK_BYTES); 195 196 @@ -232,6 +317,8 @@ static void _permutation_layer(uint8_t X[BLOCK_BYTES], permutation p) 197 { 198 X[pi[j]] = X_old[j]; 199 } 200 + 201 + debug_dump_buffer("X", BLOCK_BYTES, X, 12); 202 } 203 204 static void _one_round_egfn( 205 @@ -270,11 +357,15 @@ void lilliput_tbc_encrypt( 206 _compute_round_tweakeys(key, tweak, RTK_X, RTK_Y); 207 208 209 + fprintf(DUMP, "running EGFN %zu times\n", (size_t)ROUNDS); 210 + 211 for (size_t i=0; i<ROUNDS-1; i++) 212 { 213 + fprintf(DUMP, " round %zu\n", (size_t)i); 214 _one_round_egfn(X, Y, Z, RTK_X[i], RTK_Y[i], PERMUTATION_ENCRYPTION); 215 } 216 217 + fprintf(DUMP, " round %zu\n", (size_t)(ROUNDS-1)); 218 _one_round_egfn(X, Y, Z, RTK_X[ROUNDS-1], RTK_Y[ROUNDS-1], PERMUTATION_NONE); 219 220 221 diff --git a/src/add_threshold/random.c b/src/add_threshold/random.c 222 index a966a8e..8d5f2cc 100644 223 --- a/src/add_threshold/random.c 224 +++ b/src/add_threshold/random.c 225 @@ -21,6 +21,8 @@ This file provides a system-specific function to generate random bytes. 
226 227 #define _GNU_SOURCE 228 229 +#include "debug.h" 230 + 231 #include <stddef.h> 232 #include <stdint.h> 233 234 @@ -32,5 +34,6 @@ This file provides a system-specific function to generate random bytes. 235 236 void randombytes(size_t nb, uint8_t out[nb]) 237 { 238 - syscall(SYS_getrandom, out, nb, 0); 239 + for (size_t i=0; i<nb; i++) 240 + out[i] = i; 241 } 242 diff --git a/src/add_threshold/tweakey.c b/src/add_threshold/tweakey.c 243 index 7822564..e1abbb6 100644 244 --- a/src/add_threshold/tweakey.c 245 +++ b/src/add_threshold/tweakey.c 246 @@ -20,6 +20,8 @@ This file provides a first-order threshold implementation of Lilliput-TBC's 247 tweakey schedule, where the tweak and the key are split into two shares. 248 */ 249 250 +#include "debug.h" 251 + 252 #include <stdint.h> 253 #include <string.h> 254 255 @@ -43,6 +45,7 @@ void tweakey_state_init( 256 { 257 uint8_t SHARES_0[KEY_BYTES]; 258 randombytes(sizeof(SHARES_0), SHARES_0); 259 + debug_dump_buffer("SHARES_0", sizeof(SHARES_0), SHARES_0, 8); 260 261 memcpy(TK_Y, SHARES_0, KEY_BYTES); 262 memcpy(TK_X, tweak, TWEAK_BYTES); 263 @@ -68,20 +71,32 @@ void tweakey_state_extract( 264 { 265 const uint8_t *TKj_X = TK_X + j*LANE_BYTES; 266 267 + fprintf(DUMP, " XORing lane %zu/%zu (RTK_X)\n", 1+j, (size_t)LANES_NB); 268 + debug_dump_buffer("RTK_X", ROUND_TWEAKEY_BYTES, round_tweakey_X, 12); 269 + debug_dump_buffer("lane[j]", LANE_BYTES, TKj_X, 12); 270 + 271 for (size_t k=0; k<LANE_BYTES; k++) 272 { 273 round_tweakey_X[k] ^= TKj_X[k]; 274 } 275 + 276 + debug_dump_buffer("=> RTK_X", ROUND_TWEAKEY_BYTES, round_tweakey_X, 12); 277 } 278 279 for (size_t j=0; j<KEY_LANES_NB; j++) 280 { 281 const uint8_t *TKj_Y = TK_Y + j*LANE_BYTES; 282 283 + fprintf(DUMP, " XORing lane %zu/%zu (RTK_Y)\n", 1+j, (size_t)LANES_NB); 284 + debug_dump_buffer("RTK_Y", ROUND_TWEAKEY_BYTES, round_tweakey_Y, 12); 285 + debug_dump_buffer("lane[j]", LANE_BYTES, TKj_Y, 12); 286 + 287 for (size_t k=0; k<LANE_BYTES; k++) 288 { 289 round_tweakey_Y[k] 
^= TKj_Y[k]; 290 } 291 + 292 + debug_dump_buffer("=> RTK_Y", ROUND_TWEAKEY_BYTES, round_tweakey_Y, 12); 293 } 294 295 round_tweakey_X[0] ^= round_constant; 296 @@ -100,6 +115,10 @@ static const matrix_multiplication ALPHAS[7] = { 297 _multiply_MR3 298 }; 299 300 +static char const * const ALPHAS_STR[7] = { 301 + "M", "M²", "M³", "M⁴", "MR", "MR²", "MR³" 302 +}; 303 + 304 305 void tweakey_state_update(uint8_t TK_X[TWEAKEY_BYTES], uint8_t TK_Y[KEY_BYTES]) 306 { 307 @@ -111,6 +130,10 @@ void tweakey_state_update(uint8_t TK_X[TWEAKEY_BYTES], uint8_t TK_Y[KEY_BYTES]) 308 memcpy(TKj_old_X, TKj_X, LANE_BYTES); 309 310 ALPHAS[j](TKj_old_X, TKj_X); 311 + 312 + fprintf(DUMP, " multiplying lane %zu/%zu by %s\n", 1+j, (size_t)LANES_NB, ALPHAS_STR[j]); 313 + debug_dump_buffer("TK_j_X^i-1", LANE_BYTES, TKj_old_X, 12); 314 + debug_dump_buffer("TK_j_X^i", LANE_BYTES, TKj_X, 12); 315 } 316 317 for (size_t j=0; j<KEY_LANES_NB; j++) 318 @@ -125,5 +148,11 @@ void tweakey_state_update(uint8_t TK_X[TWEAKEY_BYTES], uint8_t TK_Y[KEY_BYTES]) 319 320 ALPHAS[j + TWEAK_LANES_NB](TKj_X_old, TKj_X); 321 ALPHAS[j + TWEAK_LANES_NB](TKj_Y_old, TKj_Y); 322 + 323 + fprintf(DUMP, " multiplying lane %zu/%zu by %s\n", 1+j + TWEAK_LANES_NB, (size_t)LANES_NB, ALPHAS_STR[j + TWEAK_LANES_NB]); 324 + debug_dump_buffer("TK_j_X^i-1", LANE_BYTES, TKj_X_old, 12); 325 + debug_dump_buffer("TK_j_X^i", LANE_BYTES, TKj_X, 12); 326 + debug_dump_buffer("TK_j_Y^i-1", LANE_BYTES, TKj_Y_old, 12); 327 + debug_dump_buffer("TK_j_Y^i", LANE_BYTES, TKj_Y, 12); 328 } 329 }