1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
|
diff --git a/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/ref/cipher.c b/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/ref/cipher.c
index 07405e1..0997cac 100644
--- a/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/ref/cipher.c
+++ b/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/ref/cipher.c
@@ -17,6 +17,8 @@ http://creativecommons.org/publicdomain/zero/1.0/
This file provides the implementation for Lilliput-TBC.
*/
+#include "debug.h"
+
#include <stdint.h>
#include <string.h>
@@ -71,33 +73,53 @@ static void _compute_round_tweakeys(
uint8_t RTK[ROUNDS][ROUND_TWEAKEY_BYTES]
)
{
+ fprintf(DUMP, "computing %zu round sub-tweakeys\n", (size_t)ROUNDS);
+
uint8_t TK[TWEAKEY_BYTES];
tweakey_state_init(TK, key, tweak);
tweakey_state_extract(TK, 0, RTK[0]);
+ fprintf(DUMP, " 0\n");
+ debug_dump_buffer("RTK", ROUND_TWEAKEY_BYTES, RTK[0], 8);
+
for (uint8_t i=1; i<ROUNDS; i++)
{
+ fprintf(DUMP, " %zu\n", (size_t)i);
+
tweakey_state_update(TK);
+ debug_dump_buffer("TK", TWEAKEY_BYTES, TK, 8);
tweakey_state_extract(TK, i, RTK[i]);
+ debug_dump_buffer("RTK", ROUND_TWEAKEY_BYTES, RTK[i], 8);
}
}
static uint8_t _Fj(uint8_t Xj, uint8_t RTKj)
{
+ fprintf(DUMP, " Xj: %02x; S[Xj]: %02x; RTKj: %02x; Fj: %02x\n", Xj, S[Xj], RTKj, S[Xj ^ RTKj]); /* debug trace only: the last field is the value returned below; S[Xj] is printed but does not feed the result */
return S[Xj ^ RTKj];
}
static void _nonlinear_layer(uint8_t X[BLOCK_BYTES], const uint8_t RTK[ROUND_TWEAKEY_BYTES])
{
+ fprintf(DUMP, " nonlinear layer\n"); /* trace: announce this layer on the DUMP stream */
+ /* Debug only: hand X to debug_dump_buffer (declared in debug.h, not shown here) before any byte is modified. */
+ debug_dump_buffer("X", BLOCK_BYTES, X, 12);
+
for (size_t j=0; j<8; j++)
{
+ fprintf(DUMP, " j=%zu\n", j); /* trace: index of the byte pair (X[j], X[15-j]) handled in this iteration */
X[15-j] ^= _Fj(X[j], RTK[j]);
+ fprintf(DUMP, " X_{15-j} XOR Fj: %02x\n", X[15-j]); /* trace: X[15-j] after the XOR on the previous line */
}
+ /* Debug only: second call on X, after the loop has updated X[15] down to X[8]; X[0]..X[7] are only read. */
+ debug_dump_buffer("X", BLOCK_BYTES, X, 12);
}
static void _linear_layer(uint8_t X[BLOCK_BYTES])
{
+ fprintf(DUMP, " linear layer\n");
+
for (size_t j=1; j<8; j++)
{
X[15] ^= X[j];
@@ -107,6 +129,8 @@ static void _linear_layer(uint8_t X[BLOCK_BYTES])
{
X[j] ^= X[7];
}
+
+ debug_dump_buffer("X", BLOCK_BYTES, X, 12);
}
static void _permutation_layer(uint8_t X[BLOCK_BYTES], permutation p)
@@ -116,6 +140,8 @@ static void _permutation_layer(uint8_t X[BLOCK_BYTES], permutation p)
return;
}
+ fprintf(DUMP, " permutation layer\n");
+
uint8_t X_old[BLOCK_BYTES];
memcpy(X_old, X, BLOCK_BYTES);
@@ -125,6 +151,8 @@ static void _permutation_layer(uint8_t X[BLOCK_BYTES], permutation p)
{
X[pi[j]] = X_old[j];
}
+
+ debug_dump_buffer("X", BLOCK_BYTES, X, 12);
}
static void _one_round_egfn(uint8_t X[BLOCK_BYTES], const uint8_t RTK[ROUND_TWEAKEY_BYTES], permutation p)
@@ -148,11 +176,15 @@ void lilliput_tbc_encrypt(
uint8_t RTK[ROUNDS][ROUND_TWEAKEY_BYTES];
_compute_round_tweakeys(key, tweak, RTK);
+ fprintf(DUMP, "running EGFN %zu times\n", (size_t)ROUNDS);
+
for (size_t i=0; i<ROUNDS-1; i++)
{
+ fprintf(DUMP, " round %zu\n", i);
_one_round_egfn(X, RTK[i], PERMUTATION_ENCRYPTION);
}
+ fprintf(DUMP, " round %zu\n", (size_t)(ROUNDS-1));
_one_round_egfn(X, RTK[ROUNDS-1], PERMUTATION_NONE);
memcpy(ciphertext, X, BLOCK_BYTES);
diff --git a/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/ref/tweakey.c b/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/ref/tweakey.c
index 510f35a..4bf027c 100644
--- a/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/ref/tweakey.c
+++ b/SOUMISSION_NIST/REFERENCE_IMPLEMENTATION/src/ref/tweakey.c
@@ -17,6 +17,8 @@ http://creativecommons.org/publicdomain/zero/1.0/
This file provides the implementation of Lilliput-TBC's tweakey schedule.
*/
+#include "debug.h"
+
#include <stdint.h>
#include <string.h>
@@ -51,10 +53,16 @@ void tweakey_state_extract(
{
const uint8_t *TKj = TK + j*LANE_BYTES;
+ fprintf(DUMP, " XORing lane %zu/%zu\n", 1+j, (size_t)LANES_NB);
+ debug_dump_buffer("RTK", ROUND_TWEAKEY_BYTES, round_tweakey, 12);
+ debug_dump_buffer("lane[j]", LANE_BYTES, TKj, 12);
+
for (size_t k=0; k<LANE_BYTES; k++)
{
round_tweakey[k] ^= TKj[k];
}
+
+ debug_dump_buffer("=> RTK", ROUND_TWEAKEY_BYTES, round_tweakey, 12);
}
round_tweakey[0] ^= round_constant;
@@ -73,6 +81,10 @@ static const matrix_multiplication ALPHAS[7] = {
_multiply_MR3
};
+static char const * const ALPHAS_STR[7] = { /* labels for the trace message in tweakey_state_update, indexed by lane j like ALPHAS[j] */
+ "M", "M²", "M³", "M⁴", "MR", "MR²", "MR³" /* presumably parallel to ALPHAS entry by entry; only the last pair (_multiply_MR3 / "MR³") is visible here - TODO confirm */
+};
+
void tweakey_state_update(uint8_t TK[TWEAKEY_BYTES])
{
@@ -84,5 +96,9 @@ void tweakey_state_update(uint8_t TK[TWEAKEY_BYTES])
memcpy(TKj_old, TKj, LANE_BYTES);
ALPHAS[j](TKj_old, TKj);
+
+ fprintf(DUMP, " multiplying lane %zu/%zu by %s\n", 1+j, (size_t)LANES_NB, ALPHAS_STR[j]);
+ debug_dump_buffer("TK_j^i-1", LANE_BYTES, TKj_old, 12);
+ debug_dump_buffer("TK_j^i", LANE_BYTES, TKj, 12);
}
}
|