lilliput-ae-reference-implementation

Implementations of Lilliput-AE submitted to the NIST LWC standardization process
git clone https://git.kevinlegouguec.net/lilliput-ae-reference-implementation
Log | Files | Refs | README

cipher.c (9951B)


      1 /*
      2 Threshold Implementation of the Lilliput-AE tweakable block cipher.
      3 
      4 Authors, hereby denoted as "the implementer":
      5     Alexandre Adomnicai,
      6     Kévin Le Gouguec,
      7     Léo Reynaud,
      8     2019.
      9 
     10 For more information, feedback or questions, refer to our website:
     11 https://paclido.fr/lilliput-ae
     12 
     13 To the extent possible under law, the implementer has waived all copyright
     14 and related or neighboring rights to the source code in this file.
     15 http://creativecommons.org/publicdomain/zero/1.0/
     16 
     17 ---
     18 
     19 This file provides a first-order threshold implementation of the Lilliput-AE
     20 tweakable block cipher. The input block is split into 3 shares while the key
     21 is split into 2 shares for the tweakey schedule. The S-box relies on look-up
     22 tables and saves some memory usage at the cost of additional operations as
     23 described in the specification. This implementation operates on 3 shares
     24 throughout the entire round function in order to avoid extra randomness
     25 generation to switch from 2 shares to 3 shares and vice versa.
     26 */
     27 
     28 #include <stdint.h>
     29 #include <string.h>
     30 
     31 #include "cipher.h"
     32 #include "constants.h"
     33 #include "random.h"
     34 #include "tweakey.h"
     35 
     36 
     37 enum permutation
     38 {
     39     PERMUTATION_ENCRYPTION = 0, /* PI(i) */
     40     PERMUTATION_DECRYPTION = 1, /* PI^-1(i) */
     41     PERMUTATION_NONE
     42 };
     43 
     44 typedef enum permutation permutation;
     45 
     46 static const uint8_t PERMUTATIONS[2][BLOCK_BYTES] = {
     47     [PERMUTATION_ENCRYPTION] = { 13,  9, 14,  8, 10, 11, 12, 15,  4,  5,  3,  1,  2,  6,  0,  7 },
     48     [PERMUTATION_DECRYPTION] = { 14, 11, 12, 10,  8,  9, 13, 15,  3,  1,  4,  5,  6,  0,  2,  7 }
     49 };
     50 
     51 static const uint8_t F[16][16] = {
     52     {0x0, 0x2, 0x0, 0x2, 0x2, 0x0, 0x2, 0x0, 0x0, 0x2, 0x0, 0x2, 0x2, 0x0, 0x2, 0x0},
     53     {0x0, 0x2, 0x9, 0xb, 0x3, 0x1, 0xa, 0x8, 0xd, 0xf, 0x4, 0x6, 0xe, 0xc, 0x7, 0x5},
     54     {0x0, 0xb, 0x0, 0xb, 0xb, 0x0, 0xb, 0x0, 0x1, 0xa, 0x1, 0xa, 0xa, 0x1, 0xa, 0x1},
     55     {0x9, 0x2, 0x0, 0xb, 0x3, 0x8, 0xa, 0x1, 0x5, 0xe, 0xc, 0x7, 0xf, 0x4, 0x6, 0xd},
     56     {0x1, 0x2, 0x8, 0xb, 0x3, 0x0, 0xa, 0x9, 0x9, 0xa, 0x0, 0x3, 0xb, 0x8, 0x2, 0x1},
     57     {0x0, 0x3, 0x0, 0x3, 0x3, 0x0, 0x3, 0x0, 0x5, 0x6, 0x5, 0x6, 0x6, 0x5, 0x6, 0x5},
     58     {0x8, 0x2, 0x1, 0xb, 0x3, 0x9, 0xa, 0x0, 0x1, 0xb, 0x8, 0x2, 0xa, 0x0, 0x3, 0x9},
     59     {0x0, 0xa, 0x0, 0xa, 0xa, 0x0, 0xa, 0x0, 0x4, 0xe, 0x4, 0xe, 0xe, 0x4, 0xe, 0x4},
     60     {0x1, 0xe, 0x0, 0xf, 0xb, 0x4, 0xa, 0x5, 0x1, 0xe, 0x0, 0xf, 0xb, 0x4, 0xa, 0x5},
     61     {0xc, 0x3, 0x4, 0xb, 0x7, 0x8, 0xf, 0x0, 0x1, 0xe, 0x9, 0x6, 0xa, 0x5, 0x2, 0xd},
     62     {0x0, 0x6, 0x1, 0x7, 0x3, 0x5, 0x2, 0x4, 0x1, 0x7, 0x0, 0x6, 0x2, 0x4, 0x3, 0x5},
     63     {0x4, 0x2, 0xc, 0xa, 0x6, 0x0, 0xe, 0x8, 0x8, 0xe, 0x0, 0x6, 0xa, 0xc, 0x2, 0x4},
     64     {0x8, 0x6, 0x0, 0xe, 0x2, 0xc, 0xa, 0x4, 0x0, 0xe, 0x8, 0x6, 0xa, 0x4, 0x2, 0xc},
     65     {0x4, 0xa, 0x5, 0xb, 0xf, 0x1, 0xe, 0x0, 0x1, 0xf, 0x0, 0xe, 0xa, 0x4, 0xb, 0x5},
     66     {0x0, 0x7, 0x8, 0xf, 0x3, 0x4, 0xb, 0xc, 0x9, 0xe, 0x1, 0x6, 0xa, 0xd, 0x2, 0x5},
     67     {0x5, 0x2, 0x4, 0x3, 0x7, 0x0, 0x6, 0x1, 0x1, 0x6, 0x0, 0x7, 0x3, 0x4, 0x2, 0x5}
     68 };
     69 
     70 static const uint8_t G[4][16] = {
     71     {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf},
     72     {0x0, 0x1, 0x2, 0x3, 0x5, 0x4, 0x7, 0x6, 0x8, 0x9, 0xa, 0xb, 0xd, 0xc, 0xf, 0xe},
     73     {0x0, 0x1, 0x3, 0x2, 0x4, 0x5, 0x7, 0x6, 0x8, 0x9, 0xb, 0xa, 0xc, 0xd, 0xf, 0xe},
     74     {0x1, 0x0, 0x2, 0x3, 0x4, 0x5, 0x7, 0x6, 0x9, 0x8, 0xa, 0xb, 0xc, 0xd, 0xf, 0xe}
     75 };
     76 
     77 static const uint8_t Q[8][16] = {
     78     {0x0, 0x4, 0x2, 0x6, 0x8, 0xc, 0xa, 0xe, 0x1, 0x5, 0x3, 0x7, 0x9, 0xd, 0xb, 0xf},
     79     {0x0, 0x4, 0xa, 0xe, 0x8, 0xc, 0x2, 0x6, 0x3, 0x7, 0x9, 0xd, 0xb, 0xf, 0x1, 0x5},
     80     {0x0, 0xc, 0x2, 0xe, 0x8, 0x4, 0xa, 0x6, 0x1, 0xd, 0x3, 0xf, 0x9, 0x5, 0xb, 0x7},
     81     {0x8, 0x4, 0x2, 0xe, 0x0, 0xc, 0xa, 0x6, 0xb, 0x7, 0x1, 0xd, 0x3, 0xf, 0x9, 0x5},
     82     {0x0, 0x6, 0x2, 0x4, 0x8, 0xe, 0xa, 0xc, 0x1, 0x7, 0x3, 0x5, 0x9, 0xf, 0xb, 0xd},
     83     {0x2, 0x4, 0x8, 0xe, 0xa, 0xc, 0x0, 0x6, 0x1, 0x7, 0xb, 0xd, 0x9, 0xf, 0x3, 0x5},
     84     {0x0, 0xe, 0x2, 0xc, 0x8, 0x6, 0xa, 0x4, 0x1, 0xf, 0x3, 0xd, 0x9, 0x7, 0xb, 0x5},
     85     {0xa, 0x4, 0x0, 0xe, 0x2, 0xc, 0x8, 0x6, 0x9, 0x7, 0x3, 0xd, 0x1, 0xf, 0xb, 0x5}
     86 };
     87 
     88 static const uint8_t P[16] = {
     89     0x0, 0x2, 0x8, 0xa, 0x4, 0X6, 0xc, 0xe, 0x1, 0x3, 0x9, 0xb, 0x5, 0x7, 0xd, 0xf
     90 };
     91 
     92 static void _state_init(
     93     uint8_t X[BLOCK_BYTES],
     94     uint8_t Y[BLOCK_BYTES],
     95     uint8_t Z[BLOCK_BYTES],
     96     const uint8_t message[BLOCK_BYTES]
     97 )
     98 {
     99     uint8_t SHARES_0[BLOCK_BYTES];
    100     uint8_t SHARES_1[BLOCK_BYTES];
    101     randombytes(sizeof(SHARES_0), SHARES_0);
    102     randombytes(sizeof(SHARES_1), SHARES_1);
    103 
    104     memcpy(X, SHARES_0, BLOCK_BYTES);
    105     memcpy(Y, SHARES_1, BLOCK_BYTES);
    106     for (size_t i=0; i<BLOCK_BYTES; i++)
    107     {
    108         Z[i] = message[i] ^ SHARES_0[i] ^ SHARES_1[i];
    109     }
    110 }
    111 
    112 
    113 static void _compute_round_tweakeys(
    114     const uint8_t key[KEY_BYTES],
    115     const uint8_t tweak[TWEAK_BYTES],
    116     uint8_t RTK_X[ROUNDS][ROUND_TWEAKEY_BYTES],
    117     uint8_t RTK_Y[ROUNDS][ROUND_TWEAKEY_BYTES]
    118 )
    119 {
    120     uint8_t TK_X[TWEAKEY_BYTES];
    121     uint8_t TK_Y[TWEAKEY_BYTES];
    122     tweakey_state_init(TK_X, TK_Y, key, tweak);
    123     tweakey_state_extract(TK_X, TK_Y, 0, RTK_X[0], RTK_Y[0]);
    124 
    125     for (size_t i=1; i<ROUNDS; i++)
    126     {
    127         tweakey_state_update(TK_X, TK_Y);
    128         tweakey_state_extract(TK_X, TK_Y, i, RTK_X[i], RTK_Y[i]);
    129     }
    130 }
    131 
    132 
    133 static void _nonlinear_layer(
    134     uint8_t X[BLOCK_BYTES],
    135     uint8_t Y[BLOCK_BYTES],
    136     uint8_t Z[BLOCK_BYTES],
    137     const uint8_t RTK_X[ROUND_TWEAKEY_BYTES],
    138     const uint8_t RTK_Y[ROUND_TWEAKEY_BYTES]
    139 )
    140 {
    141     uint8_t x_hi, y_hi, z_hi;   // High nibbles for the Feistel network
    142     uint8_t x_lo, y_lo, z_lo;   // Low nibbles for the Feistel network
    143     uint8_t tmp0, tmp1, tmp2;
    144     uint8_t TMP_X[ROUND_TWEAKEY_BYTES];
    145     uint8_t TMP_Y[ROUND_TWEAKEY_BYTES];
    146     uint8_t TMP_Z[ROUND_TWEAKEY_BYTES];
    147 
    148     // Apply the RTK to two shares
    149     for (size_t j=0; j<ROUND_TWEAKEY_BYTES; j++)
    150     {
    151         TMP_X[j] = X[j] ^ RTK_X[j];
    152         TMP_Y[j] = Y[j] ^ RTK_Y[j];
    153     }
    154 
    155     // Threshold Implementation of the 8-bit S-box
    156     for (size_t j=0; j<ROUND_TWEAKEY_BYTES; j++)
    157     {
    158         // Decomposition into nibbles
    159         x_hi = TMP_X[j] >> 4;
    160         x_lo = TMP_X[j] & 0xf;
    161         y_hi = TMP_Y[j] >> 4;
    162         y_lo = TMP_Y[j] & 0xf;
    163         z_hi = Z[j] >> 4;
    164         z_lo = Z[j] & 0xf;
    165         // First 4-bit S-box
    166         tmp0 = G[(y_lo&7)>>1][z_lo];
    167         tmp1 = G[(z_lo&7)>>1][x_lo];
    168         tmp2 = G[(x_lo&7)>>1][y_lo];
    169         x_hi ^= F[tmp1][tmp2];
    170         y_hi ^= F[tmp2][tmp0];
    171         z_hi ^= F[tmp0][tmp1];
    172         // Second 4-bit S-box
    173         tmp0 = P[Q[y_hi&3 ^ (y_hi&8)>>1][z_hi]];
    174         tmp1 = P[Q[z_hi&3 ^ (z_hi&8)>>1][x_hi]];
    175         tmp2 = P[Q[x_hi&3 ^ (x_hi&8)>>1][y_hi]];
    176         x_lo ^= Q[tmp1&3 ^ (tmp1&8)>>1][tmp2];
    177         y_lo ^= Q[tmp2&3 ^ (tmp2&8)>>1][tmp0];
    178         z_lo ^= Q[tmp0&3 ^ (tmp0&8)>>1][tmp1];
    179         // Third 4-bit S-box
    180         tmp0 = G[(y_lo&7)>>1][z_lo] ^ 1;
    181         tmp1 = G[(z_lo&7)>>1][x_lo];
    182         tmp2 = G[(x_lo&7)>>1][y_lo];
    183         x_hi ^= F[tmp1][tmp2];
    184         y_hi ^= F[tmp2][tmp0];
    185         z_hi ^= F[tmp0][tmp1];
    186         // Build bytes from nibbles
    187         TMP_X[j] = (x_hi << 4 | x_lo);
    188         TMP_Y[j] = (y_hi << 4 | y_lo);
    189         TMP_Z[j] = (z_hi << 4 | z_lo);
    190     }
    191 
    192     for (size_t j=0; j<8; j++)
    193     {
    194         size_t dest_j = 15-j;
    195         X[dest_j] ^= TMP_X[j];
    196         Y[dest_j] ^= TMP_Y[j];
    197         Z[dest_j] ^= TMP_Z[j];
    198     }
    199 }
    200 
    201 static void _linear_layer(uint8_t X[BLOCK_BYTES])
    202 {
    203     X[15] ^= X[1];
    204     X[15] ^= X[2];
    205     X[15] ^= X[3];
    206     X[15] ^= X[4];
    207     X[15] ^= X[5];
    208     X[15] ^= X[6];
    209     X[15] ^= X[7];
    210 
    211     X[14] ^= X[7];
    212     X[13] ^= X[7];
    213     X[12] ^= X[7];
    214     X[11] ^= X[7];
    215     X[10] ^= X[7];
    216     X[9]  ^= X[7];
    217 }
    218 
    219 static void _permutation_layer(uint8_t X[BLOCK_BYTES], permutation p)
    220 {
    221     if (p == PERMUTATION_NONE)
    222     {
    223         return;
    224     }
    225 
    226     uint8_t X_old[BLOCK_BYTES];
    227     memcpy(X_old, X, BLOCK_BYTES);
    228 
    229     const uint8_t *pi = PERMUTATIONS[p];
    230 
    231     for (size_t j=0; j<BLOCK_BYTES; j++)
    232     {
    233         X[pi[j]] = X_old[j];
    234     }
    235 }
    236 
    237 static void _one_round_egfn(
    238     uint8_t X[BLOCK_BYTES],
    239     uint8_t Y[BLOCK_BYTES],
    240     uint8_t Z[BLOCK_BYTES],
    241     const uint8_t RTK_X[ROUND_TWEAKEY_BYTES],
    242     const uint8_t RTK_Y[ROUND_TWEAKEY_BYTES],
    243     permutation p
    244 )
    245 {
    246     _nonlinear_layer(X, Y, Z, RTK_X, RTK_Y);
    247     _linear_layer(X);
    248     _linear_layer(Y);
    249     _linear_layer(Z);
    250     _permutation_layer(X, p);
    251     _permutation_layer(Y, p);
    252     _permutation_layer(Z, p);
    253 }
    254 
    255 
    256 void lilliput_tbc_encrypt(
    257     const uint8_t key[KEY_BYTES],
    258     const uint8_t tweak[TWEAK_BYTES],
    259     const uint8_t message[BLOCK_BYTES],
    260     uint8_t ciphertext[BLOCK_BYTES]
    261 )
    262 {
    263     uint8_t X[BLOCK_BYTES];
    264     uint8_t Y[BLOCK_BYTES];
    265     uint8_t Z[BLOCK_BYTES];
    266     _state_init(X, Y, Z, message);
    267 
    268     uint8_t RTK_X[ROUNDS][ROUND_TWEAKEY_BYTES];
    269     uint8_t RTK_Y[ROUNDS][ROUND_TWEAKEY_BYTES];
    270     _compute_round_tweakeys(key, tweak, RTK_X, RTK_Y);
    271 
    272 
    273     for (size_t i=0; i<ROUNDS-1; i++)
    274     {
    275         _one_round_egfn(X, Y, Z, RTK_X[i], RTK_Y[i], PERMUTATION_ENCRYPTION);
    276     }
    277 
    278     _one_round_egfn(X, Y, Z, RTK_X[ROUNDS-1], RTK_Y[ROUNDS-1], PERMUTATION_NONE);
    279 
    280 
    281     for (size_t i=0; i<BLOCK_BYTES; i++)
    282     {
    283         ciphertext[i] = X[i] ^ Y[i] ^ Z[i];
    284     }
    285 }
    286 
    287 void lilliput_tbc_decrypt(
    288     const uint8_t key[KEY_BYTES],
    289     const uint8_t tweak[TWEAK_BYTES],
    290     const uint8_t ciphertext[BLOCK_BYTES],
    291     uint8_t message[BLOCK_BYTES]
    292 )
    293 {
    294     uint8_t X[BLOCK_BYTES];
    295     uint8_t Y[BLOCK_BYTES];
    296     uint8_t Z[BLOCK_BYTES];
    297     _state_init(X, Y, Z, ciphertext);
    298 
    299     uint8_t RTK_X[ROUNDS][ROUND_TWEAKEY_BYTES];
    300     uint8_t RTK_Y[ROUNDS][ROUND_TWEAKEY_BYTES];
    301     _compute_round_tweakeys(key, tweak, RTK_X, RTK_Y);
    302 
    303     for (size_t i=0; i<ROUNDS-1; i++)
    304     {
    305         _one_round_egfn(X, Y, Z, RTK_X[ROUNDS-1-i], RTK_Y[ROUNDS-1-i], PERMUTATION_DECRYPTION);
    306     }
    307 
    308     _one_round_egfn(X, Y, Z, RTK_X[0], RTK_Y[0], PERMUTATION_NONE);
    309 
    310     for (size_t i=0; i<BLOCK_BYTES; i++)
    311     {
    312         message[i] = X[i] ^ Y[i] ^ Z[i];
    313     }
    314 }