AC_CHECK_SIZEOF(long, 4)
AC_CHECK_SIZEOF(long long, 8)
+ PHP_CHECK_64BIT([
+ SHA3_DIR="sha3/generic32lc"
+ SHA3_OPT_SRC="$SHA3_DIR/KeccakP-1600-inplace32BI.c"
+ ],[
+ SHA3_DIR="sha3/generic64lc"
+ SHA3_OPT_SRC="$SHA3_DIR/KeccakP-1600-opt64.c"
+ ])
+ EXT_HASH_SHA3_SOURCES="$SHA3_OPT_SRC $SHA3_DIR/KeccakHash.c $SHA3_DIR/KeccakSponge.c"
+ PHP_HASH_CFLAGS="-I@ext_srcdir@/$SHA3_DIR -DKeccakP200_excluded -DKeccakP400_excluded -DKeccakP800_excluded"
EXT_HASH_SOURCES="hash.c hash_md.c hash_sha.c hash_ripemd.c hash_haval.c \
hash_tiger.c hash_gost.c hash_snefru.c hash_whirlpool.c hash_adler32.c \
- hash_crc32.c hash_fnv.c hash_joaat.c hash_sha3.c"
+ hash_crc32.c hash_fnv.c hash_joaat.c hash_sha3.c $EXT_HASH_SHA3_SOURCES"
EXT_HASH_HEADERS="php_hash.h php_hash_md.h php_hash_sha.h php_hash_ripemd.h \
php_hash_haval.h php_hash_tiger.h php_hash_gost.h php_hash_snefru.h \
php_hash_whirlpool.h php_hash_adler32.h php_hash_crc32.h \
php_hash_fnv.h php_hash_joaat.h php_hash_sha3.h"
- PHP_NEW_EXTENSION(hash, $EXT_HASH_SOURCES, $ext_shared)
+ PHP_NEW_EXTENSION(hash, $EXT_HASH_SOURCES, $ext_shared,,$PHP_HASH_CFLAGS)
ifdef([PHP_INSTALL_HEADERS], [
PHP_INSTALL_HEADERS(ext/hash, $EXT_HASH_HEADERS)
])
}
if (PHP_HASH != "no") {
- AC_DEFINE('HAVE_HASH_EXT', 1);
- EXTENSION("hash", "hash.c hash_md.c hash_sha.c hash_ripemd.c hash_haval.c "
- + "hash_tiger.c hash_gost.c hash_snefru.c hash_whirlpool.c "
- + "hash_adler32.c hash_crc32.c hash_joaat.c hash_fnv.c hash_sha3.c");
+ var sha3_arch_dir = "sha3/" + (X64 ? "generic64lc" : "generic32lc");
+ var sha3_dir = "ext/hash/" + sha3_arch_dir;
+ if (CHECK_HEADER_ADD_INCLUDE("KeccakHash.h", "CFLAGS_HASH", PHP_HASH + ";" + sha3_dir)) {
+ AC_DEFINE('HAVE_HASH_EXT', 1);
+ EXTENSION("hash", "hash.c hash_md.c hash_sha.c hash_ripemd.c hash_haval.c "
+ + "hash_tiger.c hash_gost.c hash_snefru.c hash_whirlpool.c "
+ + "hash_adler32.c hash_crc32.c hash_joaat.c hash_fnv.c hash_sha3.c");
+
+ ADD_SOURCES(sha3_dir, "KeccakHash.c KeccakSponge.c " + (X64 ? "KeccakP-1600-opt64.c" : "KeccakP-1600-inplace32BI.c"),
+ "hash");
+ ADD_FLAG("CFLAGS_HASH", "/DKeccakP200_excluded /DKeccakP400_excluded /DKeccakP800_excluded");
+
PHP_INSTALL_HEADERS("ext/hash/", "php_hash.h php_hash_md.h php_hash_sha.h php_hash_ripemd.h " +
"php_hash_haval.h php_hash_tiger.h php_hash_gost.h php_hash_snefru.h " +
- "php_hash_whirlpool.h php_hash_adler32.h php_hash_crc32.h php_hash_sha3.h");
+ "php_hash_whirlpool.h php_hash_adler32.h php_hash_crc32.h php_hash_sha3.h ");
+ } else {
+ WARNING("gd not enabled; libraries and headers not found");
+ }
}
#include "php_hash.h"
#include "php_hash_sha3.h"
-#if (defined(__APPLE__) || defined(__APPLE_CC__)) && \
- (defined(__BIG_ENDIAN__) || defined(__LITTLE_ENDIAN__))
-# if defined(__LITTLE_ENDIAN__)
-# undef WORDS_BIGENDIAN
-# else
-# if defined(__BIG_ENDIAN__)
-# define WORDS_BIGENDIAN
-# endif
-# endif
-#endif
+#define SUCCESS SHA3_SUCCESS /* Avoid conflict between KeccakHash.h and zend_types.h */
+#include "KeccakHash.h"
-static inline uint64_t rol64(uint64_t v, unsigned char b) {
- return (v << b) | (v >> (64 - b));
-}
-static inline unsigned char idx(unsigned char x, unsigned char y) {
- return x + (5 * y);
-}
-
-#ifdef WORDS_BIGENDIAN
-static inline uint64_t load64(const unsigned char* x) {
- char i;
- uint64_t ret = 0;
- for (i = 7; i >= 0; --i) {
- ret <<= 8;
- ret |= x[i];
- }
- return ret;
-}
-static inline void store64(unsigned char* x, uint64_t val) {
- char i;
- for (i = 0; i < 8; ++i) {
- x[i] = val & 0xFF;
- val >>= 8;
- }
-}
-static inline void xor64(unsigned char* x, uint64_t val) {
- char i;
- for (i = 0; i < 8; ++i) {
- x[i] ^= val & 0xFF;
- val >>= 8;
- }
-}
-# define readLane(x, y) load64(ctx->state+sizeof(uint64_t)*idx(x, y))
-# define writeLane(x, y, v) store64(ctx->state+sizeof(uint64_t)*idx(x, y), v)
-# define XORLane(x, y, v) xor64(ctx->state+sizeof(uint64_t)*idx(x, y), v)
-#else
-# define readLane(x, y) (((uint64_t*)ctx->state)[idx(x,y)])
-# define writeLane(x, y, v) (((uint64_t*)ctx->state)[idx(x,y)] = v)
-# define XORLane(x, y, v) (((uint64_t*)ctx->state)[idx(x,y)] ^= v)
-#endif
-
-static inline char LFSR86540(unsigned char* pLFSR)
-{
- unsigned char LFSR = *pLFSR;
- char result = LFSR & 0x01;
- if (LFSR & 0x80) {
- // Primitive polynomial over GF(2): x^8+x^6+x^5+x^4+1
- LFSR = (LFSR << 1) ^ 0x71;
- } else {
- LFSR <<= 1;
- }
- *pLFSR = LFSR;
- return result;
-}
-
-static void permute(PHP_SHA3_CTX* ctx) {
- unsigned char LFSRstate = 0x01;
- unsigned char round;
-
- for (round = 0; round < 24; ++round) {
- { // Theta step (see [Keccak Reference, Section 2.3.2])
- uint64_t C[5], D;
- unsigned char x, y;
- for (x = 0; x < 5; ++x) {
- C[x] = readLane(x, 0) ^ readLane(x, 1) ^
- readLane(x, 2) ^ readLane(x, 3) ^ readLane(x, 4);
- }
- for (x = 0; x < 5; ++x) {
- D = C[(x+4)%5] ^ rol64(C[(x+1)%5], 1);
- for (y = 0; y < 5; ++y) {
- XORLane(x, y, D);
- }
- }
- }
-
- { // p and Pi steps (see [Keccak Reference, Sections 2.3.3 and 2.3.4])
- unsigned char x = 1, y = 0, t;
- uint64_t current = readLane(x, y);
- for (t = 0; t < 24; ++t) {
- unsigned char r = ((t + 1) * (t + 2) / 2) % 64;
- unsigned char Y = (2*x + 3*y) % 5;
- uint64_t temp;
- x = y;
- y = Y;
- temp = readLane(x, y);
- writeLane(x, y, rol64(current, r));
- current = temp;
- }
- }
-
- { // X step (see [Keccak Reference, Section 2.3.1])
- unsigned char x, y;
- for (y = 0; y < 5; ++y) {
- uint64_t temp[5];
- for (x = 0; x < 5; ++x) {
- temp[x] = readLane(x, y);
- }
- for (x = 0; x < 5; ++x) {
- writeLane(x, y, temp[x] ^((~temp[(x+1)%5]) & temp[(x+2)%5]));
- }
- }
- }
-
- { // i step (see [Keccak Reference, Section 2.3.5])
- unsigned char j;
- for (j = 0; j < 7; ++j) {
- if (LFSR86540(&LFSRstate)) {
- uint64_t bitPos = (1<<j) - 1;
- XORLane(0, 0, (uint64_t)1 << bitPos);
- }
- }
- }
- }
-}
// ==========================================================================
-static void PHP_SHA3_Init(PHP_SHA3_CTX* ctx,
- int bits) {
- memset(ctx, 0, sizeof(PHP_SHA3_CTX));
-}
-
-static void PHP_SHA3_Update(PHP_SHA3_CTX* ctx,
- const unsigned char* buf,
- unsigned int count,
- size_t block_size) {
- while (count > 0) {
- unsigned int len = block_size - ctx->pos;
- if (len > count) len = count;
- count -= len;
- while (len-- > 0) {
- ctx->state[ctx->pos++] ^= *(buf++);
- }
- if (ctx->pos >= block_size) {
- permute(ctx);
- ctx->pos = 0;
- }
- }
-}
-
-static void PHP_SHA3_Final(unsigned char* digest,
- PHP_SHA3_CTX* ctx,
- int block_size,
- int digest_size) {
- int len = digest_size;
-
- // Pad state to finalize
- ctx->state[ctx->pos++] ^= 0x06;
- ctx->state[block_size-1] ^= 0x80;
- permute(ctx);
-
- // Square output for digest
- for(;;) {
- int bs = (len < block_size) ? len : block_size;
- memcpy(digest, ctx->state, bs);
- digest += bs;
- len -= bs;
- if (!len) break;
- permute(ctx);
- }
-
- // Zero out context
- memset(ctx, 0, sizeof(PHP_SHA3_CTX));
+/*
+ * Copy handler for the SHA-3 hash ops: duplicates the Keccak_HashInstance
+ * referenced by orig_context into the instance referenced by dest_context.
+ * The destination's hashinstance pointer must already refer to storage for a
+ * Keccak_HashInstance; this function does not allocate. `ops` is unused.
+ * Always returns SUCCESS.
+ */
+static int hash_sha3_copy(const void *ops, void *orig_context, void *dest_context)
+{
+    const PHP_SHA3_CTX *src = orig_context;
+    PHP_SHA3_CTX *dst = dest_context;
+
+    memcpy(dst->hashinstance, src->hashinstance, sizeof(Keccak_HashInstance));
+
+    return SUCCESS;
}
-// ==========================================================================
-
#define DECLARE_SHA3_OPS(bits) \
void PHP_SHA3##bits##Init(PHP_SHA3_##bits##_CTX* ctx) { \
- PHP_SHA3_Init(ctx, bits); \
+ ctx->hashinstance = emalloc(sizeof(Keccak_HashInstance)); \
+ Keccak_HashInitialize_SHA3_##bits((Keccak_HashInstance *)ctx->hashinstance); \
} \
void PHP_SHA3##bits##Update(PHP_SHA3_##bits##_CTX* ctx, \
const unsigned char* input, \
unsigned int inputLen) { \
- PHP_SHA3_Update(ctx, input, inputLen, \
- (1600 - (2 * bits)) >> 3); \
+ /* Keccak_HashUpdate() takes the length in bits as a DataLength. Widen */ \
+ /* before multiplying so the byte-to-bit conversion is not performed in */ \
+ /* unsigned int, where it would wrap for inputs of 512 MiB or more. */ \
+ Keccak_HashUpdate((Keccak_HashInstance *)ctx->hashinstance, input, (DataLength)inputLen * 8); \
} \
void PHP_SHA3##bits##Final(unsigned char* digest, \
PHP_SHA3_##bits##_CTX* ctx) { \
- PHP_SHA3_Final(digest, ctx, \
- (1600 - (2 * bits)) >> 3, \
- bits >> 3); \
+ Keccak_HashFinal((Keccak_HashInstance *)ctx->hashinstance, digest); \
+ efree(ctx->hashinstance); \
+ ctx->hashinstance = NULL; \
} \
const php_hash_ops php_hash_sha3_##bits##_ops = { \
(php_hash_init_func_t) PHP_SHA3##bits##Init, \
(php_hash_update_func_t) PHP_SHA3##bits##Update, \
(php_hash_final_func_t) PHP_SHA3##bits##Final, \
- php_hash_copy, \
+ hash_sha3_copy, \
bits >> 3, \
(1600 - (2 * bits)) >> 3, \
sizeof(PHP_SHA3_##bits##_CTX), \
#include "php.h"
typedef struct {
- unsigned char state[200]; // 5 * 5 * sizeof(uint64)
- uint32_t pos;
+ /* Untyped pointer to the Keccak_HashInstance holding the hash state; */
+ /* hash_sha3.c allocates it in PHP_SHA3*Init and frees it in PHP_SHA3*Final. */
+ void *hashinstance;
} PHP_SHA3_CTX;
typedef PHP_SHA3_CTX PHP_SHA3_224_CTX;
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#include <string.h>
+#include "KeccakHash.h"
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Set up a hash instance: initializes the underlying width-1600 sponge with
+ * the given rate and capacity, then records the output length (in bits) and
+ * the delimited suffix. Returns FAIL when delimitedSuffix is zero, otherwise
+ * the status reported by the sponge initialization.
+ */
+HashReturn Keccak_HashInitialize(Keccak_HashInstance *instance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix)
+{
+    HashReturn status;
+
+    if (!delimitedSuffix) {
+        return FAIL;
+    }
+
+    status = (HashReturn)KeccakWidth1600_SpongeInitialize(&instance->sponge, rate, capacity);
+    if (status == SUCCESS) {
+        /* Only a successfully initialized sponge gets its parameters recorded. */
+        instance->fixedOutputLength = hashbitlen;
+        instance->delimitedSuffix = delimitedSuffix;
+    }
+    return status;
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Absorb databitlen bits of input. All whole bytes are handed to the sponge.
+ * If the length is not a multiple of 8, the remaining bits (held in the least
+ * significant bits of the following byte) are merged with the delimited
+ * suffix; when the merged value no longer fits in one byte, its low byte is
+ * absorbed and the high byte becomes the new suffix.
+ * Returns the status of the last sponge call made.
+ */
+HashReturn Keccak_HashUpdate(Keccak_HashInstance *instance, const BitSequence *data, DataLength databitlen)
+{
+    const DataLength wholeBytes = databitlen / 8;
+    const unsigned int trailingBits = (unsigned int)(databitlen % 8);
+    HashReturn status = (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, wholeBytes);
+    unsigned char lastByte;
+    unsigned short merged;
+
+    /* Byte-aligned input, or a failed absorb: nothing further to do. */
+    if (trailingBits == 0 || status != SUCCESS) {
+        return status;
+    }
+
+    /* The last partial byte is assumed to be aligned on the least significant bits */
+    lastByte = data[wholeBytes];
+    /* Concatenate the trailing input bits with the bits of the suffix */
+    merged = (unsigned short)((unsigned short)lastByte | ((unsigned short)instance->delimitedSuffix << trailingBits));
+
+    if ((merged & 0xFF00) != 0x0000) {
+        /* Overflowed one byte: absorb the low byte, keep the rest as suffix. */
+        unsigned char oneByte[1];
+        oneByte[0] = merged & 0xFF;
+        status = (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, oneByte, 1);
+        instance->delimitedSuffix = (merged >> 8) & 0xFF;
+    }
+    else {
+        instance->delimitedSuffix = merged & 0xFF;
+    }
+    return status;
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Finish hashing: absorbs the delimited suffix, then squeezes
+ * fixedOutputLength/8 bytes into hashval. If absorbing the suffix fails,
+ * that status is returned and nothing is squeezed.
+ */
+HashReturn Keccak_HashFinal(Keccak_HashInstance *instance, BitSequence *hashval)
+{
+    HashReturn status = (HashReturn)KeccakWidth1600_SpongeAbsorbLastFewBits(&instance->sponge, instance->delimitedSuffix);
+
+    if (status != SUCCESS) {
+        return status;
+    }
+    return (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, hashval, instance->fixedOutputLength / 8);
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Squeeze databitlen bits of output into data. Only whole-byte lengths are
+ * supported: a length that is not a multiple of 8 yields FAIL without
+ * touching the sponge.
+ */
+HashReturn Keccak_HashSqueeze(Keccak_HashInstance *instance, BitSequence *data, DataLength databitlen)
+{
+    return ((databitlen % 8) == 0)
+        ? (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, data, databitlen / 8)
+        : FAIL;
+}
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _KeccakHashInterface_h_
+#define _KeccakHashInterface_h_
+
+#ifndef KeccakP1600_excluded
+
+#include "KeccakSponge.h"
+#include <string.h>
+
+typedef unsigned char BitSequence;
+typedef size_t DataLength;
+typedef enum { SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2 } HashReturn;
+
+/* State carried across the calls of one Keccak hashing operation. */
+typedef struct {
+ /* Underlying width-1600 sponge instance. */
+ KeccakWidth1600_SpongeInstance sponge;
+ /* Output length in bits as passed to Keccak_HashInitialize() (0 means arbitrarily-long output). */
+ unsigned int fixedOutputLength;
+ /* Suffix bits appended to the end of the input message; never 0 after a successful Keccak_HashInitialize(). */
+ unsigned char delimitedSuffix;
+} Keccak_HashInstance;
+
+/**
+ * Function to initialize the Keccak[r, c] sponge function instance used in sequential hashing mode.
+ * @param hashInstance Pointer to the hash instance to be initialized.
+ * @param rate The value of the rate r.
+ * @param capacity The value of the capacity c.
+ * @param hashbitlen The desired number of output bits,
+ * or 0 for an arbitrarily-long output.
+ * @param delimitedSuffix Bits that will be automatically appended to the end
+ * of the input message, as in domain separation.
+ * This is a byte containing from 0 to 7 bits
+ * formatted like the @a delimitedData parameter of
+ * the Keccak_SpongeAbsorbLastFewBits() function.
+ * @pre One must have r+c=1600 and the rate a multiple of 8 bits in this implementation.
+ * @return SUCCESS if successful, FAIL otherwise.
+ */
+HashReturn Keccak_HashInitialize(Keccak_HashInstance *hashInstance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix);
+
+/** Macro to initialize a SHAKE128 instance as specified in the FIPS 202 standard.
+ */
+#define Keccak_HashInitialize_SHAKE128(hashInstance) Keccak_HashInitialize(hashInstance, 1344, 256, 0, 0x1F)
+
+/** Macro to initialize a SHAKE256 instance as specified in the FIPS 202 standard.
+ */
+#define Keccak_HashInitialize_SHAKE256(hashInstance) Keccak_HashInitialize(hashInstance, 1088, 512, 0, 0x1F)
+
+/** Macro to initialize a SHA3-224 instance as specified in the FIPS 202 standard.
+ */
+#define Keccak_HashInitialize_SHA3_224(hashInstance) Keccak_HashInitialize(hashInstance, 1152, 448, 224, 0x06)
+
+/** Macro to initialize a SHA3-256 instance as specified in the FIPS 202 standard.
+ */
+#define Keccak_HashInitialize_SHA3_256(hashInstance) Keccak_HashInitialize(hashInstance, 1088, 512, 256, 0x06)
+
+/** Macro to initialize a SHA3-384 instance as specified in the FIPS 202 standard.
+ */
+#define Keccak_HashInitialize_SHA3_384(hashInstance) Keccak_HashInitialize(hashInstance, 832, 768, 384, 0x06)
+
+/** Macro to initialize a SHA3-512 instance as specified in the FIPS 202 standard.
+ */
+#define Keccak_HashInitialize_SHA3_512(hashInstance) Keccak_HashInitialize(hashInstance, 576, 1024, 512, 0x06)
+
+/**
+ * Function to give input data to be absorbed.
+ * @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize().
+ * @param data Pointer to the input data.
+ * When @a databitlen is not a multiple of 8, the last bits of data must be
+ * in the least significant bits of the last byte (little-endian convention).
+ * @param databitlen The number of input bits provided in the input data.
+ * @pre In the previous call to Keccak_HashUpdate(), databitlen was a multiple of 8.
+ * @return SUCCESS if successful, FAIL otherwise.
+ */
+HashReturn Keccak_HashUpdate(Keccak_HashInstance *hashInstance, const BitSequence *data, DataLength databitlen);
+
+/**
+ * Function to call after all input blocks have been input and to get
+ * output bits if the length was specified when calling Keccak_HashInitialize().
+ * @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize().
+ * If @a hashbitlen was not 0 in the call to Keccak_HashInitialize(), the number of
+ * output bits is equal to @a hashbitlen.
+ * If @a hashbitlen was 0 in the call to Keccak_HashInitialize(), the output bits
+ * must be extracted using the Keccak_HashSqueeze() function.
+ * @param hashval Pointer to the buffer where to store the output data.
+ * @return SUCCESS if successful, FAIL otherwise.
+ */
+HashReturn Keccak_HashFinal(Keccak_HashInstance *hashInstance, BitSequence *hashval);
+
+ /**
+ * Function to squeeze output data.
+ * @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize().
+ * @param data Pointer to the buffer where to store the output data.
+ * @param databitlen The number of output bits desired (must be a multiple of 8).
+ * @pre Keccak_HashFinal() must have been already called.
+ * @pre @a databitlen is a multiple of 8.
+ * @return SUCCESS if successful, FAIL otherwise.
+ */
+HashReturn Keccak_HashSqueeze(Keccak_HashInstance *hashInstance, BitSequence *data, DataLength databitlen);
+
+#endif
+
+#endif
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _KeccakP_1600_SnP_h_
+#define _KeccakP_1600_SnP_h_
+
+/** For the documentation, see SnP-documentation.h.
+ */
+
+#define KeccakP1600_implementation "in-place 32-bit optimized implementation"
+#define KeccakP1600_stateSizeInBytes 200
+#define KeccakP1600_stateAlignment 8
+
+#define KeccakP1600_StaticInitialize()
+void KeccakP1600_Initialize(void *state);
+void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset);
+void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
+void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
+void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
+void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds);
+void KeccakP1600_Permute_12rounds(void *state);
+void KeccakP1600_Permute_24rounds(void *state);
+void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
+void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
+
+#endif
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#include <string.h>
+#include "brg_endian.h"
+#include "KeccakP-1600-SnP.h"
+#include "SnP-Relaned.h"
+
+typedef unsigned char UINT8;
+typedef unsigned int UINT32;
+/* WARNING: on 8-bit and 16-bit platforms, this should be replaced by: */
+/* typedef unsigned long UINT32; */
+
+/*
+ * Rotate the 32-bit value `a` left by `offset` bits. `offset` must be in the
+ * range 1..31: an offset of 0 would shift right by 32, which is undefined.
+ * `a` is parenthesized so the cast applies to the whole argument expression.
+ * Both arguments are evaluated twice, so they must have no side effects.
+ */
+#define ROL32(a, offset) ((((UINT32)(a)) << (offset)) ^ (((UINT32)(a)) >> (32-(offset))))
+
+/* Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
+/*
+ * prepareToBitInterleaving: copies `low` into temp0 and `high` into temp1,
+ * then applies four masked exchange steps (distances 1, 2, 4, 8) to each of
+ * them, using `temp` as scratch. The macros below then take the lower 16 bits
+ * of temp0/temp1 for the "even" half-lane and the upper 16 bits for the "odd"
+ * half-lane.
+ * `low` and `high` are each expanded exactly once, so callers may pass
+ * expressions with side effects such as *(p++). temp, temp0 and temp1 are
+ * expanded many times and must be plain UINT32 lvalues.
+ * The expansion is a sequence of statements ending in ';', not an expression.
+ */
+#define prepareToBitInterleaving(low, high, temp, temp0, temp1) \
+ temp0 = (low); \
+ temp = (temp0 ^ (temp0 >> 1)) & 0x22222222UL; temp0 = temp0 ^ temp ^ (temp << 1); \
+ temp = (temp0 ^ (temp0 >> 2)) & 0x0C0C0C0CUL; temp0 = temp0 ^ temp ^ (temp << 2); \
+ temp = (temp0 ^ (temp0 >> 4)) & 0x00F000F0UL; temp0 = temp0 ^ temp ^ (temp << 4); \
+ temp = (temp0 ^ (temp0 >> 8)) & 0x0000FF00UL; temp0 = temp0 ^ temp ^ (temp << 8); \
+ temp1 = (high); \
+ temp = (temp1 ^ (temp1 >> 1)) & 0x22222222UL; temp1 = temp1 ^ temp ^ (temp << 1); \
+ temp = (temp1 ^ (temp1 >> 2)) & 0x0C0C0C0CUL; temp1 = temp1 ^ temp ^ (temp << 2); \
+ temp = (temp1 ^ (temp1 >> 4)) & 0x00F000F0UL; temp1 = temp1 ^ temp ^ (temp << 4); \
+ temp = (temp1 ^ (temp1 >> 8)) & 0x0000FF00UL; temp1 = temp1 ^ temp ^ (temp << 8);
+
+/* Convert (low, high) to interleaved form and XOR the result into `even` and `odd`; each of those is expanded once. */
+#define toBitInterleavingAndXOR(low, high, even, odd, temp, temp0, temp1) \
+ prepareToBitInterleaving(low, high, temp, temp0, temp1) \
+ even ^= (temp0 & 0x0000FFFF) | (temp1 << 16); \
+ odd ^= (temp0 >> 16) | (temp1 & 0xFFFF0000);
+
+/* Convert (low, high) to interleaved form and AND the result into `even` and `odd`; each of those is expanded once. */
+#define toBitInterleavingAndAND(low, high, even, odd, temp, temp0, temp1) \
+ prepareToBitInterleaving(low, high, temp, temp0, temp1) \
+ even &= (temp0 & 0x0000FFFF) | (temp1 << 16); \
+ odd &= (temp0 >> 16) | (temp1 & 0xFFFF0000);
+
+/* Convert (low, high) to interleaved form and store the result in `even` and `odd`; each of those is expanded once. */
+#define toBitInterleavingAndSet(low, high, even, odd, temp, temp0, temp1) \
+ prepareToBitInterleaving(low, high, temp, temp0, temp1) \
+ even = (temp0 & 0x0000FFFF) | (temp1 << 16); \
+ odd = (temp0 >> 16) | (temp1 & 0xFFFF0000);
+
+/* Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
+/*
+ * prepareFromBitInterleaving: the reverse of the to-interleaving macros.
+ * It regroups the 16-bit halves of `even` and `odd` into temp0 and temp1 and
+ * then applies the masked exchange steps in the opposite order (distances
+ * 8, 4, 2, 1) to each, using `temp` as scratch.
+ * `even` and `odd` are each expanded exactly once, so callers may pass
+ * expressions with side effects such as *(p++). temp, temp0 and temp1 must be
+ * plain UINT32 lvalues.
+ * The expansion is a sequence of statements ending in ';', not an expression.
+ */
+#define prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \
+ temp0 = (even); \
+ temp1 = (odd); \
+ temp = (temp0 & 0x0000FFFF) | (temp1 << 16); \
+ temp1 = (temp0 >> 16) | (temp1 & 0xFFFF0000); \
+ temp0 = temp; \
+ temp = (temp0 ^ (temp0 >> 8)) & 0x0000FF00UL; temp0 = temp0 ^ temp ^ (temp << 8); \
+ temp = (temp0 ^ (temp0 >> 4)) & 0x00F000F0UL; temp0 = temp0 ^ temp ^ (temp << 4); \
+ temp = (temp0 ^ (temp0 >> 2)) & 0x0C0C0C0CUL; temp0 = temp0 ^ temp ^ (temp << 2); \
+ temp = (temp0 ^ (temp0 >> 1)) & 0x22222222UL; temp0 = temp0 ^ temp ^ (temp << 1); \
+ temp = (temp1 ^ (temp1 >> 8)) & 0x0000FF00UL; temp1 = temp1 ^ temp ^ (temp << 8); \
+ temp = (temp1 ^ (temp1 >> 4)) & 0x00F000F0UL; temp1 = temp1 ^ temp ^ (temp << 4); \
+ temp = (temp1 ^ (temp1 >> 2)) & 0x0C0C0C0CUL; temp1 = temp1 ^ temp ^ (temp << 2); \
+ temp = (temp1 ^ (temp1 >> 1)) & 0x22222222UL; temp1 = temp1 ^ temp ^ (temp << 1);
+
+/* Convert (even, odd) back to plain form and store the two words in `low` and `high`; each of those is expanded once. */
+#define fromBitInterleaving(even, odd, low, high, temp, temp0, temp1) \
+ prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \
+ low = temp0; \
+ high = temp1;
+
+/* Convert (even, odd) back to plain form and store lowIn/highIn XORed with the result in lowOut/highOut; each argument is expanded once. */
+#define fromBitInterleavingAndXOR(even, odd, lowIn, highIn, lowOut, highOut, temp, temp0, temp1) \
+ prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \
+ lowOut = lowIn ^ temp0; \
+ highOut = highIn ^ temp1;
+
+/*
+ * Clear `length` bytes starting at byte `offset` within lane `lanePosition`
+ * of the bit-interleaved state, leaving the lane's other bytes unchanged.
+ * An 8-byte mask (0x00 inside the range, 0xFF outside) is converted to
+ * interleaved form and ANDed into the lane's two half-lanes.
+ * Requires offset + length <= 8, since the mask buffer holds a single lane.
+ */
+void KeccakP1600_SetBytesInLaneToZero(void *state, unsigned int lanePosition, unsigned int offset, unsigned int length)
+{
+    UINT8 laneAsBytes[8];
+    UINT32 low, high;
+    UINT32 temp, temp0, temp1;
+    UINT32 *stateAsHalfLanes = (UINT32*)state;
+
+    memset(laneAsBytes, 0xFF, offset);
+    memset(laneAsBytes+offset, 0x00, length);
+    memset(laneAsBytes+offset+length, 0xFF, 8-offset-length);
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+    /* Read the words with memcpy: dereferencing a UINT32* that points into a
+       byte array violates the aliasing rules and may be misaligned. */
+    memcpy(&low, laneAsBytes+0, sizeof(low));
+    memcpy(&high, laneAsBytes+4, sizeof(high));
+#else
+    low = laneAsBytes[0]
+        | ((UINT32)(laneAsBytes[1]) << 8)
+        | ((UINT32)(laneAsBytes[2]) << 16)
+        | ((UINT32)(laneAsBytes[3]) << 24);
+    high = laneAsBytes[4]
+        | ((UINT32)(laneAsBytes[5]) << 8)
+        | ((UINT32)(laneAsBytes[6]) << 16)
+        | ((UINT32)(laneAsBytes[7]) << 24);
+#endif
+    toBitInterleavingAndAND(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
+}
+
+/* ---------------------------------------------------------------- */
+
+/* Reset the whole permutation state (KeccakP1600_stateSizeInBytes bytes) to zero. */
+void KeccakP1600_Initialize(void *state)
+{
+    memset(state, 0, KeccakP1600_stateSizeInBytes);
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * XOR a single byte into the bit-interleaved state at byte position `offset`.
+ * The byte is placed into the low or high 32-bit word of its lane according
+ * to its position within the lane, and that word pair is converted to
+ * interleaved form and XORed into the lane's two half-lanes.
+ */
+void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset)
+{
+    UINT32 *halfLanes = (UINT32 *)state + 2 * (offset / 8);
+    const unsigned int byteInLane = offset % 8;
+    const UINT32 shifted = (UINT32)byte << ((byteInLane % 4) * 8);
+    const UINT32 low = (byteInLane < 4) ? shifted : 0;
+    const UINT32 high = (byteInLane < 4) ? 0 : shifted;
+    UINT32 temp, temp0, temp1;
+
+    toBitInterleavingAndXOR(low, high, halfLanes[0], halfLanes[1], temp, temp0, temp1);
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * XOR `length` bytes from `data` into lane `lanePosition` of the
+ * bit-interleaved state, starting at byte `offset` within the lane.
+ * The bytes are placed in a zeroed 8-byte buffer, so the lane's other bytes
+ * are XORed with zero and stay unchanged.
+ * Requires offset + length <= 8, since the buffer holds a single lane.
+ */
+void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
+{
+    UINT8 laneAsBytes[8];
+    UINT32 low, high;
+    UINT32 temp, temp0, temp1;
+    UINT32 *stateAsHalfLanes = (UINT32*)state;
+
+    memset(laneAsBytes, 0, 8);
+    memcpy(laneAsBytes+offset, data, length);
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+    /* Read the words with memcpy: dereferencing a UINT32* that points into a
+       byte array violates the aliasing rules and may be misaligned. */
+    memcpy(&low, laneAsBytes+0, sizeof(low));
+    memcpy(&high, laneAsBytes+4, sizeof(high));
+#else
+    low = laneAsBytes[0]
+        | ((UINT32)(laneAsBytes[1]) << 8)
+        | ((UINT32)(laneAsBytes[2]) << 16)
+        | ((UINT32)(laneAsBytes[3]) << 24);
+    high = laneAsBytes[4]
+        | ((UINT32)(laneAsBytes[5]) << 8)
+        | ((UINT32)(laneAsBytes[6]) << 16)
+        | ((UINT32)(laneAsBytes[7]) << 24);
+#endif
+    toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * XOR the first `laneCount` 8-byte lanes of `data` into the bit-interleaved
+ * state, starting at lane 0.
+ */
+void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+ const UINT32 * pI = (const UINT32 *)data;
+ UINT32 * pS = (UINT32*)state;
+ UINT32 t, x0, x1;
+ int i;
+ /* i only counts iterations; the lanes are walked in ascending order via pI/pS. */
+ for (i = laneCount-1; i >= 0; --i) {
+#ifdef NO_MISALIGNED_ACCESSES
+ UINT32 low;
+ UINT32 high;
+ /* `data` may be unaligned, so fetch each input word with memcpy. */
+ memcpy(&low, pI++, 4);
+ memcpy(&high, pI++, 4);
+ toBitInterleavingAndXOR(low, high, *(pS++), *(pS++), t, x0, x1);
+#else
+ /* The macro expands each of its first four arguments exactly once, in
+    separate statements, so every pointer advances once per half-lane. */
+ toBitInterleavingAndXOR(*(pI++), *(pI++), *(pS++), *(pS++), t, x0, x1)
+#endif
+ }
+#else
+ unsigned int lanePosition;
+ for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
+ UINT8 laneAsBytes[8];
+ memcpy(laneAsBytes, data+lanePosition*8, 8);
+ /* Assemble the two words byte by byte, least significant byte first. */
+ UINT32 low = laneAsBytes[0]
+ | ((UINT32)(laneAsBytes[1]) << 8)
+ | ((UINT32)(laneAsBytes[2]) << 16)
+ | ((UINT32)(laneAsBytes[3]) << 24);
+ UINT32 high = laneAsBytes[4]
+ | ((UINT32)(laneAsBytes[5]) << 8)
+ | ((UINT32)(laneAsBytes[6]) << 16)
+ | ((UINT32)(laneAsBytes[7]) << 24);
+ /* NOTE(review): even and odd are declared but not used in this branch. */
+ UINT32 even, odd, temp, temp0, temp1;
+ UINT32 *stateAsHalfLanes = (UINT32*)state;
+ toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
+ }
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Byte-range entry point for XORing data into the state. Forwards to
+ * SnP_AddBytes, supplying the whole-lane helper (KeccakP1600_AddLanes) and
+ * the partial-lane helper (KeccakP1600_AddBytesInLane).
+ * The trailing 8 is presumably the lane size in bytes; SnP_AddBytes comes
+ * from SnP-Relaned.h, which is not shown here.
+ */
+void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
+{
+    SnP_AddBytes(state, data, offset, length,
+        KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8);
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Replace `length` bytes of lane `lanePosition`, starting at byte `offset`
+ * within the lane, with the bytes from `data`. Implemented as clear-then-XOR:
+ * the target bytes are zeroed first, so XORing the new bytes in stores them.
+ */
+void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
+{
+    /* Step 1: zero the destination bytes. */
+    KeccakP1600_SetBytesInLaneToZero(state, lanePosition, offset, length);
+
+    /* Step 2: XOR the new bytes into the zeroed range. */
+    KeccakP1600_AddBytesInLane(state, lanePosition, data, offset, length);
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Overwrite the first `laneCount` lanes of the bit-interleaved state with the
+ * corresponding 8-byte lanes of `data`, starting at lane 0.
+ */
+void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+ const UINT32 * pI = (const UINT32 *)data;
+ UINT32 * pS = (UINT32 *)state;
+ UINT32 t, x0, x1;
+ int i;
+ /* i only counts iterations; the lanes are walked in ascending order via pI/pS. */
+ for (i = laneCount-1; i >= 0; --i) {
+#ifdef NO_MISALIGNED_ACCESSES
+ UINT32 low;
+ UINT32 high;
+ /* `data` may be unaligned, so fetch each input word with memcpy. */
+ memcpy(&low, pI++, 4);
+ memcpy(&high, pI++, 4);
+ toBitInterleavingAndSet(low, high, *(pS++), *(pS++), t, x0, x1);
+#else
+ /* The macro expands each of its first four arguments exactly once, in
+    separate statements, so every pointer advances once per half-lane. */
+ toBitInterleavingAndSet(*(pI++), *(pI++), *(pS++), *(pS++), t, x0, x1)
+#endif
+ }
+#else
+ unsigned int lanePosition;
+ for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
+ UINT8 laneAsBytes[8];
+ memcpy(laneAsBytes, data+lanePosition*8, 8);
+ /* Assemble the two words byte by byte, least significant byte first. */
+ UINT32 low = laneAsBytes[0]
+ | ((UINT32)(laneAsBytes[1]) << 8)
+ | ((UINT32)(laneAsBytes[2]) << 16)
+ | ((UINT32)(laneAsBytes[3]) << 24);
+ UINT32 high = laneAsBytes[4]
+ | ((UINT32)(laneAsBytes[5]) << 8)
+ | ((UINT32)(laneAsBytes[6]) << 16)
+ | ((UINT32)(laneAsBytes[7]) << 24);
+ /* NOTE(review): even and odd are declared but not used in this branch. */
+ UINT32 even, odd, temp, temp0, temp1;
+ UINT32 *stateAsHalfLanes = (UINT32*)state;
+ toBitInterleavingAndSet(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
+ }
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Byte-range entry point for overwriting part of the state. Forwards to
+ * SnP_OverwriteBytes, supplying the whole-lane helper
+ * (KeccakP1600_OverwriteLanes) and the partial-lane helper
+ * (KeccakP1600_OverwriteBytesInLane).
+ * The trailing 8 is presumably the lane size in bytes; SnP_OverwriteBytes
+ * comes from SnP-Relaned.h, which is not shown here.
+ */
+void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
+{
+    SnP_OverwriteBytes(state, data, offset, length,
+        KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8);
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Zero the first `byteCount` bytes of the state. Complete lanes are cleared
+ * directly (an all-zero lane is all-zero in interleaved form too); a trailing
+ * partial lane is cleared through KeccakP1600_SetBytesInLaneToZero so that
+ * its remaining bytes are preserved.
+ */
+void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
+{
+    const unsigned int fullLanes = byteCount / 8;
+    const unsigned int tailBytes = byteCount % 8;
+
+    memset(state, 0, (size_t)fullLanes * 8);
+
+    if (tailBytes != 0) {
+        KeccakP1600_SetBytesInLaneToZero(state, fullLanes, 0, tailBytes);
+    }
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Copy `length` bytes of lane `lanePosition`, starting at byte `offset`
+ * within the lane, from the bit-interleaved state into `data`.
+ * The lane is first converted back to plain form and serialized least
+ * significant byte first.
+ * Requires offset + length <= 8, since the buffer holds a single lane.
+ */
+void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length)
+{
+    const UINT32 *stateAsHalfLanes = (const UINT32*)state;
+    UINT32 low, high, temp, temp0, temp1;
+    UINT8 laneAsBytes[8];
+
+    fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+    /* Store the words with memcpy: writing through a UINT32* that points into
+       a byte array violates the aliasing rules and may be misaligned. */
+    memcpy(laneAsBytes+0, &low, sizeof(low));
+    memcpy(laneAsBytes+4, &high, sizeof(high));
+#else
+    laneAsBytes[0] = low & 0xFF;
+    laneAsBytes[1] = (low >> 8) & 0xFF;
+    laneAsBytes[2] = (low >> 16) & 0xFF;
+    laneAsBytes[3] = (low >> 24) & 0xFF;
+    laneAsBytes[4] = high & 0xFF;
+    laneAsBytes[5] = (high >> 8) & 0xFF;
+    laneAsBytes[6] = (high >> 16) & 0xFF;
+    laneAsBytes[7] = (high >> 24) & 0xFF;
+#endif
+    memcpy(data, laneAsBytes+offset, length);
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Copy the first `laneCount` lanes of the bit-interleaved state into `data`
+ * as plain 8-byte lanes, starting at lane 0.
+ */
+void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+ UINT32 * pI = (UINT32 *)data;
+ const UINT32 * pS = ( const UINT32 *)state;
+ UINT32 t, x0, x1;
+ int i;
+ /* i only counts iterations; the lanes are walked in ascending order via pS/pI. */
+ for (i = laneCount-1; i >= 0; --i) {
+#ifdef NO_MISALIGNED_ACCESSES
+ UINT32 low;
+ UINT32 high;
+ fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1);
+ /* `data` may be unaligned, so store each output word with memcpy. */
+ memcpy(pI++, &low, 4);
+ memcpy(pI++, &high, 4);
+#else
+ /* The macro expands each of its first four arguments exactly once, in
+    separate statements, so every pointer advances once per half-lane. */
+ fromBitInterleaving(*(pS++), *(pS++), *(pI++), *(pI++), t, x0, x1)
+#endif
+ }
+#else
+ unsigned int lanePosition;
+ for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
+ UINT32 *stateAsHalfLanes = (UINT32*)state;
+ UINT32 low, high, temp, temp0, temp1;
+ fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
+ /* Serialize the two words byte by byte, least significant byte first. */
+ UINT8 laneAsBytes[8];
+ laneAsBytes[0] = low & 0xFF;
+ laneAsBytes[1] = (low >> 8) & 0xFF;
+ laneAsBytes[2] = (low >> 16) & 0xFF;
+ laneAsBytes[3] = (low >> 24) & 0xFF;
+ laneAsBytes[4] = high & 0xFF;
+ laneAsBytes[5] = (high >> 8) & 0xFF;
+ laneAsBytes[6] = (high >> 16) & 0xFF;
+ laneAsBytes[7] = (high >> 24) & 0xFF;
+ memcpy(data+lanePosition*8, laneAsBytes, 8);
+ }
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Byte-range entry point for reading part of the state. Forwards to
+ * SnP_ExtractBytes, supplying the whole-lane helper
+ * (KeccakP1600_ExtractLanes) and the partial-lane helper
+ * (KeccakP1600_ExtractBytesInLane).
+ * The trailing 8 is presumably the lane size in bytes; SnP_ExtractBytes
+ * comes from SnP-Relaned.h, which is not shown here.
+ */
+void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
+{
+    SnP_ExtractBytes(state, data, offset, length,
+        KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8);
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * For `length` bytes of lane `lanePosition`, starting at byte `offset` within
+ * the lane, write output[i] = input[i] XOR (state byte offset+i).
+ * The lane is first converted back to plain form and serialized least
+ * significant byte first. The state itself is not modified.
+ * Requires offset + length <= 8, since the buffer holds a single lane.
+ */
+void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
+{
+    const UINT32 *stateAsHalfLanes = (const UINT32*)state;
+    UINT32 low, high, temp, temp0, temp1;
+    UINT8 laneAsBytes[8];
+    unsigned int i;
+
+    fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+    /* Store the words with memcpy: writing through a UINT32* that points into
+       a byte array violates the aliasing rules and may be misaligned. */
+    memcpy(laneAsBytes+0, &low, sizeof(low));
+    memcpy(laneAsBytes+4, &high, sizeof(high));
+#else
+    laneAsBytes[0] = low & 0xFF;
+    laneAsBytes[1] = (low >> 8) & 0xFF;
+    laneAsBytes[2] = (low >> 16) & 0xFF;
+    laneAsBytes[3] = (low >> 24) & 0xFF;
+    laneAsBytes[4] = high & 0xFF;
+    laneAsBytes[5] = (high >> 8) & 0xFF;
+    laneAsBytes[6] = (high >> 16) & 0xFF;
+    laneAsBytes[7] = (high >> 24) & 0xFF;
+#endif
+    for(i=0; i<length; i++)
+        output[i] = input[i] ^ laneAsBytes[offset+i];
+}
+
+/* ---------------------------------------------------------------- */
+
+/**
+ * XOR the first laneCount lanes of the state with input and write the
+ * result to output, 8 bytes per lane.
+ *
+ * Each lane is rebuilt from its two UINT32 half-lane words; its low half
+ * covers bytes 0..3 and its high half bytes 4..7 of the lane.
+ *
+ * @param state     State to read from; it is only read here.
+ * @param input     laneCount*8 bytes to XOR with the lanes.
+ * @param output    Destination for laneCount*8 bytes; within each lane all
+ *                  input bytes are read before the output bytes are written
+ *                  on the memcpy and byte-wise paths.
+ * @param laneCount Number of lanes to process, starting at lane 0.
+ */
+void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+ const UINT32 * pI = (const UINT32 *)input;
+ UINT32 * pO = (UINT32 *)output;
+ const UINT32 * pS = (const UINT32 *)state;
+ UINT32 t, x0, x1;
+ int i;
+ for (i = laneCount-1; i >= 0; --i) {
+#ifdef NO_MISALIGNED_ACCESSES
+ UINT32 low;
+ UINT32 high;
+ UINT32 inLow;
+ UINT32 inHigh;
+ fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1);
+ /* input/output are byte buffers that may be unaligned: move the words
+  * with memcpy, as KeccakP1600_ExtractLanes does, instead of
+  * dereferencing the UINT32 pointers. */
+ memcpy(&inLow, pI++, 4);
+ memcpy(&inHigh, pI++, 4);
+ low ^= inLow;
+ high ^= inHigh;
+ memcpy(pO++, &low, 4);
+ memcpy(pO++, &high, 4);
+#else
+ fromBitInterleavingAndXOR(*(pS++), *(pS++), *(pI++), *(pI++), *(pO++), *(pO++), t, x0, x1)
+#endif
+ }
+#else
+ unsigned int lanePosition;
+ for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
+ const UINT32 *stateAsHalfLanes = (const UINT32*)state;
+ UINT32 low, high, temp, temp0, temp1;
+ UINT8 laneAsBytes[8];
+ unsigned int k;
+ fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
+ laneAsBytes[0] = low & 0xFF;
+ laneAsBytes[1] = (low >> 8) & 0xFF;
+ laneAsBytes[2] = (low >> 16) & 0xFF;
+ laneAsBytes[3] = (low >> 24) & 0xFF;
+ laneAsBytes[4] = high & 0xFF;
+ laneAsBytes[5] = (high >> 8) & 0xFF;
+ laneAsBytes[6] = (high >> 16) & 0xFF;
+ laneAsBytes[7] = (high >> 24) & 0xFF;
+ /* Byte-wise XOR over the whole lane. The previous word-wise version
+  * combined the high output word with input word [0] instead of [1],
+  * corrupting bytes 4..7 of every lane. */
+ for(k=0; k<8; k++)
+ output[lanePosition*8+k] = input[lanePosition*8+k] ^ laneAsBytes[k];
+ }
+#endif
+}
+/* ---------------------------------------------------------------- */
+
+void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
+{
+ SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8);
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Round-constant table used by KeccakP1600_Permute_Nrounds().
+ * Layout: 24 rounds x 2 UINT32 words, followed by one sentinel word 0xFF.
+ * Every round macro consumes two consecutive words through pRoundConstants
+ * (XORed into Aba0, then into Aba1). The permutation starts reading at word
+ * (24-nRounds)*2 and stops once the cursor points at the 0xFF sentinel.
+ * NOTE(review): the two words per round are presumably the bit-interleaved
+ * halves of the 64-bit Keccak round constants - confirm against the
+ * reference values.
+ */
+static const UINT32 KeccakF1600RoundConstants_int2[2*24+1] =
+{
+ 0x00000001UL, 0x00000000UL,
+ 0x00000000UL, 0x00000089UL,
+ 0x00000000UL, 0x8000008bUL,
+ 0x00000000UL, 0x80008080UL,
+ 0x00000001UL, 0x0000008bUL,
+ 0x00000001UL, 0x00008000UL,
+ 0x00000001UL, 0x80008088UL,
+ 0x00000001UL, 0x80000082UL,
+ 0x00000000UL, 0x0000000bUL,
+ 0x00000000UL, 0x0000000aUL,
+ 0x00000001UL, 0x00008082UL,
+ 0x00000000UL, 0x00008003UL,
+ 0x00000001UL, 0x0000808bUL,
+ 0x00000001UL, 0x8000000bUL,
+ 0x00000001UL, 0x8000008aUL,
+ 0x00000001UL, 0x80000081UL,
+ 0x00000000UL, 0x80000081UL,
+ 0x00000000UL, 0x80000008UL,
+ 0x00000000UL, 0x00000083UL,
+ 0x00000000UL, 0x80008003UL,
+ 0x00000001UL, 0x80008088UL,
+ 0x00000000UL, 0x80000088UL,
+ 0x00000001UL, 0x00008000UL,
+ 0x00000000UL, 0x80008082UL,
+ 0x000000FFUL
+};
+/*
+ * KeccakRound0(): first of the four unrolled round bodies that
+ * KeccakP1600_Permute_Nrounds() runs per loop pass. It works in place on the
+ * half-lane state words Aba0..Asu1 and expects the scratch variables
+ * Cx/Cy/Cz/Cw, Da0..Du1 and Ba..Bu plus the cursor pRoundConstants to be in
+ * scope at the expansion site.
+ * As written below, it
+ *  - XORs five state words at a time into Cx/Cy/Cz/Cw and combines them
+ *    (one half rotated by 1 with ROL32) into Da0..Du1;
+ *  - for each group of five words XORs in the matching D value, rotates the
+ *    result into Ba..Bu and stores B ^ ((~B') & B'') back into the state;
+ *  - additionally XORs two consecutive words taken from pRoundConstants into
+ *    the new Aba0 and Aba1.
+ * Unlike KeccakRound1..3 the expansion carries no trailing ';'.
+ */
+#define KeccakRound0() \
+ Cx = Abu0^Agu0^Aku0^Amu0^Asu0; \
+ Du1 = Abe1^Age1^Ake1^Ame1^Ase1; \
+ Da0 = Cx^ROL32(Du1, 1); \
+ Cz = Abu1^Agu1^Aku1^Amu1^Asu1; \
+ Du0 = Abe0^Age0^Ake0^Ame0^Ase0; \
+ Da1 = Cz^Du0; \
+ Cw = Abi0^Agi0^Aki0^Ami0^Asi0; \
+ Do0 = Cw^ROL32(Cz, 1); \
+ Cy = Abi1^Agi1^Aki1^Ami1^Asi1; \
+ Do1 = Cy^Cx; \
+ Cx = Aba0^Aga0^Aka0^Ama0^Asa0; \
+ De0 = Cx^ROL32(Cy, 1); \
+ Cz = Aba1^Aga1^Aka1^Ama1^Asa1; \
+ De1 = Cz^Cw; \
+ Cy = Abo1^Ago1^Ako1^Amo1^Aso1; \
+ Di0 = Du0^ROL32(Cy, 1); \
+ Cw = Abo0^Ago0^Ako0^Amo0^Aso0; \
+ Di1 = Du1^Cw; \
+ Du0 = Cw^ROL32(Cz, 1); \
+ Du1 = Cy^Cx; \
+\
+ Ba = (Aba0^Da0); \
+ Be = ROL32((Age0^De0), 22); \
+ Bi = ROL32((Aki1^Di1), 22); \
+ Bo = ROL32((Amo1^Do1), 11); \
+ Bu = ROL32((Asu0^Du0), 7); \
+ Aba0 = Ba ^((~Be)& Bi ); \
+ Aba0 ^= *(pRoundConstants++); \
+ Age0 = Be ^((~Bi)& Bo ); \
+ Aki1 = Bi ^((~Bo)& Bu ); \
+ Amo1 = Bo ^((~Bu)& Ba ); \
+ Asu0 = Bu ^((~Ba)& Be ); \
+ Ba = (Aba1^Da1); \
+ Be = ROL32((Age1^De1), 22); \
+ Bi = ROL32((Aki0^Di0), 21); \
+ Bo = ROL32((Amo0^Do0), 10); \
+ Bu = ROL32((Asu1^Du1), 7); \
+ Aba1 = Ba ^((~Be)& Bi ); \
+ Aba1 ^= *(pRoundConstants++); \
+ Age1 = Be ^((~Bi)& Bo ); \
+ Aki0 = Bi ^((~Bo)& Bu ); \
+ Amo0 = Bo ^((~Bu)& Ba ); \
+ Asu1 = Bu ^((~Ba)& Be ); \
+ Bi = ROL32((Aka1^Da1), 2); \
+ Bo = ROL32((Ame1^De1), 23); \
+ Bu = ROL32((Asi1^Di1), 31); \
+ Ba = ROL32((Abo0^Do0), 14); \
+ Be = ROL32((Agu0^Du0), 10); \
+ Aka1 = Ba ^((~Be)& Bi ); \
+ Ame1 = Be ^((~Bi)& Bo ); \
+ Asi1 = Bi ^((~Bo)& Bu ); \
+ Abo0 = Bo ^((~Bu)& Ba ); \
+ Agu0 = Bu ^((~Ba)& Be ); \
+ Bi = ROL32((Aka0^Da0), 1); \
+ Bo = ROL32((Ame0^De0), 22); \
+ Bu = ROL32((Asi0^Di0), 30); \
+ Ba = ROL32((Abo1^Do1), 14); \
+ Be = ROL32((Agu1^Du1), 10); \
+ Aka0 = Ba ^((~Be)& Bi ); \
+ Ame0 = Be ^((~Bi)& Bo ); \
+ Asi0 = Bi ^((~Bo)& Bu ); \
+ Abo1 = Bo ^((~Bu)& Ba ); \
+ Agu1 = Bu ^((~Ba)& Be ); \
+ Bu = ROL32((Asa0^Da0), 9); \
+ Ba = ROL32((Abe1^De1), 1); \
+ Be = ROL32((Agi0^Di0), 3); \
+ Bi = ROL32((Ako1^Do1), 13); \
+ Bo = ROL32((Amu0^Du0), 4); \
+ Asa0 = Ba ^((~Be)& Bi ); \
+ Abe1 = Be ^((~Bi)& Bo ); \
+ Agi0 = Bi ^((~Bo)& Bu ); \
+ Ako1 = Bo ^((~Bu)& Ba ); \
+ Amu0 = Bu ^((~Ba)& Be ); \
+ Bu = ROL32((Asa1^Da1), 9); \
+ Ba = (Abe0^De0); \
+ Be = ROL32((Agi1^Di1), 3); \
+ Bi = ROL32((Ako0^Do0), 12); \
+ Bo = ROL32((Amu1^Du1), 4); \
+ Asa1 = Ba ^((~Be)& Bi ); \
+ Abe0 = Be ^((~Bi)& Bo ); \
+ Agi1 = Bi ^((~Bo)& Bu ); \
+ Ako0 = Bo ^((~Bu)& Ba ); \
+ Amu1 = Bu ^((~Ba)& Be ); \
+ Be = ROL32((Aga0^Da0), 18); \
+ Bi = ROL32((Ake0^De0), 5); \
+ Bo = ROL32((Ami1^Di1), 8); \
+ Bu = ROL32((Aso0^Do0), 28); \
+ Ba = ROL32((Abu1^Du1), 14); \
+ Aga0 = Ba ^((~Be)& Bi ); \
+ Ake0 = Be ^((~Bi)& Bo ); \
+ Ami1 = Bi ^((~Bo)& Bu ); \
+ Aso0 = Bo ^((~Bu)& Ba ); \
+ Abu1 = Bu ^((~Ba)& Be ); \
+ Be = ROL32((Aga1^Da1), 18); \
+ Bi = ROL32((Ake1^De1), 5); \
+ Bo = ROL32((Ami0^Di0), 7); \
+ Bu = ROL32((Aso1^Do1), 28); \
+ Ba = ROL32((Abu0^Du0), 13); \
+ Aga1 = Ba ^((~Be)& Bi ); \
+ Ake1 = Be ^((~Bi)& Bo ); \
+ Ami0 = Bi ^((~Bo)& Bu ); \
+ Aso1 = Bo ^((~Bu)& Ba ); \
+ Abu0 = Bu ^((~Ba)& Be ); \
+ Bo = ROL32((Ama1^Da1), 21); \
+ Bu = ROL32((Ase0^De0), 1); \
+ Ba = ROL32((Abi0^Di0), 31); \
+ Be = ROL32((Ago1^Do1), 28); \
+ Bi = ROL32((Aku1^Du1), 20); \
+ Ama1 = Ba ^((~Be)& Bi ); \
+ Ase0 = Be ^((~Bi)& Bo ); \
+ Abi0 = Bi ^((~Bo)& Bu ); \
+ Ago1 = Bo ^((~Bu)& Ba ); \
+ Aku1 = Bu ^((~Ba)& Be ); \
+ Bo = ROL32((Ama0^Da0), 20); \
+ Bu = ROL32((Ase1^De1), 1); \
+ Ba = ROL32((Abi1^Di1), 31); \
+ Be = ROL32((Ago0^Do0), 27); \
+ Bi = ROL32((Aku0^Du0), 19); \
+ Ama0 = Ba ^((~Be)& Bi ); \
+ Ase1 = Be ^((~Bi)& Bo ); \
+ Abi1 = Bi ^((~Bo)& Bu ); \
+ Ago0 = Bo ^((~Bu)& Ba ); \
+ Aku0 = Bu ^((~Ba)& Be )
+/*
+ * KeccakRound1(): second of the four unrolled round bodies. It has the same
+ * shape as KeccakRound0 (parities into Cx/Cy/Cz/Cw, D values, rotate into
+ * Ba..Bu, store B ^ ((~B') & B''), two words consumed from pRoundConstants)
+ * but addresses the state words at different positions; e.g. Be is derived
+ * from Ame1 here rather than from Age0.
+ * NOTE(review): presumably this reflects where the preceding round left each
+ * lane in this in-place variant - confirm against the reference code.
+ * The expansion ends with its own ';'.
+ */
+#define KeccakRound1() \
+ Cx = Asu0^Agu0^Amu0^Abu1^Aku1; \
+ Du1 = Age1^Ame0^Abe0^Ake1^Ase1; \
+ Da0 = Cx^ROL32(Du1, 1); \
+ Cz = Asu1^Agu1^Amu1^Abu0^Aku0; \
+ Du0 = Age0^Ame1^Abe1^Ake0^Ase0; \
+ Da1 = Cz^Du0; \
+ Cw = Aki1^Asi1^Agi0^Ami1^Abi0; \
+ Do0 = Cw^ROL32(Cz, 1); \
+ Cy = Aki0^Asi0^Agi1^Ami0^Abi1; \
+ Do1 = Cy^Cx; \
+ Cx = Aba0^Aka1^Asa0^Aga0^Ama1; \
+ De0 = Cx^ROL32(Cy, 1); \
+ Cz = Aba1^Aka0^Asa1^Aga1^Ama0; \
+ De1 = Cz^Cw; \
+ Cy = Amo0^Abo1^Ako0^Aso1^Ago0; \
+ Di0 = Du0^ROL32(Cy, 1); \
+ Cw = Amo1^Abo0^Ako1^Aso0^Ago1; \
+ Di1 = Du1^Cw; \
+ Du0 = Cw^ROL32(Cz, 1); \
+ Du1 = Cy^Cx; \
+\
+ Ba = (Aba0^Da0); \
+ Be = ROL32((Ame1^De0), 22); \
+ Bi = ROL32((Agi1^Di1), 22); \
+ Bo = ROL32((Aso1^Do1), 11); \
+ Bu = ROL32((Aku1^Du0), 7); \
+ Aba0 = Ba ^((~Be)& Bi ); \
+ Aba0 ^= *(pRoundConstants++); \
+ Ame1 = Be ^((~Bi)& Bo ); \
+ Agi1 = Bi ^((~Bo)& Bu ); \
+ Aso1 = Bo ^((~Bu)& Ba ); \
+ Aku1 = Bu ^((~Ba)& Be ); \
+ Ba = (Aba1^Da1); \
+ Be = ROL32((Ame0^De1), 22); \
+ Bi = ROL32((Agi0^Di0), 21); \
+ Bo = ROL32((Aso0^Do0), 10); \
+ Bu = ROL32((Aku0^Du1), 7); \
+ Aba1 = Ba ^((~Be)& Bi ); \
+ Aba1 ^= *(pRoundConstants++); \
+ Ame0 = Be ^((~Bi)& Bo ); \
+ Agi0 = Bi ^((~Bo)& Bu ); \
+ Aso0 = Bo ^((~Bu)& Ba ); \
+ Aku0 = Bu ^((~Ba)& Be ); \
+ Bi = ROL32((Asa1^Da1), 2); \
+ Bo = ROL32((Ake1^De1), 23); \
+ Bu = ROL32((Abi1^Di1), 31); \
+ Ba = ROL32((Amo1^Do0), 14); \
+ Be = ROL32((Agu0^Du0), 10); \
+ Asa1 = Ba ^((~Be)& Bi ); \
+ Ake1 = Be ^((~Bi)& Bo ); \
+ Abi1 = Bi ^((~Bo)& Bu ); \
+ Amo1 = Bo ^((~Bu)& Ba ); \
+ Agu0 = Bu ^((~Ba)& Be ); \
+ Bi = ROL32((Asa0^Da0), 1); \
+ Bo = ROL32((Ake0^De0), 22); \
+ Bu = ROL32((Abi0^Di0), 30); \
+ Ba = ROL32((Amo0^Do1), 14); \
+ Be = ROL32((Agu1^Du1), 10); \
+ Asa0 = Ba ^((~Be)& Bi ); \
+ Ake0 = Be ^((~Bi)& Bo ); \
+ Abi0 = Bi ^((~Bo)& Bu ); \
+ Amo0 = Bo ^((~Bu)& Ba ); \
+ Agu1 = Bu ^((~Ba)& Be ); \
+ Bu = ROL32((Ama1^Da0), 9); \
+ Ba = ROL32((Age1^De1), 1); \
+ Be = ROL32((Asi1^Di0), 3); \
+ Bi = ROL32((Ako0^Do1), 13); \
+ Bo = ROL32((Abu1^Du0), 4); \
+ Ama1 = Ba ^((~Be)& Bi ); \
+ Age1 = Be ^((~Bi)& Bo ); \
+ Asi1 = Bi ^((~Bo)& Bu ); \
+ Ako0 = Bo ^((~Bu)& Ba ); \
+ Abu1 = Bu ^((~Ba)& Be ); \
+ Bu = ROL32((Ama0^Da1), 9); \
+ Ba = (Age0^De0); \
+ Be = ROL32((Asi0^Di1), 3); \
+ Bi = ROL32((Ako1^Do0), 12); \
+ Bo = ROL32((Abu0^Du1), 4); \
+ Ama0 = Ba ^((~Be)& Bi ); \
+ Age0 = Be ^((~Bi)& Bo ); \
+ Asi0 = Bi ^((~Bo)& Bu ); \
+ Ako1 = Bo ^((~Bu)& Ba ); \
+ Abu0 = Bu ^((~Ba)& Be ); \
+ Be = ROL32((Aka1^Da0), 18); \
+ Bi = ROL32((Abe1^De0), 5); \
+ Bo = ROL32((Ami0^Di1), 8); \
+ Bu = ROL32((Ago1^Do0), 28); \
+ Ba = ROL32((Asu1^Du1), 14); \
+ Aka1 = Ba ^((~Be)& Bi ); \
+ Abe1 = Be ^((~Bi)& Bo ); \
+ Ami0 = Bi ^((~Bo)& Bu ); \
+ Ago1 = Bo ^((~Bu)& Ba ); \
+ Asu1 = Bu ^((~Ba)& Be ); \
+ Be = ROL32((Aka0^Da1), 18); \
+ Bi = ROL32((Abe0^De1), 5); \
+ Bo = ROL32((Ami1^Di0), 7); \
+ Bu = ROL32((Ago0^Do1), 28); \
+ Ba = ROL32((Asu0^Du0), 13); \
+ Aka0 = Ba ^((~Be)& Bi ); \
+ Abe0 = Be ^((~Bi)& Bo ); \
+ Ami1 = Bi ^((~Bo)& Bu ); \
+ Ago0 = Bo ^((~Bu)& Ba ); \
+ Asu0 = Bu ^((~Ba)& Be ); \
+ Bo = ROL32((Aga1^Da1), 21); \
+ Bu = ROL32((Ase0^De0), 1); \
+ Ba = ROL32((Aki1^Di0), 31); \
+ Be = ROL32((Abo1^Do1), 28); \
+ Bi = ROL32((Amu1^Du1), 20); \
+ Aga1 = Ba ^((~Be)& Bi ); \
+ Ase0 = Be ^((~Bi)& Bo ); \
+ Aki1 = Bi ^((~Bo)& Bu ); \
+ Abo1 = Bo ^((~Bu)& Ba ); \
+ Amu1 = Bu ^((~Ba)& Be ); \
+ Bo = ROL32((Aga0^Da0), 20); \
+ Bu = ROL32((Ase1^De1), 1); \
+ Ba = ROL32((Aki0^Di1), 31); \
+ Be = ROL32((Abo0^Do0), 27); \
+ Bi = ROL32((Amu0^Du0), 19); \
+ Aga0 = Ba ^((~Be)& Bi ); \
+ Ase1 = Be ^((~Bi)& Bo ); \
+ Aki0 = Bi ^((~Bo)& Bu ); \
+ Abo0 = Bo ^((~Bu)& Ba ); \
+ Amu0 = Bu ^((~Ba)& Be );
+/*
+ * KeccakRound2(): third of the four unrolled round bodies. It has the same
+ * shape as KeccakRound0 (parities into Cx/Cy/Cz/Cw, D values, rotate into
+ * Ba..Bu, store B ^ ((~B') & B''), two words consumed from pRoundConstants)
+ * but addresses the state words at different positions; e.g. Be is derived
+ * from Ake1 here.
+ * NOTE(review): presumably this reflects where the preceding rounds left
+ * each lane in this in-place variant - confirm against the reference code.
+ * The expansion ends with its own ';'.
+ */
+#define KeccakRound2() \
+ Cx = Aku1^Agu0^Abu1^Asu1^Amu1; \
+ Du1 = Ame0^Ake0^Age0^Abe0^Ase1; \
+ Da0 = Cx^ROL32(Du1, 1); \
+ Cz = Aku0^Agu1^Abu0^Asu0^Amu0; \
+ Du0 = Ame1^Ake1^Age1^Abe1^Ase0; \
+ Da1 = Cz^Du0; \
+ Cw = Agi1^Abi1^Asi1^Ami0^Aki1; \
+ Do0 = Cw^ROL32(Cz, 1); \
+ Cy = Agi0^Abi0^Asi0^Ami1^Aki0; \
+ Do1 = Cy^Cx; \
+ Cx = Aba0^Asa1^Ama1^Aka1^Aga1; \
+ De0 = Cx^ROL32(Cy, 1); \
+ Cz = Aba1^Asa0^Ama0^Aka0^Aga0; \
+ De1 = Cz^Cw; \
+ Cy = Aso0^Amo0^Ako1^Ago0^Abo0; \
+ Di0 = Du0^ROL32(Cy, 1); \
+ Cw = Aso1^Amo1^Ako0^Ago1^Abo1; \
+ Di1 = Du1^Cw; \
+ Du0 = Cw^ROL32(Cz, 1); \
+ Du1 = Cy^Cx; \
+\
+ Ba = (Aba0^Da0); \
+ Be = ROL32((Ake1^De0), 22); \
+ Bi = ROL32((Asi0^Di1), 22); \
+ Bo = ROL32((Ago0^Do1), 11); \
+ Bu = ROL32((Amu1^Du0), 7); \
+ Aba0 = Ba ^((~Be)& Bi ); \
+ Aba0 ^= *(pRoundConstants++); \
+ Ake1 = Be ^((~Bi)& Bo ); \
+ Asi0 = Bi ^((~Bo)& Bu ); \
+ Ago0 = Bo ^((~Bu)& Ba ); \
+ Amu1 = Bu ^((~Ba)& Be ); \
+ Ba = (Aba1^Da1); \
+ Be = ROL32((Ake0^De1), 22); \
+ Bi = ROL32((Asi1^Di0), 21); \
+ Bo = ROL32((Ago1^Do0), 10); \
+ Bu = ROL32((Amu0^Du1), 7); \
+ Aba1 = Ba ^((~Be)& Bi ); \
+ Aba1 ^= *(pRoundConstants++); \
+ Ake0 = Be ^((~Bi)& Bo ); \
+ Asi1 = Bi ^((~Bo)& Bu ); \
+ Ago1 = Bo ^((~Bu)& Ba ); \
+ Amu0 = Bu ^((~Ba)& Be ); \
+ Bi = ROL32((Ama0^Da1), 2); \
+ Bo = ROL32((Abe0^De1), 23); \
+ Bu = ROL32((Aki0^Di1), 31); \
+ Ba = ROL32((Aso1^Do0), 14); \
+ Be = ROL32((Agu0^Du0), 10); \
+ Ama0 = Ba ^((~Be)& Bi ); \
+ Abe0 = Be ^((~Bi)& Bo ); \
+ Aki0 = Bi ^((~Bo)& Bu ); \
+ Aso1 = Bo ^((~Bu)& Ba ); \
+ Agu0 = Bu ^((~Ba)& Be ); \
+ Bi = ROL32((Ama1^Da0), 1); \
+ Bo = ROL32((Abe1^De0), 22); \
+ Bu = ROL32((Aki1^Di0), 30); \
+ Ba = ROL32((Aso0^Do1), 14); \
+ Be = ROL32((Agu1^Du1), 10); \
+ Ama1 = Ba ^((~Be)& Bi ); \
+ Abe1 = Be ^((~Bi)& Bo ); \
+ Aki1 = Bi ^((~Bo)& Bu ); \
+ Aso0 = Bo ^((~Bu)& Ba ); \
+ Agu1 = Bu ^((~Ba)& Be ); \
+ Bu = ROL32((Aga1^Da0), 9); \
+ Ba = ROL32((Ame0^De1), 1); \
+ Be = ROL32((Abi1^Di0), 3); \
+ Bi = ROL32((Ako1^Do1), 13); \
+ Bo = ROL32((Asu1^Du0), 4); \
+ Aga1 = Ba ^((~Be)& Bi ); \
+ Ame0 = Be ^((~Bi)& Bo ); \
+ Abi1 = Bi ^((~Bo)& Bu ); \
+ Ako1 = Bo ^((~Bu)& Ba ); \
+ Asu1 = Bu ^((~Ba)& Be ); \
+ Bu = ROL32((Aga0^Da1), 9); \
+ Ba = (Ame1^De0); \
+ Be = ROL32((Abi0^Di1), 3); \
+ Bi = ROL32((Ako0^Do0), 12); \
+ Bo = ROL32((Asu0^Du1), 4); \
+ Aga0 = Ba ^((~Be)& Bi ); \
+ Ame1 = Be ^((~Bi)& Bo ); \
+ Abi0 = Bi ^((~Bo)& Bu ); \
+ Ako0 = Bo ^((~Bu)& Ba ); \
+ Asu0 = Bu ^((~Ba)& Be ); \
+ Be = ROL32((Asa1^Da0), 18); \
+ Bi = ROL32((Age1^De0), 5); \
+ Bo = ROL32((Ami1^Di1), 8); \
+ Bu = ROL32((Abo1^Do0), 28); \
+ Ba = ROL32((Aku0^Du1), 14); \
+ Asa1 = Ba ^((~Be)& Bi ); \
+ Age1 = Be ^((~Bi)& Bo ); \
+ Ami1 = Bi ^((~Bo)& Bu ); \
+ Abo1 = Bo ^((~Bu)& Ba ); \
+ Aku0 = Bu ^((~Ba)& Be ); \
+ Be = ROL32((Asa0^Da1), 18); \
+ Bi = ROL32((Age0^De1), 5); \
+ Bo = ROL32((Ami0^Di0), 7); \
+ Bu = ROL32((Abo0^Do1), 28); \
+ Ba = ROL32((Aku1^Du0), 13); \
+ Asa0 = Ba ^((~Be)& Bi ); \
+ Age0 = Be ^((~Bi)& Bo ); \
+ Ami0 = Bi ^((~Bo)& Bu ); \
+ Abo0 = Bo ^((~Bu)& Ba ); \
+ Aku1 = Bu ^((~Ba)& Be ); \
+ Bo = ROL32((Aka0^Da1), 21); \
+ Bu = ROL32((Ase0^De0), 1); \
+ Ba = ROL32((Agi1^Di0), 31); \
+ Be = ROL32((Amo0^Do1), 28); \
+ Bi = ROL32((Abu0^Du1), 20); \
+ Aka0 = Ba ^((~Be)& Bi ); \
+ Ase0 = Be ^((~Bi)& Bo ); \
+ Agi1 = Bi ^((~Bo)& Bu ); \
+ Amo0 = Bo ^((~Bu)& Ba ); \
+ Abu0 = Bu ^((~Ba)& Be ); \
+ Bo = ROL32((Aka1^Da0), 20); \
+ Bu = ROL32((Ase1^De1), 1); \
+ Ba = ROL32((Agi0^Di1), 31); \
+ Be = ROL32((Amo1^Do0), 27); \
+ Bi = ROL32((Abu1^Du0), 19); \
+ Aka1 = Ba ^((~Be)& Bi ); \
+ Ase1 = Be ^((~Bi)& Bo ); \
+ Agi0 = Bi ^((~Bo)& Bu ); \
+ Amo1 = Bo ^((~Bu)& Ba ); \
+ Abu1 = Bu ^((~Ba)& Be );
+/*
+ * KeccakRound3(): last of the four unrolled round bodies. It has the same
+ * shape as KeccakRound0 (parities into Cx/Cy/Cz/Cw, D values, rotate into
+ * Ba..Bu, store B ^ ((~B') & B''), two words consumed from pRoundConstants).
+ * Here every group of five B values is read from and written back to words
+ * sharing one middle letter (Ab*, Ag*, Ak*, Am*, As*).
+ * NOTE(review): presumably this is the round that brings the lanes back to
+ * their natural positions after four in-place rounds - confirm against the
+ * reference code.
+ * The expansion ends with its own ';'.
+ */
+#define KeccakRound3() \
+ Cx = Amu1^Agu0^Asu1^Aku0^Abu0; \
+ Du1 = Ake0^Abe1^Ame1^Age0^Ase1; \
+ Da0 = Cx^ROL32(Du1, 1); \
+ Cz = Amu0^Agu1^Asu0^Aku1^Abu1; \
+ Du0 = Ake1^Abe0^Ame0^Age1^Ase0; \
+ Da1 = Cz^Du0; \
+ Cw = Asi0^Aki0^Abi1^Ami1^Agi1; \
+ Do0 = Cw^ROL32(Cz, 1); \
+ Cy = Asi1^Aki1^Abi0^Ami0^Agi0; \
+ Do1 = Cy^Cx; \
+ Cx = Aba0^Ama0^Aga1^Asa1^Aka0; \
+ De0 = Cx^ROL32(Cy, 1); \
+ Cz = Aba1^Ama1^Aga0^Asa0^Aka1; \
+ De1 = Cz^Cw; \
+ Cy = Ago1^Aso0^Ako0^Abo0^Amo1; \
+ Di0 = Du0^ROL32(Cy, 1); \
+ Cw = Ago0^Aso1^Ako1^Abo1^Amo0; \
+ Di1 = Du1^Cw; \
+ Du0 = Cw^ROL32(Cz, 1); \
+ Du1 = Cy^Cx; \
+\
+ Ba = (Aba0^Da0); \
+ Be = ROL32((Abe0^De0), 22); \
+ Bi = ROL32((Abi0^Di1), 22); \
+ Bo = ROL32((Abo0^Do1), 11); \
+ Bu = ROL32((Abu0^Du0), 7); \
+ Aba0 = Ba ^((~Be)& Bi ); \
+ Aba0 ^= *(pRoundConstants++); \
+ Abe0 = Be ^((~Bi)& Bo ); \
+ Abi0 = Bi ^((~Bo)& Bu ); \
+ Abo0 = Bo ^((~Bu)& Ba ); \
+ Abu0 = Bu ^((~Ba)& Be ); \
+ Ba = (Aba1^Da1); \
+ Be = ROL32((Abe1^De1), 22); \
+ Bi = ROL32((Abi1^Di0), 21); \
+ Bo = ROL32((Abo1^Do0), 10); \
+ Bu = ROL32((Abu1^Du1), 7); \
+ Aba1 = Ba ^((~Be)& Bi ); \
+ Aba1 ^= *(pRoundConstants++); \
+ Abe1 = Be ^((~Bi)& Bo ); \
+ Abi1 = Bi ^((~Bo)& Bu ); \
+ Abo1 = Bo ^((~Bu)& Ba ); \
+ Abu1 = Bu ^((~Ba)& Be ); \
+ Bi = ROL32((Aga0^Da1), 2); \
+ Bo = ROL32((Age0^De1), 23); \
+ Bu = ROL32((Agi0^Di1), 31); \
+ Ba = ROL32((Ago0^Do0), 14); \
+ Be = ROL32((Agu0^Du0), 10); \
+ Aga0 = Ba ^((~Be)& Bi ); \
+ Age0 = Be ^((~Bi)& Bo ); \
+ Agi0 = Bi ^((~Bo)& Bu ); \
+ Ago0 = Bo ^((~Bu)& Ba ); \
+ Agu0 = Bu ^((~Ba)& Be ); \
+ Bi = ROL32((Aga1^Da0), 1); \
+ Bo = ROL32((Age1^De0), 22); \
+ Bu = ROL32((Agi1^Di0), 30); \
+ Ba = ROL32((Ago1^Do1), 14); \
+ Be = ROL32((Agu1^Du1), 10); \
+ Aga1 = Ba ^((~Be)& Bi ); \
+ Age1 = Be ^((~Bi)& Bo ); \
+ Agi1 = Bi ^((~Bo)& Bu ); \
+ Ago1 = Bo ^((~Bu)& Ba ); \
+ Agu1 = Bu ^((~Ba)& Be ); \
+ Bu = ROL32((Aka0^Da0), 9); \
+ Ba = ROL32((Ake0^De1), 1); \
+ Be = ROL32((Aki0^Di0), 3); \
+ Bi = ROL32((Ako0^Do1), 13); \
+ Bo = ROL32((Aku0^Du0), 4); \
+ Aka0 = Ba ^((~Be)& Bi ); \
+ Ake0 = Be ^((~Bi)& Bo ); \
+ Aki0 = Bi ^((~Bo)& Bu ); \
+ Ako0 = Bo ^((~Bu)& Ba ); \
+ Aku0 = Bu ^((~Ba)& Be ); \
+ Bu = ROL32((Aka1^Da1), 9); \
+ Ba = (Ake1^De0); \
+ Be = ROL32((Aki1^Di1), 3); \
+ Bi = ROL32((Ako1^Do0), 12); \
+ Bo = ROL32((Aku1^Du1), 4); \
+ Aka1 = Ba ^((~Be)& Bi ); \
+ Ake1 = Be ^((~Bi)& Bo ); \
+ Aki1 = Bi ^((~Bo)& Bu ); \
+ Ako1 = Bo ^((~Bu)& Ba ); \
+ Aku1 = Bu ^((~Ba)& Be ); \
+ Be = ROL32((Ama0^Da0), 18); \
+ Bi = ROL32((Ame0^De0), 5); \
+ Bo = ROL32((Ami0^Di1), 8); \
+ Bu = ROL32((Amo0^Do0), 28); \
+ Ba = ROL32((Amu0^Du1), 14); \
+ Ama0 = Ba ^((~Be)& Bi ); \
+ Ame0 = Be ^((~Bi)& Bo ); \
+ Ami0 = Bi ^((~Bo)& Bu ); \
+ Amo0 = Bo ^((~Bu)& Ba ); \
+ Amu0 = Bu ^((~Ba)& Be ); \
+ Be = ROL32((Ama1^Da1), 18); \
+ Bi = ROL32((Ame1^De1), 5); \
+ Bo = ROL32((Ami1^Di0), 7); \
+ Bu = ROL32((Amo1^Do1), 28); \
+ Ba = ROL32((Amu1^Du0), 13); \
+ Ama1 = Ba ^((~Be)& Bi ); \
+ Ame1 = Be ^((~Bi)& Bo ); \
+ Ami1 = Bi ^((~Bo)& Bu ); \
+ Amo1 = Bo ^((~Bu)& Ba ); \
+ Amu1 = Bu ^((~Ba)& Be ); \
+ Bo = ROL32((Asa0^Da1), 21); \
+ Bu = ROL32((Ase0^De0), 1); \
+ Ba = ROL32((Asi0^Di0), 31); \
+ Be = ROL32((Aso0^Do1), 28); \
+ Bi = ROL32((Asu0^Du1), 20); \
+ Asa0 = Ba ^((~Be)& Bi ); \
+ Ase0 = Be ^((~Bi)& Bo ); \
+ Asi0 = Bi ^((~Bo)& Bu ); \
+ Aso0 = Bo ^((~Bu)& Ba ); \
+ Asu0 = Bu ^((~Ba)& Be ); \
+ Bo = ROL32((Asa1^Da0), 20); \
+ Bu = ROL32((Ase1^De1), 1); \
+ Ba = ROL32((Asi1^Di1), 31); \
+ Be = ROL32((Aso1^Do0), 27); \
+ Bi = ROL32((Asu1^Du0), 19); \
+ Asa1 = Ba ^((~Be)& Bi ); \
+ Ase1 = Be ^((~Bi)& Bo ); \
+ Asi1 = Bi ^((~Bo)& Bu ); \
+ Aso1 = Bo ^((~Bu)& Ba ); \
+ Asu1 = Bu ^((~Ba)& Be );
+
+/**
+ * Apply nRounds rounds of the permutation to the state, in place, using the
+ * last nRounds entries of the round-constant table.
+ *
+ * The rounds are unrolled four at a time (KeccakRound0..KeccakRound3). When
+ * nRounds is not a multiple of 4, the state words are first shuffled by the
+ * SwapPI13/SwapPI2/SwapEO helpers and the first loop pass enters the
+ * four-round sequence part-way through.
+ *
+ * @param state   The state, accessed as 50 UINT32 half-lane words.
+ * @param nRounds Number of rounds to run. 0 leaves the state untouched.
+ *                Values above 24 are not supported: the table only holds
+ *                constants for 24 rounds and the start offset is computed as
+ *                (24-nRounds)*2.
+ */
+void KeccakP1600_Permute_Nrounds(void *state, unsigned int nRounds)
+{
+ UINT32 Da0, De0, Di0, Do0, Du0;
+ UINT32 Da1, De1, Di1, Do1, Du1;
+ UINT32 Ba, Be, Bi, Bo, Bu;
+ UINT32 Cx, Cy, Cz, Cw;
+ const UINT32 *pRoundConstants = KeccakF1600RoundConstants_int2+(24-nRounds)*2;
+ UINT32 *stateAsHalfLanes = (UINT32*)state;
+
+ /* Zero rounds is the identity. Without this guard the do/while below would
+  * still run four rounds, starting at the sentinel and reading past the end
+  * of the round-constant table. */
+ if (nRounds == 0)
+ return;
+
+ #define Aba0 stateAsHalfLanes[ 0]
+ #define Aba1 stateAsHalfLanes[ 1]
+ #define Abe0 stateAsHalfLanes[ 2]
+ #define Abe1 stateAsHalfLanes[ 3]
+ #define Abi0 stateAsHalfLanes[ 4]
+ #define Abi1 stateAsHalfLanes[ 5]
+ #define Abo0 stateAsHalfLanes[ 6]
+ #define Abo1 stateAsHalfLanes[ 7]
+ #define Abu0 stateAsHalfLanes[ 8]
+ #define Abu1 stateAsHalfLanes[ 9]
+ #define Aga0 stateAsHalfLanes[10]
+ #define Aga1 stateAsHalfLanes[11]
+ #define Age0 stateAsHalfLanes[12]
+ #define Age1 stateAsHalfLanes[13]
+ #define Agi0 stateAsHalfLanes[14]
+ #define Agi1 stateAsHalfLanes[15]
+ #define Ago0 stateAsHalfLanes[16]
+ #define Ago1 stateAsHalfLanes[17]
+ #define Agu0 stateAsHalfLanes[18]
+ #define Agu1 stateAsHalfLanes[19]
+ #define Aka0 stateAsHalfLanes[20]
+ #define Aka1 stateAsHalfLanes[21]
+ #define Ake0 stateAsHalfLanes[22]
+ #define Ake1 stateAsHalfLanes[23]
+ #define Aki0 stateAsHalfLanes[24]
+ #define Aki1 stateAsHalfLanes[25]
+ #define Ako0 stateAsHalfLanes[26]
+ #define Ako1 stateAsHalfLanes[27]
+ #define Aku0 stateAsHalfLanes[28]
+ #define Aku1 stateAsHalfLanes[29]
+ #define Ama0 stateAsHalfLanes[30]
+ #define Ama1 stateAsHalfLanes[31]
+ #define Ame0 stateAsHalfLanes[32]
+ #define Ame1 stateAsHalfLanes[33]
+ #define Ami0 stateAsHalfLanes[34]
+ #define Ami1 stateAsHalfLanes[35]
+ #define Amo0 stateAsHalfLanes[36]
+ #define Amo1 stateAsHalfLanes[37]
+ #define Amu0 stateAsHalfLanes[38]
+ #define Amu1 stateAsHalfLanes[39]
+ #define Asa0 stateAsHalfLanes[40]
+ #define Asa1 stateAsHalfLanes[41]
+ #define Ase0 stateAsHalfLanes[42]
+ #define Ase1 stateAsHalfLanes[43]
+ #define Asi0 stateAsHalfLanes[44]
+ #define Asi1 stateAsHalfLanes[45]
+ #define Aso0 stateAsHalfLanes[46]
+ #define Aso1 stateAsHalfLanes[47]
+ #define Asu0 stateAsHalfLanes[48]
+ #define Asu1 stateAsHalfLanes[49]
+
+ /* Only nRounds mod 4 matters from here on: it selects the pre-shuffle below
+  * and the entry point into the first four-round pass. */
+ nRounds &= 3;
+ switch ( nRounds )
+ {
+ #define I0 Ba
+ #define I1 Be
+ #define T0 Bi
+ #define T1 Bo
+ #define SwapPI13( in0,in1,in2,in3,eo0,eo1,eo2,eo3 ) \
+ I0 = (in0)[0]; I1 = (in0)[1]; \
+ T0 = (in1)[0]; T1 = (in1)[1]; \
+ (in0)[eo0] = T0; (in0)[eo0^1] = T1; \
+ T0 = (in2)[0]; T1 = (in2)[1]; \
+ (in1)[eo1] = T0; (in1)[eo1^1] = T1; \
+ T0 = (in3)[0]; T1 = (in3)[1]; \
+ (in2)[eo2] = T0; (in2)[eo2^1] = T1; \
+ (in3)[eo3] = I0; (in3)[eo3^1] = I1
+ #define SwapPI2( in0,in1,in2,in3 ) \
+ I0 = (in0)[0]; I1 = (in0)[1]; \
+ T0 = (in1)[0]; T1 = (in1)[1]; \
+ (in0)[1] = T0; (in0)[0] = T1; \
+ (in1)[1] = I0; (in1)[0] = I1; \
+ I0 = (in2)[0]; I1 = (in2)[1]; \
+ T0 = (in3)[0]; T1 = (in3)[1]; \
+ (in2)[1] = T0; (in2)[0] = T1; \
+ (in3)[1] = I0; (in3)[0] = I1
+ #define SwapEO( even,odd ) T0 = even; even = odd; odd = T0
+
+ case 1:
+ SwapPI13( &Aga0, &Aka0, &Asa0, &Ama0, 1, 0, 1, 0 );
+ SwapPI13( &Abe0, &Age0, &Ame0, &Ake0, 0, 1, 0, 1 );
+ SwapPI13( &Abi0, &Aki0, &Agi0, &Asi0, 1, 0, 1, 0 );
+ SwapEO( Ami0, Ami1 );
+ SwapPI13( &Abo0, &Amo0, &Aso0, &Ago0, 1, 0, 1, 0 );
+ SwapEO( Ako0, Ako1 );
+ SwapPI13( &Abu0, &Asu0, &Aku0, &Amu0, 0, 1, 0, 1 );
+ break;
+
+ case 2:
+ SwapPI2( &Aga0, &Asa0, &Aka0, &Ama0 );
+ SwapPI2( &Abe0, &Ame0, &Age0, &Ake0 );
+ SwapPI2( &Abi0, &Agi0, &Aki0, &Asi0 );
+ SwapPI2( &Abo0, &Aso0, &Ago0, &Amo0 );
+ SwapPI2( &Abu0, &Aku0, &Amu0, &Asu0 );
+ break;
+
+ case 3:
+ SwapPI13( &Aga0, &Ama0, &Asa0, &Aka0, 0, 1, 0, 1 );
+ SwapPI13( &Abe0, &Ake0, &Ame0, &Age0, 1, 0, 1, 0 );
+ SwapPI13( &Abi0, &Asi0, &Agi0, &Aki0, 0, 1, 0, 1 );
+ SwapEO( Ami0, Ami1 );
+ SwapPI13( &Abo0, &Ago0, &Aso0, &Amo0, 0, 1, 0, 1 );
+ SwapEO( Ako0, Ako1 );
+ SwapPI13( &Abu0, &Amu0, &Aku0, &Asu0, 1, 0, 1, 0 );
+ break;
+ #undef I0
+ #undef I1
+ #undef T0
+ #undef T1
+ #undef SwapPI13
+ #undef SwapPI2
+ #undef SwapEO
+ }
+
+ do
+ {
+ /* Code for 4 rounds, using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */
+ switch ( nRounds )
+ {
+ case 0: KeccakRound0(); /* fall through */
+ case 3: KeccakRound1(); /* fall through */
+ case 2: KeccakRound2(); /* fall through */
+ case 1: KeccakRound3();
+ }
+ /* Every later pass runs all four rounds. */
+ nRounds = 0;
+ }
+ while ( *pRoundConstants != 0xFF ); /* stop at the sentinel that ends the table */
+
+ #undef Aba0
+ #undef Aba1
+ #undef Abe0
+ #undef Abe1
+ #undef Abi0
+ #undef Abi1
+ #undef Abo0
+ #undef Abo1
+ #undef Abu0
+ #undef Abu1
+ #undef Aga0
+ #undef Aga1
+ #undef Age0
+ #undef Age1
+ #undef Agi0
+ #undef Agi1
+ #undef Ago0
+ #undef Ago1
+ #undef Agu0
+ #undef Agu1
+ #undef Aka0
+ #undef Aka1
+ #undef Ake0
+ #undef Ake1
+ #undef Aki0
+ #undef Aki1
+ #undef Ako0
+ #undef Ako1
+ #undef Aku0
+ #undef Aku1
+ #undef Ama0
+ #undef Ama1
+ #undef Ame0
+ #undef Ame1
+ #undef Ami0
+ #undef Ami1
+ #undef Amo0
+ #undef Amo1
+ #undef Amu0
+ #undef Amu1
+ #undef Asa0
+ #undef Asa1
+ #undef Ase0
+ #undef Ase1
+ #undef Asi0
+ #undef Asi1
+ #undef Aso0
+ #undef Aso1
+ #undef Asu0
+ #undef Asu1
+}
+
+/* ---------------------------------------------------------------- */
+/** Apply 12 rounds to the state in place; forwards to KeccakP1600_Permute_Nrounds(state, 12). */
+void KeccakP1600_Permute_12rounds(void *state)
+{
+ KeccakP1600_Permute_Nrounds(state, 12);
+}
+
+/* ---------------------------------------------------------------- */
+/** Apply 24 rounds to the state in place; forwards to KeccakP1600_Permute_Nrounds(state, 24). */
+void KeccakP1600_Permute_24rounds(void *state)
+{
+ KeccakP1600_Permute_Nrounds(state, 24);
+}
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+/*
+ * Sponge instantiations. Each section below sets prefix, SnP, SnP_width and
+ * SnP_Permute (plus SnP_FastLoop_Absorb when the matching *_FastLoop_supported
+ * macro is defined), includes KeccakSponge.inc, and then undefines the names
+ * again so the next section can reuse them.
+ * NOTE(review): KeccakSponge.inc is not visible here; presumably it is a
+ * template that expands to the <prefix>_Sponge* functions - confirm.
+ * A section is skipped entirely when its KeccakP<width>_excluded macro is
+ * defined.
+ */
+#include "KeccakSponge.h"
+
+#ifdef KeccakReference
+ #include "displayIntermediateValues.h"
+#endif
+/* Width 200, permutation KeccakP200_Permute_18rounds. */
+#ifndef KeccakP200_excluded
+ #include "KeccakP-200-SnP.h"
+
+ #define prefix KeccakWidth200
+ #define SnP KeccakP200
+ #define SnP_width 200
+ #define SnP_Permute KeccakP200_Permute_18rounds
+ #if defined(KeccakF200_FastLoop_supported)
+ #define SnP_FastLoop_Absorb KeccakF200_FastLoop_Absorb
+ #endif
+ #include "KeccakSponge.inc"
+ #undef prefix
+ #undef SnP
+ #undef SnP_width
+ #undef SnP_Permute
+ #undef SnP_FastLoop_Absorb
+#endif
+/* Width 400, permutation KeccakP400_Permute_20rounds. */
+#ifndef KeccakP400_excluded
+ #include "KeccakP-400-SnP.h"
+
+ #define prefix KeccakWidth400
+ #define SnP KeccakP400
+ #define SnP_width 400
+ #define SnP_Permute KeccakP400_Permute_20rounds
+ #if defined(KeccakF400_FastLoop_supported)
+ #define SnP_FastLoop_Absorb KeccakF400_FastLoop_Absorb
+ #endif
+ #include "KeccakSponge.inc"
+ #undef prefix
+ #undef SnP
+ #undef SnP_width
+ #undef SnP_Permute
+ #undef SnP_FastLoop_Absorb
+#endif
+/* Width 800, permutation KeccakP800_Permute_22rounds. */
+#ifndef KeccakP800_excluded
+ #include "KeccakP-800-SnP.h"
+
+ #define prefix KeccakWidth800
+ #define SnP KeccakP800
+ #define SnP_width 800
+ #define SnP_Permute KeccakP800_Permute_22rounds
+ #if defined(KeccakF800_FastLoop_supported)
+ #define SnP_FastLoop_Absorb KeccakF800_FastLoop_Absorb
+ #endif
+ #include "KeccakSponge.inc"
+ #undef prefix
+ #undef SnP
+ #undef SnP_width
+ #undef SnP_Permute
+ #undef SnP_FastLoop_Absorb
+#endif
+/* Width 1600, permutation KeccakP1600_Permute_24rounds. */
+#ifndef KeccakP1600_excluded
+ #include "KeccakP-1600-SnP.h"
+
+ #define prefix KeccakWidth1600
+ #define SnP KeccakP1600
+ #define SnP_width 1600
+ #define SnP_Permute KeccakP1600_Permute_24rounds
+ #if defined(KeccakF1600_FastLoop_supported)
+ #define SnP_FastLoop_Absorb KeccakF1600_FastLoop_Absorb
+ #endif
+ #include "KeccakSponge.inc"
+ #undef prefix
+ #undef SnP
+ #undef SnP_width
+ #undef SnP_Permute
+ #undef SnP_FastLoop_Absorb
+#endif
+/* Width 1600 again, under the prefix KeccakWidth1600_12rounds and with KeccakP1600_Permute_12rounds. */
+#ifndef KeccakP1600_excluded
+ #include "KeccakP-1600-SnP.h"
+
+ #define prefix KeccakWidth1600_12rounds
+ #define SnP KeccakP1600
+ #define SnP_width 1600
+ #define SnP_Permute KeccakP1600_Permute_12rounds
+ #if defined(KeccakP1600_12rounds_FastLoop_supported)
+ #define SnP_FastLoop_Absorb KeccakP1600_12rounds_FastLoop_Absorb
+ #endif
+ #include "KeccakSponge.inc"
+ #undef prefix
+ #undef SnP
+ #undef SnP_width
+ #undef SnP_Permute
+ #undef SnP_FastLoop_Absorb
+#endif
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _KeccakSponge_h_
+#define _KeccakSponge_h_
+
+/** General information
+ *
+ * The following type and functions are not actually implemented. Their
+ * documentation is generic, with the prefix Prefix replaced by
+ * - KeccakWidth200 for a sponge function based on Keccak-f[200]
+ * - KeccakWidth400 for a sponge function based on Keccak-f[400]
+ * - KeccakWidth800 for a sponge function based on Keccak-f[800]
+ * - KeccakWidth1600 for a sponge function based on Keccak-f[1600]
+ *
+ * In all these functions, the rate and capacity must sum to the width of the
+ * chosen permutation. For instance, to use the sponge function
+ * Keccak[r=1344, c=256], one must use KeccakWidth1600_Sponge() or a combination
+ * of KeccakWidth1600_SpongeInitialize(), KeccakWidth1600_SpongeAbsorb(),
+ * KeccakWidth1600_SpongeAbsorbLastFewBits() and
+ * KeccakWidth1600_SpongeSqueeze().
+ *
+ * The Prefix_SpongeInstance contains the sponge instance attributes for use
+ * with the Prefix_Sponge* functions.
+ * It gathers the state processed by the permutation as well as the rate,
+ * the position of input/output bytes in the state and the phase
+ * (absorbing or squeezing).
+ */
+
+#ifdef DontReallyInclude_DocumentationOnly
+/** Function to evaluate the sponge function Keccak[r, c] in a single call.
+ * @param rate The value of the rate r.
+ * @param capacity The value of the capacity c.
+ * @param input Pointer to the input message (before the suffix).
+ * @param inputByteLen The length of the input message in bytes.
+ * @param suffix Byte containing from 0 to 7 suffix bits
+ * that must be absorbed after @a input.
+ * These <i>n</i> bits must be in the least significant bit positions.
+ * These bits must be delimited with a bit 1 at position <i>n</i>
+ * (counting from 0=LSB to 7=MSB) and followed by bits 0
+ * from position <i>n</i>+1 to position 7.
+ * Some examples:
+ * - If no bits are to be absorbed, then @a suffix must be 0x01.
+ * - If the 2-bit sequence 0,0 is to be absorbed, @a suffix must be 0x04.
+ * - If the 5-bit sequence 0,1,0,0,1 is to be absorbed, @a suffix must be 0x32.
+ * - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a suffix must be 0x8B.
+ * .
+ * @param output Pointer to the output buffer.
+ * @param outputByteLen The desired number of output bytes.
+ * @pre One must have r+c equal to the supported width of this implementation
+ * and the rate a multiple of 8 bits (one byte) in this implementation.
+ * @pre @a suffix ≠ 0x00
+ * @return Zero if successful, 1 otherwise.
+ */
+int Prefix_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen);
+
+/**
+ * Function to initialize the state of the Keccak[r, c] sponge function.
+ * The phase of the sponge function is set to absorbing.
+ * @param spongeInstance Pointer to the sponge instance to be initialized.
+ * @param rate The value of the rate r.
+ * @param capacity The value of the capacity c.
+ * @pre One must have r+c equal to the supported width of this implementation
+ * and the rate a multiple of 8 bits (one byte) in this implementation.
+ * @return Zero if successful, 1 otherwise.
+ */
+int Prefix_SpongeInitialize(Prefix_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity);
+
+/**
+ * Function to give input data bytes for the sponge function to absorb.
+ * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize().
+ * @param data Pointer to the input data.
+ * @param dataByteLen The number of input bytes provided in the input data.
+ * @pre The sponge function must be in the absorbing phase,
+ * i.e., Prefix_SpongeSqueeze() or Prefix_SpongeAbsorbLastFewBits()
+ * must not have been called before.
+ * @return Zero if successful, 1 otherwise.
+ */
+int Prefix_SpongeAbsorb(Prefix_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen);
+
+/**
+ * Function to give input data bits for the sponge function to absorb
+ * and then to switch to the squeezing phase.
+ * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize().
+ * @param delimitedData Byte containing from 0 to 7 trailing bits
+ * that must be absorbed.
+ * These <i>n</i> bits must be in the least significant bit positions.
+ * These bits must be delimited with a bit 1 at position <i>n</i>
+ * (counting from 0=LSB to 7=MSB) and followed by bits 0
+ * from position <i>n</i>+1 to position 7.
+ * Some examples:
+ * - If no bits are to be absorbed, then @a delimitedData must be 0x01.
+ * - If the 2-bit sequence 0,0 is to be absorbed, @a delimitedData must be 0x04.
+ * - If the 5-bit sequence 0,1,0,0,1 is to be absorbed, @a delimitedData must be 0x32.
+ * - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a delimitedData must be 0x8B.
+ * .
+ * @pre The sponge function must be in the absorbing phase,
+ * i.e., Prefix_SpongeSqueeze() or Prefix_SpongeAbsorbLastFewBits()
+ * must not have been called before.
+ * @pre @a delimitedData ≠ 0x00
+ * @return Zero if successful, 1 otherwise.
+ */
+int Prefix_SpongeAbsorbLastFewBits(Prefix_SpongeInstance *spongeInstance, unsigned char delimitedData);
+
+/**
+ * Function to squeeze output data from the sponge function.
+ * If the sponge function was in the absorbing phase, this function
+ * switches it to the squeezing phase
+ * as if Prefix_SpongeAbsorbLastFewBits(spongeInstance, 0x01) was called.
+ * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize().
+ * @param data Pointer to the buffer where to store the output data.
+ * @param dataByteLen The number of output bytes desired.
+ * @return Zero if successful, 1 otherwise.
+ */
+int Prefix_SpongeSqueeze(Prefix_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen);
+#endif
+
+#include <string.h>
+#include "align.h"
+
+/*
+ * Declares the sponge instance type prefix##_SpongeInstance (struct tag
+ * prefix##_SpongeInstanceStruct), marked with ALIGN(alignment) from align.h.
+ * state - `size` bytes holding the permutation state
+ * rate - the rate in bits, as stored by prefix##_SpongeInitialize()
+ * byteIOIndex - byte position inside the current rate-sized block
+ * squeezing - non-zero once the instance has switched to the squeezing phase
+ */
+#define KCP_DeclareSpongeStructure(prefix, size, alignment) \
+ ALIGN(alignment) typedef struct prefix##_SpongeInstanceStruct { \
+ unsigned char state[size]; \
+ unsigned int rate; \
+ unsigned int byteIOIndex; \
+ int squeezing; \
+ } prefix##_SpongeInstance;
+
+/*
+ * Declares the prototypes of the five sponge entry points for one prefix:
+ * the one-shot prefix##_Sponge() and the incremental
+ * prefix##_SpongeInitialize/Absorb/AbsorbLastFewBits/Squeeze().
+ * All of them return int (zero on success, 1 otherwise).
+ */
+#define KCP_DeclareSpongeFunctions(prefix) \
+ int prefix##_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen); \
+ int prefix##_SpongeInitialize(prefix##_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); \
+ int prefix##_SpongeAbsorb(prefix##_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen); \
+ int prefix##_SpongeAbsorbLastFewBits(prefix##_SpongeInstance *spongeInstance, unsigned char delimitedData); \
+ int prefix##_SpongeSqueeze(prefix##_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen);
+
+/*
+ * Instantiate the sponge type and prototypes once per permutation width.
+ * Defining KeccakP<width>_excluded skips that width and its SnP header.
+ */
+#ifndef KeccakP200_excluded
+ #include "KeccakP-200-SnP.h"
+ KCP_DeclareSpongeStructure(KeccakWidth200, KeccakP200_stateSizeInBytes, KeccakP200_stateAlignment)
+ KCP_DeclareSpongeFunctions(KeccakWidth200)
+#endif
+
+#ifndef KeccakP400_excluded
+ #include "KeccakP-400-SnP.h"
+ KCP_DeclareSpongeStructure(KeccakWidth400, KeccakP400_stateSizeInBytes, KeccakP400_stateAlignment)
+ KCP_DeclareSpongeFunctions(KeccakWidth400)
+#endif
+
+#ifndef KeccakP800_excluded
+ #include "KeccakP-800-SnP.h"
+ KCP_DeclareSpongeStructure(KeccakWidth800, KeccakP800_stateSizeInBytes, KeccakP800_stateAlignment)
+ KCP_DeclareSpongeFunctions(KeccakWidth800)
+#endif
+
+#ifndef KeccakP1600_excluded
+ #include "KeccakP-1600-SnP.h"
+ KCP_DeclareSpongeStructure(KeccakWidth1600, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment)
+ KCP_DeclareSpongeFunctions(KeccakWidth1600)
+#endif
+
+/* Second instantiation on the same 1600-bit state size and alignment, under the KeccakWidth1600_12rounds prefix. */
+#ifndef KeccakP1600_excluded
+ #include "KeccakP-1600-SnP.h"
+ KCP_DeclareSpongeStructure(KeccakWidth1600_12rounds, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment)
+ KCP_DeclareSpongeFunctions(KeccakWidth1600_12rounds)
+#endif
+
+#endif
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+/*
+ * Template aliases. This file does not define `prefix`, `SnP`, SnP_width,
+ * SnP_Permute or SnP_FastLoop_Absorb; they are presumably supplied by the
+ * file that includes it (to be confirmed against the includer).
+ * JOIN goes through JOIN0 so that `prefix` and `SnP` are macro-expanded
+ * before being pasted onto the suffix.
+ */
+#define JOIN0(a, b) a ## b
+#define JOIN(a, b) JOIN0(a, b)
+
+/* Public sponge names, e.g. Sponge -> <prefix>_Sponge */
+#define Sponge JOIN(prefix, _Sponge)
+#define SpongeInstance JOIN(prefix, _SpongeInstance)
+#define SpongeInitialize JOIN(prefix, _SpongeInitialize)
+#define SpongeAbsorb JOIN(prefix, _SpongeAbsorb)
+#define SpongeAbsorbLastFewBits JOIN(prefix, _SpongeAbsorbLastFewBits)
+#define SpongeSqueeze JOIN(prefix, _SpongeSqueeze)
+
+/* State-and-permutation primitives, e.g. SnP_AddBytes -> <SnP>_AddBytes */
+#define SnP_stateSizeInBytes JOIN(SnP, _stateSizeInBytes)
+#define SnP_stateAlignment JOIN(SnP, _stateAlignment)
+#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize)
+#define SnP_Initialize JOIN(SnP, _Initialize)
+#define SnP_AddByte JOIN(SnP, _AddByte)
+#define SnP_AddBytes JOIN(SnP, _AddBytes)
+#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes)
+
+/**
+ * One-shot sponge: absorbs @a input, pads it using @a suffix, and writes
+ * @a outputByteLen bytes to @a output, working on a local state buffer.
+ * @param rate The rate in bits; must be non-zero, at most SnP_width and a multiple of 8.
+ * @param capacity The capacity in bits; rate+capacity must equal SnP_width.
+ * @param input Pointer to the input bytes.
+ * @param inputByteLen The number of input bytes.
+ * @param suffix Delimited trailing bits added right after the input; must not be 0x00.
+ * @param output Pointer to the buffer receiving the output bytes.
+ * @param outputByteLen The number of output bytes to produce.
+ * @return Zero if successful, 1 if one of the parameter checks fails.
+ */
+int Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen)
+{
+ ALIGN(SnP_stateAlignment) unsigned char state[SnP_stateSizeInBytes];
+ unsigned int partialBlock;
+ const unsigned char *curInput = input;
+ unsigned char *curOutput = output;
+ unsigned int rateInBytes = rate/8;
+
+ /* Parameter validation: nothing has been touched yet when these return */
+ if (rate+capacity != SnP_width)
+ return 1;
+ if ((rate <= 0) || (rate > SnP_width) || ((rate % 8) != 0))
+ return 1;
+ if (suffix == 0)
+ return 1;
+
+ /* Initialize the state */
+ SnP_StaticInitialize();
+ SnP_Initialize(state);
+
+ /* First, absorb whole blocks */
+#ifdef SnP_FastLoop_Absorb
+ if (((rateInBytes % (SnP_width/200)) == 0) && (inputByteLen >= rateInBytes)) {
+ /* fast lane: whole lane rate */
+ size_t j;
+ /* j is the number of bytes the fast loop consumed */
+ j = SnP_FastLoop_Absorb(state, rateInBytes/(SnP_width/200), curInput, inputByteLen);
+ curInput += j;
+ inputByteLen -= j;
+ }
+#endif
+ /* Generic path; also picks up whatever whole blocks remain after the fast loop */
+ while(inputByteLen >= (size_t)rateInBytes) {
+ #ifdef KeccakReference
+ displayBytes(1, "Block to be absorbed", curInput, rateInBytes);
+ #endif
+ SnP_AddBytes(state, curInput, 0, rateInBytes);
+ SnP_Permute(state);
+ curInput += rateInBytes;
+ inputByteLen -= rateInBytes;
+ }
+
+ /* Then, absorb what remains */
+ /* The loop above guarantees inputByteLen < rateInBytes here, so the cast cannot truncate */
+ partialBlock = (unsigned int)inputByteLen;
+ #ifdef KeccakReference
+ displayBytes(1, "Block to be absorbed (part)", curInput, partialBlock);
+ #endif
+ SnP_AddBytes(state, curInput, 0, partialBlock);
+
+ /* Finally, absorb the suffix */
+ #ifdef KeccakReference
+ {
+ unsigned char delimitedData1[1];
+ delimitedData1[0] = suffix;
+ displayBytes(1, "Block to be absorbed (last few bits + first bit of padding)", delimitedData1, 1);
+ }
+ #endif
+ /* Last few bits, whose delimiter coincides with first bit of padding */
+ SnP_AddByte(state, suffix, partialBlock);
+ /* If the first bit of padding is at position rate-1, we need a whole new block for the second bit of padding */
+ if ((suffix >= 0x80) && (partialBlock == (rateInBytes-1)))
+ SnP_Permute(state);
+ /* Second bit of padding */
+ SnP_AddByte(state, 0x80, rateInBytes-1);
+ #ifdef KeccakReference
+ {
+ unsigned char block[SnP_width/8];
+ memset(block, 0, SnP_width/8);
+ block[rateInBytes-1] = 0x80;
+ displayBytes(1, "Second bit of padding", block, rateInBytes);
+ }
+ #endif
+ SnP_Permute(state);
+ #ifdef KeccakReference
+ displayText(1, "--- Switching to squeezing phase ---");
+ #endif
+
+ /* First, output whole blocks */
+ /* Strict '>': the last block (possibly a full one) is left to the final extraction below, so no permutation runs after it */
+ while(outputByteLen > (size_t)rateInBytes) {
+ SnP_ExtractBytes(state, curOutput, 0, rateInBytes);
+ SnP_Permute(state);
+ #ifdef KeccakReference
+ displayBytes(1, "Squeezed block", curOutput, rateInBytes);
+ #endif
+ curOutput += rateInBytes;
+ outputByteLen -= rateInBytes;
+ }
+
+ /* Finally, output what remains */
+ /* outputByteLen <= rateInBytes here, so the cast cannot truncate */
+ partialBlock = (unsigned int)outputByteLen;
+ SnP_ExtractBytes(state, curOutput, 0, partialBlock);
+ #ifdef KeccakReference
+ displayBytes(1, "Squeezed block (part)", curOutput, partialBlock);
+ #endif
+
+ return 0;
+}
+
+/* ---------------------------------------------------------------- */
+/* ---------------------------------------------------------------- */
+/* ---------------------------------------------------------------- */
+
+/**
+ * Resets @a instance to an empty sponge in the absorbing phase.
+ * @param instance Pointer to the sponge instance to set up.
+ * @param rate The rate in bits; non-zero, at most SnP_width, multiple of 8.
+ * @param capacity The capacity in bits; rate+capacity must equal SnP_width.
+ * @return Zero if successful, 1 if the parameters are rejected (the
+ *         instance is left untouched in that case).
+ */
+int SpongeInitialize(SpongeInstance *instance, unsigned int rate, unsigned int capacity)
+{
+    const int widthMatches = (rate+capacity == SnP_width);
+    const int rateUsable = (rate != 0) && (rate <= SnP_width) && ((rate % 8) == 0);
+
+    if (!widthMatches || !rateUsable)
+        return 1;
+
+    SnP_StaticInitialize();
+    SnP_Initialize(instance->state);
+
+    /* Start at the beginning of the first block, absorbing */
+    instance->squeezing = 0;
+    instance->byteIOIndex = 0;
+    instance->rate = rate;
+
+    return 0;
+}
+
+/* ---------------------------------------------------------------- */
+
+/**
+ * Absorbs @a dataByteLen bytes from @a data into the sponge.
+ * Whole rate-sized blocks are taken straight from @a data when the instance
+ * sits on a block boundary; everything else goes through the partially
+ * filled block tracked by byteIOIndex.
+ * @param instance Pointer to the sponge instance initialized by SpongeInitialize().
+ * @param data Pointer to the input data.
+ * @param dataByteLen The number of input bytes provided in @a data.
+ * @return Zero if successful, 1 if the instance is already squeezing.
+ */
+int SpongeAbsorb(SpongeInstance *instance, const unsigned char *data, size_t dataByteLen)
+{
+    size_t i, j;
+    unsigned int partialBlock;
+    const unsigned char *curData;
+    unsigned int rateInBytes = instance->rate/8;
+
+    if (instance->squeezing)
+        return 1; /* Too late for additional input */
+
+    i = 0;
+    curData = data;
+    while(i < dataByteLen) {
+        /* Compare the remaining length (dataByteLen-i cannot wrap since i < dataByteLen)
+           instead of i+rateInBytes, which could overflow size_t */
+        if ((instance->byteIOIndex == 0) && (dataByteLen-i >= rateInBytes)) {
+#ifdef SnP_FastLoop_Absorb
+            /* processing full blocks first */
+            if ((rateInBytes % (SnP_width/200)) == 0) {
+                /* fast lane: whole lane rate */
+                j = SnP_FastLoop_Absorb(instance->state, rateInBytes/(SnP_width/200), curData, dataByteLen - i);
+                i += j;
+                curData += j;
+            }
+            else {
+#endif
+                for(j=dataByteLen-i; j>=rateInBytes; j-=rateInBytes) {
+                    #ifdef KeccakReference
+                    displayBytes(1, "Block to be absorbed", curData, rateInBytes);
+                    #endif
+                    SnP_AddBytes(instance->state, curData, 0, rateInBytes);
+                    SnP_Permute(instance->state);
+                    curData+=rateInBytes;
+                }
+                i = dataByteLen - j;
+#ifdef SnP_FastLoop_Absorb
+            }
+#endif
+        }
+        else {
+            /* normal lane: using the message queue */
+            /* Clamp in size_t BEFORE narrowing: casting dataByteLen-i to unsigned int
+               first would truncate inputs of 4 GiB or more and could let
+               byteIOIndex run past the end of the block.
+               byteIOIndex < rateInBytes holds here, so the subtraction cannot underflow. */
+            if (dataByteLen-i > rateInBytes-instance->byteIOIndex)
+                partialBlock = rateInBytes-instance->byteIOIndex;
+            else
+                partialBlock = (unsigned int)(dataByteLen - i);
+            #ifdef KeccakReference
+            displayBytes(1, "Block to be absorbed (part)", curData, partialBlock);
+            #endif
+            i += partialBlock;
+
+            SnP_AddBytes(instance->state, curData, instance->byteIOIndex, partialBlock);
+            curData += partialBlock;
+            instance->byteIOIndex += partialBlock;
+            if (instance->byteIOIndex == rateInBytes) {
+                /* Block is full: permute and start a fresh one */
+                SnP_Permute(instance->state);
+                instance->byteIOIndex = 0;
+            }
+        }
+    }
+    return 0;
+}
+
+/* ---------------------------------------------------------------- */
+
+/**
+ * Absorbs the final (up to 7) delimited bits, applies the two padding bits
+ * and switches the instance to the squeezing phase.
+ * @param instance Pointer to the sponge instance initialized by SpongeInitialize().
+ * @param delimitedData Trailing bits followed by a delimiter bit 1; must not be 0x00.
+ * @return Zero if successful, 1 if @a delimitedData is zero or the instance
+ *         is already squeezing.
+ */
+int SpongeAbsorbLastFewBits(SpongeInstance *instance, unsigned char delimitedData)
+{
+    const unsigned int rateInBytes = instance->rate/8;
+    const unsigned int finalBytePos = rateInBytes-1;
+
+    /* A zero byte carries no delimiter bit; a squeezing instance takes no more input */
+    if ((delimitedData == 0) || instance->squeezing)
+        return 1;
+
+    #ifdef KeccakReference
+    {
+        unsigned char firstPaddingByte[1];
+        firstPaddingByte[0] = delimitedData;
+        displayBytes(1, "Block to be absorbed (last few bits + first bit of padding)", firstPaddingByte, 1);
+    }
+    #endif
+    /* The delimiter bit inside delimitedData doubles as the first padding bit */
+    SnP_AddByte(instance->state, delimitedData, instance->byteIOIndex);
+    /* When that first padding bit lands on the very last bit of the block,
+       the second padding bit has to go into a block of its own */
+    if (((delimitedData & 0x80) != 0) && (instance->byteIOIndex == finalBytePos))
+        SnP_Permute(instance->state);
+    /* Second padding bit: top bit of the last byte of the block */
+    SnP_AddByte(instance->state, 0x80, finalBytePos);
+    #ifdef KeccakReference
+    {
+        unsigned char paddingBlock[SnP_width/8];
+        memset(paddingBlock, 0, SnP_width/8);
+        paddingBlock[finalBytePos] = 0x80;
+        displayBytes(1, "Second bit of padding", paddingBlock, rateInBytes);
+    }
+    #endif
+    SnP_Permute(instance->state);
+    instance->squeezing = 1;
+    instance->byteIOIndex = 0;
+    #ifdef KeccakReference
+    displayText(1, "--- Switching to squeezing phase ---");
+    #endif
+    return 0;
+}
+
+/* ---------------------------------------------------------------- */
+
+/**
+ * Squeezes @a dataByteLen output bytes from the sponge into @a data.
+ * If the instance is still absorbing, it is first switched to the squeezing
+ * phase as if SpongeAbsorbLastFewBits(instance, 0x01) had been called.
+ * @param instance Pointer to the sponge instance initialized by SpongeInitialize().
+ * @param data Pointer to the buffer where to store the output data.
+ * @param dataByteLen The number of output bytes desired.
+ * @return Zero (this function has no failure path).
+ */
+int SpongeSqueeze(SpongeInstance *instance, unsigned char *data, size_t dataByteLen)
+{
+    size_t i, j;
+    unsigned int partialBlock;
+    unsigned int rateInBytes = instance->rate/8;
+    unsigned char *curData;
+
+    /* Result not checked: with a non-zero delimiter on a non-squeezing
+       instance the call below has no failing branch */
+    if (!instance->squeezing)
+        SpongeAbsorbLastFewBits(instance, 0x01);
+
+    i = 0;
+    curData = data;
+    while(i < dataByteLen) {
+        /* Compare the remaining length (dataByteLen-i cannot wrap since i < dataByteLen)
+           instead of i+rateInBytes, which could overflow size_t */
+        if ((instance->byteIOIndex == rateInBytes) && (dataByteLen-i >= rateInBytes)) {
+            /* Current block fully consumed: produce whole blocks directly */
+            for(j=dataByteLen-i; j>=rateInBytes; j-=rateInBytes) {
+                SnP_Permute(instance->state);
+                SnP_ExtractBytes(instance->state, curData, 0, rateInBytes);
+                #ifdef KeccakReference
+                displayBytes(1, "Squeezed block", curData, rateInBytes);
+                #endif
+                curData+=rateInBytes;
+            }
+            i = dataByteLen - j;
+        }
+        else {
+            /* normal lane: using the message queue */
+            if (instance->byteIOIndex == rateInBytes) {
+                SnP_Permute(instance->state);
+                instance->byteIOIndex = 0;
+            }
+            /* Clamp in size_t BEFORE narrowing: casting dataByteLen-i to unsigned int
+               first would truncate requests of 4 GiB or more and could let
+               byteIOIndex run past the end of the block.
+               byteIOIndex < rateInBytes holds here, so the subtraction cannot underflow. */
+            if (dataByteLen-i > rateInBytes-instance->byteIOIndex)
+                partialBlock = rateInBytes-instance->byteIOIndex;
+            else
+                partialBlock = (unsigned int)(dataByteLen - i);
+            i += partialBlock;
+
+            SnP_ExtractBytes(instance->state, curData, instance->byteIOIndex, partialBlock);
+            #ifdef KeccakReference
+            displayBytes(1, "Squeezed block (part)", curData, partialBlock);
+            #endif
+            curData += partialBlock;
+            instance->byteIOIndex += partialBlock;
+        }
+    }
+    return 0;
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Drop the template aliases defined at the top of this file, presumably so
+ * it can be included again with another prefix/SnP pair (to be confirmed
+ * against the includer). JOIN0, JOIN, prefix, SnP, SnP_width, SnP_Permute
+ * and SnP_FastLoop_Absorb are not undefined here.
+ */
+#undef Sponge
+#undef SpongeInstance
+#undef SpongeInitialize
+#undef SpongeAbsorb
+#undef SpongeAbsorbLastFewBits
+#undef SpongeSqueeze
+#undef SnP_stateSizeInBytes
+#undef SnP_stateAlignment
+#undef SnP_StaticInitialize
+#undef SnP_Initialize
+#undef SnP_AddByte
+#undef SnP_AddBytes
+#undef SnP_ExtractBytes
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _SnP_Relaned_h_
+#define _SnP_Relaned_h_
+
+/*
+ * SnP_AddBytes: statement macro that hands `length` bytes of `data`, starting
+ * at byte `offset` of `state`, to the caller-supplied lane helpers.
+ * - offset == 0: SnP_AddLanes gets the length/SnP_laneLengthInBytes whole
+ *   lanes, then SnP_AddBytesInLane gets the length%SnP_laneLengthInBytes
+ *   leftover bytes for the next lane (it is invoked even when that count is 0).
+ * - offset != 0: walks lane by lane from lane offset/SnP_laneLengthInBytes,
+ *   starting offset%SnP_laneLengthInBytes bytes into it, calling
+ *   SnP_AddBytesInLane for each piece until `length` bytes are consumed.
+ * Expands to a bare { } block and evaluates its arguments more than once, so
+ * the arguments should be free of side effects.
+ */
+#define SnP_AddBytes(state, data, offset, length, SnP_AddLanes, SnP_AddBytesInLane, SnP_laneLengthInBytes) \
+ { \
+ if ((offset) == 0) { \
+ SnP_AddLanes(state, data, (length)/SnP_laneLengthInBytes); \
+ SnP_AddBytesInLane(state, \
+ (length)/SnP_laneLengthInBytes, \
+ (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
+ 0, \
+ (length)%SnP_laneLengthInBytes); \
+ } \
+ else { \
+ unsigned int _sizeLeft = (length); \
+ unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \
+ unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \
+ const unsigned char *_curData = (data); \
+ while(_sizeLeft > 0) { \
+ unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \
+ if (_bytesInLane > _sizeLeft) \
+ _bytesInLane = _sizeLeft; \
+ SnP_AddBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \
+ _sizeLeft -= _bytesInLane; \
+ _lanePosition++; \
+ _offsetInLane = 0; \
+ _curData += _bytesInLane; \
+ } \
+ } \
+ }
+
+/*
+ * SnP_OverwriteBytes: same lane-splitting scheme as SnP_AddBytes, but
+ * dispatching to SnP_OverwriteLanes / SnP_OverwriteBytesInLane.
+ * - offset == 0: whole lanes first, then the length%SnP_laneLengthInBytes
+ *   leftover bytes (the in-lane helper is invoked even when that count is 0).
+ * - offset != 0: lane-by-lane walk starting inside lane
+ *   offset/SnP_laneLengthInBytes.
+ * Expands to a bare { } block and evaluates its arguments more than once.
+ */
+#define SnP_OverwriteBytes(state, data, offset, length, SnP_OverwriteLanes, SnP_OverwriteBytesInLane, SnP_laneLengthInBytes) \
+ { \
+ if ((offset) == 0) { \
+ SnP_OverwriteLanes(state, data, (length)/SnP_laneLengthInBytes); \
+ SnP_OverwriteBytesInLane(state, \
+ (length)/SnP_laneLengthInBytes, \
+ (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
+ 0, \
+ (length)%SnP_laneLengthInBytes); \
+ } \
+ else { \
+ unsigned int _sizeLeft = (length); \
+ unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \
+ unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \
+ const unsigned char *_curData = (data); \
+ while(_sizeLeft > 0) { \
+ unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \
+ if (_bytesInLane > _sizeLeft) \
+ _bytesInLane = _sizeLeft; \
+ SnP_OverwriteBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \
+ _sizeLeft -= _bytesInLane; \
+ _lanePosition++; \
+ _offsetInLane = 0; \
+ _curData += _bytesInLane; \
+ } \
+ } \
+ }
+
+/*
+ * SnP_ExtractBytes: same lane-splitting scheme as SnP_AddBytes, but `data`
+ * is a writable output buffer and the work is dispatched to
+ * SnP_ExtractLanes / SnP_ExtractBytesInLane.
+ * - offset == 0: whole lanes first, then the length%SnP_laneLengthInBytes
+ *   leftover bytes (the in-lane helper is invoked even when that count is 0).
+ * - offset != 0: lane-by-lane walk starting inside lane
+ *   offset/SnP_laneLengthInBytes.
+ * Expands to a bare { } block and evaluates its arguments more than once.
+ */
+#define SnP_ExtractBytes(state, data, offset, length, SnP_ExtractLanes, SnP_ExtractBytesInLane, SnP_laneLengthInBytes) \
+ { \
+ if ((offset) == 0) { \
+ SnP_ExtractLanes(state, data, (length)/SnP_laneLengthInBytes); \
+ SnP_ExtractBytesInLane(state, \
+ (length)/SnP_laneLengthInBytes, \
+ (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
+ 0, \
+ (length)%SnP_laneLengthInBytes); \
+ } \
+ else { \
+ unsigned int _sizeLeft = (length); \
+ unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \
+ unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \
+ unsigned char *_curData = (data); \
+ while(_sizeLeft > 0) { \
+ unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \
+ if (_bytesInLane > _sizeLeft) \
+ _bytesInLane = _sizeLeft; \
+ SnP_ExtractBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \
+ _sizeLeft -= _bytesInLane; \
+ _lanePosition++; \
+ _offsetInLane = 0; \
+ _curData += _bytesInLane; \
+ } \
+ } \
+ }
+
+/*
+ * SnP_ExtractAndAddBytes: same lane-splitting scheme as SnP_AddBytes, with a
+ * read-only `input` buffer and a writable `output` buffer that are advanced
+ * in lockstep; the work is dispatched to SnP_ExtractAndAddLanes /
+ * SnP_ExtractAndAddBytesInLane.
+ * - offset == 0: whole lanes first, then the length%SnP_laneLengthInBytes
+ *   leftover bytes (the in-lane helper is invoked even when that count is 0).
+ * - offset != 0: lane-by-lane walk starting inside lane
+ *   offset/SnP_laneLengthInBytes.
+ * Expands to a bare { } block and evaluates its arguments more than once.
+ */
+#define SnP_ExtractAndAddBytes(state, input, output, offset, length, SnP_ExtractAndAddLanes, SnP_ExtractAndAddBytesInLane, SnP_laneLengthInBytes) \
+ { \
+ if ((offset) == 0) { \
+ SnP_ExtractAndAddLanes(state, input, output, (length)/SnP_laneLengthInBytes); \
+ SnP_ExtractAndAddBytesInLane(state, \
+ (length)/SnP_laneLengthInBytes, \
+ (input)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
+ (output)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
+ 0, \
+ (length)%SnP_laneLengthInBytes); \
+ } \
+ else { \
+ unsigned int _sizeLeft = (length); \
+ unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \
+ unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \
+ const unsigned char *_curInput = (input); \
+ unsigned char *_curOutput = (output); \
+ while(_sizeLeft > 0) { \
+ unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \
+ if (_bytesInLane > _sizeLeft) \
+ _bytesInLane = _sizeLeft; \
+ SnP_ExtractAndAddBytesInLane(state, _lanePosition, _curInput, _curOutput, _offsetInLane, _bytesInLane); \
+ _sizeLeft -= _bytesInLane; \
+ _lanePosition++; \
+ _offsetInLane = 0; \
+ _curInput += _bytesInLane; \
+ _curOutput += _bytesInLane; \
+ } \
+ } \
+ }
+
+#endif
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _align_h_
+#define _align_h_
+
+/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */
+#ifdef ALIGN
+#undef ALIGN
+#endif
+
+/*
+ * ALIGN(x): compiler-specific alignment specifier.
+ * On a compiler matching none of the branches below it expands to nothing,
+ * so the requested alignment is then not applied.
+ */
+#if defined(__GNUC__)
+#define ALIGN(x) __attribute__ ((aligned(x)))
+#elif defined(_MSC_VER)
+#define ALIGN(x) __declspec(align(x))
+#elif defined(__ARMCC_VERSION)
+#define ALIGN(x) __align(x)
+#else
+#define ALIGN(x)
+#endif
+
+#endif
--- /dev/null
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The redistribution and use of this software (with or without changes)
+ is allowed without the payment of fees or royalties provided that:
+
+ 1. source code distributions include the above copyright notice, this
+ list of conditions and the following disclaimer;
+
+ 2. binary distributions include the above copyright notice, this list
+ of conditions and the following disclaimer in their documentation;
+
+ 3. the name of the copyright holder is not used to endorse products
+ built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+ Changes for ARM 9/9/2010
+*/
+
+#ifndef _BRG_ENDIAN_H
+#define _BRG_ENDIAN_H
+
+#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
+#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
+
+/* Disabled by "#if 0": none of the system endian headers below are included, */
+/* so the detection that follows sees only macros already defined by the */
+/* compiler or by headers included before this file. */
+#if 0
+/* Include files where endian defines and byteswap functions may reside */
+#if defined( __sun )
+# include <sys/isa_defs.h>
+#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
+# include <sys/endian.h>
+#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
+ defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
+# include <machine/endian.h>
+#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
+# if !defined( __MINGW32__ ) && !defined( _AIX )
+# include <endian.h>
+# if !defined( __BEOS__ )
+# include <byteswap.h>
+# endif
+# endif
+#endif
+#endif
+
+/* Now attempt to set the define for platform byte order using any */
+/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
+/* seem to encompass most endian symbol definitions */
+/* Each form is tested independently, so more than one group below may */
+/* define PLATFORM_BYTE_ORDER. When both the BIG and LITTLE symbols of a */
+/* form are defined, the matching BYTE_ORDER symbol selects between them; */
+/* if it is undefined or equals neither, that group defines nothing. */
+
+#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
+# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( BIG_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( LITTLE_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
+# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( _BIG_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( _LITTLE_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
+# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( __BIG_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
+# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+#elif defined( __BIG_ENDIAN__ )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN__ )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+/* if the platform byte order could not be determined, then try to */
+/* set this define using common machine defines */
+#if !defined(PLATFORM_BYTE_ORDER)
+
+/* Machine/compiler macros listed here are treated as little-endian */
+#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
+ defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
+ defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
+ defined( vax ) || defined( vms ) || defined( VMS ) || \
+ defined( __VMS ) || defined( _M_X64 )
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+
+/* Machine/compiler macros listed here are treated as big-endian */
+#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
+ defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
+ defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
+ defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
+ defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
+ defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
+ defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX )
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+
+/* ARM: big-endian only when __BIG_ENDIAN is defined, little-endian otherwise */
+#elif defined(__arm__)
+# ifdef __BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+# else
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+# endif
+/* Unconditional fallback: any platform not recognized above is assumed to be */
+/* little-endian. Because this branch is always taken, the "#elif 0" and */
+/* "#error" branches below are unreachable as written. */
+#elif 1 /* **** EDIT HERE IF NECESSARY **** */
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#elif 0 /* **** EDIT HERE IF NECESSARY **** */
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#else
+# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
+#endif
+
+#endif
+
+#endif
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#include <string.h>
+#include "KeccakHash.h"
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Sets up a hash instance on top of a 1600-bit sponge.
+ * Fails with FAIL when delimitedSuffix is zero, or with the sponge's own
+ * status when the rate/capacity pair is rejected; in both cases the output
+ * length and suffix fields are left untouched.
+ */
+HashReturn Keccak_HashInitialize(Keccak_HashInstance *instance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix)
+{
+    HashReturn status = FAIL;
+
+    if (delimitedSuffix != 0) {
+        status = (HashReturn)KeccakWidth1600_SpongeInitialize(&instance->sponge, rate, capacity);
+        if (status == SUCCESS) {
+            instance->fixedOutputLength = hashbitlen;
+            instance->delimitedSuffix = delimitedSuffix;
+        }
+    }
+    return status;
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Absorbs databitlen bits of input.
+ * Whole bytes go straight to the sponge. A trailing partial byte (allowed
+ * only on the last call) is merged with the pending delimited suffix; if the
+ * merged bits no longer fit in one byte, the low byte is absorbed and the
+ * high byte becomes the new suffix.
+ * Returns SUCCESS, or the status of the failing sponge call.
+ */
+HashReturn Keccak_HashUpdate(Keccak_HashInstance *instance, const BitSequence *data, DataLength databitlen)
+{
+    if ((databitlen % 8) == 0)
+        return (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, databitlen/8);
+    else {
+        HashReturn ret = (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, databitlen/8);
+        if (ret == SUCCESS) {
+            /* The last partial byte is assumed to be aligned on the least significant bits.
+               Keep only its (databitlen % 8) valid low bits: any stray high bits
+               would otherwise be OR-ed into the suffix bits placed above them. */
+            unsigned char lastByte = (unsigned char)(data[databitlen/8] & ((1u << (databitlen % 8)) - 1u));
+            /* Concatenate the last few bits provided here with those of the suffix */
+            unsigned short delimitedLastBytes = (unsigned short)((unsigned short)lastByte | ((unsigned short)instance->delimitedSuffix << (databitlen % 8)));
+            if ((delimitedLastBytes & 0xFF00) == 0x0000) {
+                /* Everything still fits in one delimited byte */
+                instance->delimitedSuffix = delimitedLastBytes & 0xFF;
+            }
+            else {
+                /* Overflowed into a second byte: absorb the full low byte now */
+                unsigned char oneByte[1];
+                oneByte[0] = delimitedLastBytes & 0xFF;
+                ret = (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, oneByte, 1);
+                instance->delimitedSuffix = (delimitedLastBytes >> 8) & 0xFF;
+            }
+        }
+        return ret;
+    }
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Absorbs the pending delimited suffix (which also applies the padding) and
+ * then squeezes fixedOutputLength/8 bytes into hashval.
+ * Returns the status of the first sponge call that fails, else that of the squeeze.
+ */
+HashReturn Keccak_HashFinal(Keccak_HashInstance *instance, BitSequence *hashval)
+{
+    HashReturn status = (HashReturn)KeccakWidth1600_SpongeAbsorbLastFewBits(&instance->sponge, instance->delimitedSuffix);
+
+    if (status != SUCCESS)
+        return status;
+    return (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, hashval, instance->fixedOutputLength/8);
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Squeezes databitlen bits of output into data.
+ * Only whole bytes are supported: returns FAIL without touching the sponge
+ * when databitlen is not a multiple of 8.
+ */
+HashReturn Keccak_HashSqueeze(Keccak_HashInstance *instance, BitSequence *data, DataLength databitlen)
+{
+    const int wholeBytes = ((databitlen % 8) == 0);
+
+    return wholeBytes
+        ? (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, data, databitlen/8)
+        : FAIL;
+}
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _KeccakHashInterface_h_
+#define _KeccakHashInterface_h_
+
+#ifndef KeccakP1600_excluded
+
+#include "KeccakSponge.h"
+#include <string.h>
+
+/* Input and output data are handled as arrays of unsigned bytes. */
+typedef unsigned char BitSequence;
+/* Length type of the hash functions declared below, which express lengths in bits. */
+typedef size_t DataLength;
+/* Status codes; SUCCESS and FAIL match the 0/1 results of the sponge functions. */
+typedef enum { SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2 } HashReturn;
+
+/* State of one hashing operation. */
+typedef struct {
+ KeccakWidth1600_SpongeInstance sponge; /* underlying 1600-bit sponge */
+ unsigned int fixedOutputLength; /* output length in bits, as given to Keccak_HashInitialize() */
+ unsigned char delimitedSuffix; /* delimited suffix bits still to be absorbed at finalization */
+} Keccak_HashInstance;
+
+/**
+ * Function to initialize the Keccak[r, c] sponge function instance used in sequential hashing mode.
+ * @param hashInstance Pointer to the hash instance to be initialized.
+ * @param rate The value of the rate r.
+ * @param capacity The value of the capacity c.
+ * @param hashbitlen The desired number of output bits,
+ * or 0 for an arbitrarily-long output.
+ * @param delimitedSuffix Bits that will be automatically appended to the end
+ * of the input message, as in domain separation.
+ * This is a byte containing from 0 to 7 bits
+ * formatted like the @a delimitedData parameter of
+ * the KeccakWidth1600_SpongeAbsorbLastFewBits() function.
+ * @pre One must have r+c=1600 and the rate a multiple of 8 bits in this implementation.
+ * @return SUCCESS if successful, FAIL otherwise.
+ */
+HashReturn Keccak_HashInitialize(Keccak_HashInstance *hashInstance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix);
+
+/** Macro to initialize a SHAKE128 instance as specified in the FIPS 202 standard.
+ */
+#define Keccak_HashInitialize_SHAKE128(hashInstance) Keccak_HashInitialize(hashInstance, 1344, 256, 0, 0x1F)
+
+/** Macro to initialize a SHAKE256 instance as specified in the FIPS 202 standard.
+ */
+#define Keccak_HashInitialize_SHAKE256(hashInstance) Keccak_HashInitialize(hashInstance, 1088, 512, 0, 0x1F)
+
+/** Macro to initialize a SHA3-224 instance as specified in the FIPS 202 standard.
+ */
+#define Keccak_HashInitialize_SHA3_224(hashInstance) Keccak_HashInitialize(hashInstance, 1152, 448, 224, 0x06)
+
+/** Macro to initialize a SHA3-256 instance as specified in the FIPS 202 standard.
+ */
+#define Keccak_HashInitialize_SHA3_256(hashInstance) Keccak_HashInitialize(hashInstance, 1088, 512, 256, 0x06)
+
+/** Macro to initialize a SHA3-384 instance as specified in the FIPS 202 standard.
+ */
+#define Keccak_HashInitialize_SHA3_384(hashInstance) Keccak_HashInitialize(hashInstance, 832, 768, 384, 0x06)
+
+/** Macro to initialize a SHA3-512 instance as specified in the FIPS 202 standard.
+ */
+#define Keccak_HashInitialize_SHA3_512(hashInstance) Keccak_HashInitialize(hashInstance, 576, 1024, 512, 0x06)
+
+/**
+ * Function to give input data to be absorbed.
+ * @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize().
+ * @param data Pointer to the input data.
+ * When @a databitlen is not a multiple of 8, the last bits of data must be
+ * in the least significant bits of the last byte (little-endian convention).
+ * @param databitlen The number of input bits provided in the input data.
+ * @pre In the previous call to Keccak_HashUpdate(), databitlen was a multiple of 8.
+ * @return SUCCESS if successful, FAIL otherwise.
+ */
+HashReturn Keccak_HashUpdate(Keccak_HashInstance *hashInstance, const BitSequence *data, DataLength databitlen);
+
+/**
+ * Function to call after all input blocks have been input and to get
+ * output bits if the length was specified when calling Keccak_HashInitialize().
+ * @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize().
+ * If @a hashbitlen was not 0 in the call to Keccak_HashInitialize(), the number of
+ * output bits is equal to @a hashbitlen.
+ * If @a hashbitlen was 0 in the call to Keccak_HashInitialize(), the output bits
+ * must be extracted using the Keccak_HashSqueeze() function.
+ * @param hashval Pointer to the buffer where to store the output data.
+ * @return SUCCESS if successful, FAIL otherwise.
+ */
+HashReturn Keccak_HashFinal(Keccak_HashInstance *hashInstance, BitSequence *hashval);
+
+ /**
+ * Function to squeeze output data.
+ * @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize().
+ * @param data Pointer to the buffer where the output data is stored.
+ * NOTE(review): presumably the buffer must hold at least @a databitlen / 8 bytes - confirm against the implementation.
+ * @param databitlen The number of output bits desired (must be a multiple of 8).
+ * @pre Keccak_HashFinal() must have been already called.
+ * @pre @a databitlen is a multiple of 8.
+ * @return SUCCESS if successful, FAIL otherwise.
+ */
+HashReturn Keccak_HashSqueeze(Keccak_HashInstance *hashInstance, BitSequence *data, DataLength databitlen);
+
+#endif
+
+#endif
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#define declareABCDE /* declares the UINT64 working variables: 25 lanes each for prefixes A, B and E, 5 each for C and D */ \
+ UINT64 Aba, Abe, Abi, Abo, Abu; \
+ UINT64 Aga, Age, Agi, Ago, Agu; \
+ UINT64 Aka, Ake, Aki, Ako, Aku; \
+ UINT64 Ama, Ame, Ami, Amo, Amu; \
+ UINT64 Asa, Ase, Asi, Aso, Asu; \
+ UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \
+ UINT64 Bga, Bge, Bgi, Bgo, Bgu; \
+ UINT64 Bka, Bke, Bki, Bko, Bku; \
+ UINT64 Bma, Bme, Bmi, Bmo, Bmu; \
+ UINT64 Bsa, Bse, Bsi, Bso, Bsu; \
+ UINT64 Ca, Ce, Ci, Co, Cu; \
+ UINT64 Da, De, Di, Do, Du; \
+ UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \
+ UINT64 Ega, Ege, Egi, Ego, Egu; \
+ UINT64 Eka, Eke, Eki, Eko, Eku; \
+ UINT64 Ema, Eme, Emi, Emo, Emu; \
+ UINT64 Esa, Ese, Esi, Eso, Esu; \
+
+#define prepareTheta /* sets each of Ca..Cu to the XOR of the five A lanes whose names end in the same vowel */ \
+ Ca = Aba^Aga^Aka^Ama^Asa; \
+ Ce = Abe^Age^Ake^Ame^Ase; \
+ Ci = Abi^Agi^Aki^Ami^Asi; \
+ Co = Abo^Ago^Ako^Amo^Aso; \
+ Cu = Abu^Agu^Aku^Amu^Asu; \
+
+#ifdef UseBebigokimisa
+/* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') */
+/* --- 64-bit lanes mapped to 64-bit words
+ * Parameters: i indexes KeccakF1600RoundConstants[]; A and E are the name
+ * prefixes of the source and destination lane variables (pasted with ##).
+ * Steps, in the order they appear:
+ *  - Da..Du are derived from Ca..Cu, which must already hold valid values;
+ *  - every A lane is XORed in place with its D value (so A is modified) and
+ *    rotated with ROL64 into a B temporary (Bba is taken unrotated);
+ *  - each group of five B values yields five E lanes; in this variant the
+ *    combining step mixes |, & and a few ~ operators instead of using one
+ *    uniform expression for every lane;
+ *  - KeccakF1600RoundConstants[i] is XORed into E##ba only;
+ *  - Ca..Cu are overwritten with the XOR of the new E lanes that share the
+ *    same final vowel, so they are ready for the next invocation.
+ */
+#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
+ Da = Cu^ROL64(Ce, 1); \
+ De = Ca^ROL64(Ci, 1); \
+ Di = Ce^ROL64(Co, 1); \
+ Do = Ci^ROL64(Cu, 1); \
+ Du = Co^ROL64(Ca, 1); \
+\
+ A##ba ^= Da; \
+ Bba = A##ba; \
+ A##ge ^= De; \
+ Bbe = ROL64(A##ge, 44); \
+ A##ki ^= Di; \
+ Bbi = ROL64(A##ki, 43); \
+ A##mo ^= Do; \
+ Bbo = ROL64(A##mo, 21); \
+ A##su ^= Du; \
+ Bbu = ROL64(A##su, 14); \
+ E##ba = Bba ^( Bbe | Bbi ); \
+ E##ba ^= KeccakF1600RoundConstants[i]; \
+ Ca = E##ba; \
+ E##be = Bbe ^((~Bbi)| Bbo ); \
+ Ce = E##be; \
+ E##bi = Bbi ^( Bbo & Bbu ); \
+ Ci = E##bi; \
+ E##bo = Bbo ^( Bbu | Bba ); \
+ Co = E##bo; \
+ E##bu = Bbu ^( Bba & Bbe ); \
+ Cu = E##bu; \
+\
+ A##bo ^= Do; \
+ Bga = ROL64(A##bo, 28); \
+ A##gu ^= Du; \
+ Bge = ROL64(A##gu, 20); \
+ A##ka ^= Da; \
+ Bgi = ROL64(A##ka, 3); \
+ A##me ^= De; \
+ Bgo = ROL64(A##me, 45); \
+ A##si ^= Di; \
+ Bgu = ROL64(A##si, 61); \
+ E##ga = Bga ^( Bge | Bgi ); \
+ Ca ^= E##ga; \
+ E##ge = Bge ^( Bgi & Bgo ); \
+ Ce ^= E##ge; \
+ E##gi = Bgi ^( Bgo |(~Bgu)); \
+ Ci ^= E##gi; \
+ E##go = Bgo ^( Bgu | Bga ); \
+ Co ^= E##go; \
+ E##gu = Bgu ^( Bga & Bge ); \
+ Cu ^= E##gu; \
+\
+ A##be ^= De; \
+ Bka = ROL64(A##be, 1); \
+ A##gi ^= Di; \
+ Bke = ROL64(A##gi, 6); \
+ A##ko ^= Do; \
+ Bki = ROL64(A##ko, 25); \
+ A##mu ^= Du; \
+ Bko = ROL64(A##mu, 8); \
+ A##sa ^= Da; \
+ Bku = ROL64(A##sa, 18); \
+ E##ka = Bka ^( Bke | Bki ); \
+ Ca ^= E##ka; \
+ E##ke = Bke ^( Bki & Bko ); \
+ Ce ^= E##ke; \
+ E##ki = Bki ^((~Bko)& Bku ); \
+ Ci ^= E##ki; \
+ E##ko = (~Bko)^( Bku | Bka ); \
+ Co ^= E##ko; \
+ E##ku = Bku ^( Bka & Bke ); \
+ Cu ^= E##ku; \
+\
+ A##bu ^= Du; \
+ Bma = ROL64(A##bu, 27); \
+ A##ga ^= Da; \
+ Bme = ROL64(A##ga, 36); \
+ A##ke ^= De; \
+ Bmi = ROL64(A##ke, 10); \
+ A##mi ^= Di; \
+ Bmo = ROL64(A##mi, 15); \
+ A##so ^= Do; \
+ Bmu = ROL64(A##so, 56); \
+ E##ma = Bma ^( Bme & Bmi ); \
+ Ca ^= E##ma; \
+ E##me = Bme ^( Bmi | Bmo ); \
+ Ce ^= E##me; \
+ E##mi = Bmi ^((~Bmo)| Bmu ); \
+ Ci ^= E##mi; \
+ E##mo = (~Bmo)^( Bmu & Bma ); \
+ Co ^= E##mo; \
+ E##mu = Bmu ^( Bma | Bme ); \
+ Cu ^= E##mu; \
+\
+ A##bi ^= Di; \
+ Bsa = ROL64(A##bi, 62); \
+ A##go ^= Do; \
+ Bse = ROL64(A##go, 55); \
+ A##ku ^= Du; \
+ Bsi = ROL64(A##ku, 39); \
+ A##ma ^= Da; \
+ Bso = ROL64(A##ma, 41); \
+ A##se ^= De; \
+ Bsu = ROL64(A##se, 2); \
+ E##sa = Bsa ^((~Bse)& Bsi ); \
+ Ca ^= E##sa; \
+ E##se = (~Bse)^( Bsi | Bso ); \
+ Ce ^= E##se; \
+ E##si = Bsi ^( Bso & Bsu ); \
+ Ci ^= E##si; \
+ E##so = Bso ^( Bsu | Bsa ); \
+ Co ^= E##so; \
+ E##su = Bsu ^( Bsa & Bse ); \
+ Cu ^= E##su; \
+\
+
+/* --- Code for round (lane complementing pattern 'bebigokimisa') */
+/* --- 64-bit lanes mapped to 64-bit words
+ * Parameters: i indexes KeccakF1600RoundConstants[]; A and E are the name
+ * prefixes of the source and destination lane variables (pasted with ##).
+ * Steps, in the order they appear:
+ *  - Da..Du are derived from Ca..Cu, which must already hold valid values;
+ *  - every A lane is XORed in place with its D value (so A is modified) and
+ *    rotated with ROL64 into a B temporary (Bba is taken unrotated);
+ *  - each group of five B values yields five E lanes; in this variant the
+ *    combining step mixes |, & and a few ~ operators instead of using one
+ *    uniform expression for every lane;
+ *  - KeccakF1600RoundConstants[i] is XORed into E##ba only.
+ * Ca..Cu are read but never written here, so they are stale afterwards.
+ */
+#define thetaRhoPiChiIota(i, A, E) \
+ Da = Cu^ROL64(Ce, 1); \
+ De = Ca^ROL64(Ci, 1); \
+ Di = Ce^ROL64(Co, 1); \
+ Do = Ci^ROL64(Cu, 1); \
+ Du = Co^ROL64(Ca, 1); \
+\
+ A##ba ^= Da; \
+ Bba = A##ba; \
+ A##ge ^= De; \
+ Bbe = ROL64(A##ge, 44); \
+ A##ki ^= Di; \
+ Bbi = ROL64(A##ki, 43); \
+ A##mo ^= Do; \
+ Bbo = ROL64(A##mo, 21); \
+ A##su ^= Du; \
+ Bbu = ROL64(A##su, 14); \
+ E##ba = Bba ^( Bbe | Bbi ); \
+ E##ba ^= KeccakF1600RoundConstants[i]; \
+ E##be = Bbe ^((~Bbi)| Bbo ); \
+ E##bi = Bbi ^( Bbo & Bbu ); \
+ E##bo = Bbo ^( Bbu | Bba ); \
+ E##bu = Bbu ^( Bba & Bbe ); \
+\
+ A##bo ^= Do; \
+ Bga = ROL64(A##bo, 28); \
+ A##gu ^= Du; \
+ Bge = ROL64(A##gu, 20); \
+ A##ka ^= Da; \
+ Bgi = ROL64(A##ka, 3); \
+ A##me ^= De; \
+ Bgo = ROL64(A##me, 45); \
+ A##si ^= Di; \
+ Bgu = ROL64(A##si, 61); \
+ E##ga = Bga ^( Bge | Bgi ); \
+ E##ge = Bge ^( Bgi & Bgo ); \
+ E##gi = Bgi ^( Bgo |(~Bgu)); \
+ E##go = Bgo ^( Bgu | Bga ); \
+ E##gu = Bgu ^( Bga & Bge ); \
+\
+ A##be ^= De; \
+ Bka = ROL64(A##be, 1); \
+ A##gi ^= Di; \
+ Bke = ROL64(A##gi, 6); \
+ A##ko ^= Do; \
+ Bki = ROL64(A##ko, 25); \
+ A##mu ^= Du; \
+ Bko = ROL64(A##mu, 8); \
+ A##sa ^= Da; \
+ Bku = ROL64(A##sa, 18); \
+ E##ka = Bka ^( Bke | Bki ); \
+ E##ke = Bke ^( Bki & Bko ); \
+ E##ki = Bki ^((~Bko)& Bku ); \
+ E##ko = (~Bko)^( Bku | Bka ); \
+ E##ku = Bku ^( Bka & Bke ); \
+\
+ A##bu ^= Du; \
+ Bma = ROL64(A##bu, 27); \
+ A##ga ^= Da; \
+ Bme = ROL64(A##ga, 36); \
+ A##ke ^= De; \
+ Bmi = ROL64(A##ke, 10); \
+ A##mi ^= Di; \
+ Bmo = ROL64(A##mi, 15); \
+ A##so ^= Do; \
+ Bmu = ROL64(A##so, 56); \
+ E##ma = Bma ^( Bme & Bmi ); \
+ E##me = Bme ^( Bmi | Bmo ); \
+ E##mi = Bmi ^((~Bmo)| Bmu ); \
+ E##mo = (~Bmo)^( Bmu & Bma ); \
+ E##mu = Bmu ^( Bma | Bme ); \
+\
+ A##bi ^= Di; \
+ Bsa = ROL64(A##bi, 62); \
+ A##go ^= Do; \
+ Bse = ROL64(A##go, 55); \
+ A##ku ^= Du; \
+ Bsi = ROL64(A##ku, 39); \
+ A##ma ^= Da; \
+ Bso = ROL64(A##ma, 41); \
+ A##se ^= De; \
+ Bsu = ROL64(A##se, 2); \
+ E##sa = Bsa ^((~Bse)& Bsi ); \
+ E##se = (~Bse)^( Bsi | Bso ); \
+ E##si = Bsi ^( Bso & Bsu ); \
+ E##so = Bso ^( Bsu | Bsa ); \
+ E##su = Bsu ^( Bsa & Bse ); \
+\
+
+#else /* UseBebigokimisa */
+/* --- Code for round, with prepare-theta */
+/* --- 64-bit lanes mapped to 64-bit words
+ * Parameters: i indexes KeccakF1600RoundConstants[]; A and E are the name
+ * prefixes of the source and destination lane variables (pasted with ##).
+ * Steps, in the order they appear:
+ *  - Da..Du are derived from Ca..Cu, which must already hold valid values;
+ *  - every A lane is XORed in place with its D value (so A is modified) and
+ *    rotated with ROL64 into a B temporary (Bba is taken unrotated);
+ *  - each group of five B values yields five E lanes, every one computed as
+ *    B[x] ^ ((~B[x+1]) & B[x+2]) with indices wrapping inside the group;
+ *  - KeccakF1600RoundConstants[i] is XORed into E##ba only;
+ *  - Ca..Cu are overwritten with the XOR of the new E lanes that share the
+ *    same final vowel, so they are ready for the next invocation.
+ */
+#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
+ Da = Cu^ROL64(Ce, 1); \
+ De = Ca^ROL64(Ci, 1); \
+ Di = Ce^ROL64(Co, 1); \
+ Do = Ci^ROL64(Cu, 1); \
+ Du = Co^ROL64(Ca, 1); \
+\
+ A##ba ^= Da; \
+ Bba = A##ba; \
+ A##ge ^= De; \
+ Bbe = ROL64(A##ge, 44); \
+ A##ki ^= Di; \
+ Bbi = ROL64(A##ki, 43); \
+ A##mo ^= Do; \
+ Bbo = ROL64(A##mo, 21); \
+ A##su ^= Du; \
+ Bbu = ROL64(A##su, 14); \
+ E##ba = Bba ^((~Bbe)& Bbi ); \
+ E##ba ^= KeccakF1600RoundConstants[i]; \
+ Ca = E##ba; \
+ E##be = Bbe ^((~Bbi)& Bbo ); \
+ Ce = E##be; \
+ E##bi = Bbi ^((~Bbo)& Bbu ); \
+ Ci = E##bi; \
+ E##bo = Bbo ^((~Bbu)& Bba ); \
+ Co = E##bo; \
+ E##bu = Bbu ^((~Bba)& Bbe ); \
+ Cu = E##bu; \
+\
+ A##bo ^= Do; \
+ Bga = ROL64(A##bo, 28); \
+ A##gu ^= Du; \
+ Bge = ROL64(A##gu, 20); \
+ A##ka ^= Da; \
+ Bgi = ROL64(A##ka, 3); \
+ A##me ^= De; \
+ Bgo = ROL64(A##me, 45); \
+ A##si ^= Di; \
+ Bgu = ROL64(A##si, 61); \
+ E##ga = Bga ^((~Bge)& Bgi ); \
+ Ca ^= E##ga; \
+ E##ge = Bge ^((~Bgi)& Bgo ); \
+ Ce ^= E##ge; \
+ E##gi = Bgi ^((~Bgo)& Bgu ); \
+ Ci ^= E##gi; \
+ E##go = Bgo ^((~Bgu)& Bga ); \
+ Co ^= E##go; \
+ E##gu = Bgu ^((~Bga)& Bge ); \
+ Cu ^= E##gu; \
+\
+ A##be ^= De; \
+ Bka = ROL64(A##be, 1); \
+ A##gi ^= Di; \
+ Bke = ROL64(A##gi, 6); \
+ A##ko ^= Do; \
+ Bki = ROL64(A##ko, 25); \
+ A##mu ^= Du; \
+ Bko = ROL64(A##mu, 8); \
+ A##sa ^= Da; \
+ Bku = ROL64(A##sa, 18); \
+ E##ka = Bka ^((~Bke)& Bki ); \
+ Ca ^= E##ka; \
+ E##ke = Bke ^((~Bki)& Bko ); \
+ Ce ^= E##ke; \
+ E##ki = Bki ^((~Bko)& Bku ); \
+ Ci ^= E##ki; \
+ E##ko = Bko ^((~Bku)& Bka ); \
+ Co ^= E##ko; \
+ E##ku = Bku ^((~Bka)& Bke ); \
+ Cu ^= E##ku; \
+\
+ A##bu ^= Du; \
+ Bma = ROL64(A##bu, 27); \
+ A##ga ^= Da; \
+ Bme = ROL64(A##ga, 36); \
+ A##ke ^= De; \
+ Bmi = ROL64(A##ke, 10); \
+ A##mi ^= Di; \
+ Bmo = ROL64(A##mi, 15); \
+ A##so ^= Do; \
+ Bmu = ROL64(A##so, 56); \
+ E##ma = Bma ^((~Bme)& Bmi ); \
+ Ca ^= E##ma; \
+ E##me = Bme ^((~Bmi)& Bmo ); \
+ Ce ^= E##me; \
+ E##mi = Bmi ^((~Bmo)& Bmu ); \
+ Ci ^= E##mi; \
+ E##mo = Bmo ^((~Bmu)& Bma ); \
+ Co ^= E##mo; \
+ E##mu = Bmu ^((~Bma)& Bme ); \
+ Cu ^= E##mu; \
+\
+ A##bi ^= Di; \
+ Bsa = ROL64(A##bi, 62); \
+ A##go ^= Do; \
+ Bse = ROL64(A##go, 55); \
+ A##ku ^= Du; \
+ Bsi = ROL64(A##ku, 39); \
+ A##ma ^= Da; \
+ Bso = ROL64(A##ma, 41); \
+ A##se ^= De; \
+ Bsu = ROL64(A##se, 2); \
+ E##sa = Bsa ^((~Bse)& Bsi ); \
+ Ca ^= E##sa; \
+ E##se = Bse ^((~Bsi)& Bso ); \
+ Ce ^= E##se; \
+ E##si = Bsi ^((~Bso)& Bsu ); \
+ Ci ^= E##si; \
+ E##so = Bso ^((~Bsu)& Bsa ); \
+ Co ^= E##so; \
+ E##su = Bsu ^((~Bsa)& Bse ); \
+ Cu ^= E##su; \
+\
+
+/* --- Code for round */
+/* --- 64-bit lanes mapped to 64-bit words
+ * Parameters: i indexes KeccakF1600RoundConstants[]; A and E are the name
+ * prefixes of the source and destination lane variables (pasted with ##).
+ * Steps, in the order they appear:
+ *  - Da..Du are derived from Ca..Cu, which must already hold valid values;
+ *  - every A lane is XORed in place with its D value (so A is modified) and
+ *    rotated with ROL64 into a B temporary (Bba is taken unrotated);
+ *  - each group of five B values yields five E lanes, every one computed as
+ *    B[x] ^ ((~B[x+1]) & B[x+2]) with indices wrapping inside the group;
+ *  - KeccakF1600RoundConstants[i] is XORed into E##ba only.
+ * Ca..Cu are read but never written here, so they are stale afterwards.
+ */
+#define thetaRhoPiChiIota(i, A, E) \
+ Da = Cu^ROL64(Ce, 1); \
+ De = Ca^ROL64(Ci, 1); \
+ Di = Ce^ROL64(Co, 1); \
+ Do = Ci^ROL64(Cu, 1); \
+ Du = Co^ROL64(Ca, 1); \
+\
+ A##ba ^= Da; \
+ Bba = A##ba; \
+ A##ge ^= De; \
+ Bbe = ROL64(A##ge, 44); \
+ A##ki ^= Di; \
+ Bbi = ROL64(A##ki, 43); \
+ A##mo ^= Do; \
+ Bbo = ROL64(A##mo, 21); \
+ A##su ^= Du; \
+ Bbu = ROL64(A##su, 14); \
+ E##ba = Bba ^((~Bbe)& Bbi ); \
+ E##ba ^= KeccakF1600RoundConstants[i]; \
+ E##be = Bbe ^((~Bbi)& Bbo ); \
+ E##bi = Bbi ^((~Bbo)& Bbu ); \
+ E##bo = Bbo ^((~Bbu)& Bba ); \
+ E##bu = Bbu ^((~Bba)& Bbe ); \
+\
+ A##bo ^= Do; \
+ Bga = ROL64(A##bo, 28); \
+ A##gu ^= Du; \
+ Bge = ROL64(A##gu, 20); \
+ A##ka ^= Da; \
+ Bgi = ROL64(A##ka, 3); \
+ A##me ^= De; \
+ Bgo = ROL64(A##me, 45); \
+ A##si ^= Di; \
+ Bgu = ROL64(A##si, 61); \
+ E##ga = Bga ^((~Bge)& Bgi ); \
+ E##ge = Bge ^((~Bgi)& Bgo ); \
+ E##gi = Bgi ^((~Bgo)& Bgu ); \
+ E##go = Bgo ^((~Bgu)& Bga ); \
+ E##gu = Bgu ^((~Bga)& Bge ); \
+\
+ A##be ^= De; \
+ Bka = ROL64(A##be, 1); \
+ A##gi ^= Di; \
+ Bke = ROL64(A##gi, 6); \
+ A##ko ^= Do; \
+ Bki = ROL64(A##ko, 25); \
+ A##mu ^= Du; \
+ Bko = ROL64(A##mu, 8); \
+ A##sa ^= Da; \
+ Bku = ROL64(A##sa, 18); \
+ E##ka = Bka ^((~Bke)& Bki ); \
+ E##ke = Bke ^((~Bki)& Bko ); \
+ E##ki = Bki ^((~Bko)& Bku ); \
+ E##ko = Bko ^((~Bku)& Bka ); \
+ E##ku = Bku ^((~Bka)& Bke ); \
+\
+ A##bu ^= Du; \
+ Bma = ROL64(A##bu, 27); \
+ A##ga ^= Da; \
+ Bme = ROL64(A##ga, 36); \
+ A##ke ^= De; \
+ Bmi = ROL64(A##ke, 10); \
+ A##mi ^= Di; \
+ Bmo = ROL64(A##mi, 15); \
+ A##so ^= Do; \
+ Bmu = ROL64(A##so, 56); \
+ E##ma = Bma ^((~Bme)& Bmi ); \
+ E##me = Bme ^((~Bmi)& Bmo ); \
+ E##mi = Bmi ^((~Bmo)& Bmu ); \
+ E##mo = Bmo ^((~Bmu)& Bma ); \
+ E##mu = Bmu ^((~Bma)& Bme ); \
+\
+ A##bi ^= Di; \
+ Bsa = ROL64(A##bi, 62); \
+ A##go ^= Do; \
+ Bse = ROL64(A##go, 55); \
+ A##ku ^= Du; \
+ Bsi = ROL64(A##ku, 39); \
+ A##ma ^= Da; \
+ Bso = ROL64(A##ma, 41); \
+ A##se ^= De; \
+ Bsu = ROL64(A##se, 2); \
+ E##sa = Bsa ^((~Bse)& Bsi ); \
+ E##se = Bse ^((~Bsi)& Bso ); \
+ E##si = Bsi ^((~Bso)& Bsu ); \
+ E##so = Bso ^((~Bsu)& Bsa ); \
+ E##su = Bsu ^((~Bsa)& Bse ); \
+\
+
+#endif /* UseBebigokimisa */
+
+#define copyFromState(X, state) /* loads state[0..24] into the 25 lane variables with prefix X (ba = index 0 ... su = index 24) */ \
+ X##ba = state[ 0]; \
+ X##be = state[ 1]; \
+ X##bi = state[ 2]; \
+ X##bo = state[ 3]; \
+ X##bu = state[ 4]; \
+ X##ga = state[ 5]; \
+ X##ge = state[ 6]; \
+ X##gi = state[ 7]; \
+ X##go = state[ 8]; \
+ X##gu = state[ 9]; \
+ X##ka = state[10]; \
+ X##ke = state[11]; \
+ X##ki = state[12]; \
+ X##ko = state[13]; \
+ X##ku = state[14]; \
+ X##ma = state[15]; \
+ X##me = state[16]; \
+ X##mi = state[17]; \
+ X##mo = state[18]; \
+ X##mu = state[19]; \
+ X##sa = state[20]; \
+ X##se = state[21]; \
+ X##si = state[22]; \
+ X##so = state[23]; \
+ X##su = state[24]; \
+
+#define copyToState(state, X) /* stores the 25 lane variables with prefix X into state[0..24] (ba = index 0 ... su = index 24) */ \
+ state[ 0] = X##ba; \
+ state[ 1] = X##be; \
+ state[ 2] = X##bi; \
+ state[ 3] = X##bo; \
+ state[ 4] = X##bu; \
+ state[ 5] = X##ga; \
+ state[ 6] = X##ge; \
+ state[ 7] = X##gi; \
+ state[ 8] = X##go; \
+ state[ 9] = X##gu; \
+ state[10] = X##ka; \
+ state[11] = X##ke; \
+ state[12] = X##ki; \
+ state[13] = X##ko; \
+ state[14] = X##ku; \
+ state[15] = X##ma; \
+ state[16] = X##me; \
+ state[17] = X##mi; \
+ state[18] = X##mo; \
+ state[19] = X##mu; \
+ state[20] = X##sa; \
+ state[21] = X##se; \
+ state[22] = X##si; \
+ state[23] = X##so; \
+ state[24] = X##su; \
+
+#define copyStateVariables(X, Y) /* assigns each of the 25 lane variables with prefix Y to the same-named variable with prefix X */ \
+ X##ba = Y##ba; \
+ X##be = Y##be; \
+ X##bi = Y##bi; \
+ X##bo = Y##bo; \
+ X##bu = Y##bu; \
+ X##ga = Y##ga; \
+ X##ge = Y##ge; \
+ X##gi = Y##gi; \
+ X##go = Y##go; \
+ X##gu = Y##gu; \
+ X##ka = Y##ka; \
+ X##ke = Y##ke; \
+ X##ki = Y##ki; \
+ X##ko = Y##ko; \
+ X##ku = Y##ku; \
+ X##ma = Y##ma; \
+ X##me = Y##me; \
+ X##mi = Y##mi; \
+ X##mo = Y##mo; \
+ X##mu = Y##mu; \
+ X##sa = Y##sa; \
+ X##se = Y##se; \
+ X##si = Y##si; \
+ X##so = Y##so; \
+ X##su = Y##su; \
+
+#define copyFromStateAndAdd(X, state, input, laneCount) /* loads state[0..24] into the X lanes, XORing in input[k] for every k < laneCount */ \
+ if (laneCount < 16) { /* nested comparisons form a decision tree on laneCount; input[k] is never read for k >= laneCount */ \
+ if (laneCount < 8) { \
+ if (laneCount < 4) { \
+ if (laneCount < 2) { \
+ if (laneCount < 1) { \
+ X##ba = state[ 0]; \
+ } \
+ else { \
+ X##ba = state[ 0]^input[ 0]; \
+ } \
+ X##be = state[ 1]; \
+ X##bi = state[ 2]; \
+ } \
+ else { \
+ X##ba = state[ 0]^input[ 0]; \
+ X##be = state[ 1]^input[ 1]; \
+ if (laneCount < 3) { \
+ X##bi = state[ 2]; \
+ } \
+ else { \
+ X##bi = state[ 2]^input[ 2]; \
+ } \
+ } \
+ X##bo = state[ 3]; \
+ X##bu = state[ 4]; \
+ X##ga = state[ 5]; \
+ X##ge = state[ 6]; \
+ } \
+ else { \
+ X##ba = state[ 0]^input[ 0]; \
+ X##be = state[ 1]^input[ 1]; \
+ X##bi = state[ 2]^input[ 2]; \
+ X##bo = state[ 3]^input[ 3]; \
+ if (laneCount < 6) { \
+ if (laneCount < 5) { \
+ X##bu = state[ 4]; \
+ } \
+ else { \
+ X##bu = state[ 4]^input[ 4]; \
+ } \
+ X##ga = state[ 5]; \
+ X##ge = state[ 6]; \
+ } \
+ else { \
+ X##bu = state[ 4]^input[ 4]; \
+ X##ga = state[ 5]^input[ 5]; \
+ if (laneCount < 7) { \
+ X##ge = state[ 6]; \
+ } \
+ else { \
+ X##ge = state[ 6]^input[ 6]; \
+ } \
+ } \
+ } \
+ X##gi = state[ 7]; \
+ X##go = state[ 8]; \
+ X##gu = state[ 9]; \
+ X##ka = state[10]; \
+ X##ke = state[11]; \
+ X##ki = state[12]; \
+ X##ko = state[13]; \
+ X##ku = state[14]; \
+ } \
+ else { \
+ X##ba = state[ 0]^input[ 0]; \
+ X##be = state[ 1]^input[ 1]; \
+ X##bi = state[ 2]^input[ 2]; \
+ X##bo = state[ 3]^input[ 3]; \
+ X##bu = state[ 4]^input[ 4]; \
+ X##ga = state[ 5]^input[ 5]; \
+ X##ge = state[ 6]^input[ 6]; \
+ X##gi = state[ 7]^input[ 7]; \
+ if (laneCount < 12) { \
+ if (laneCount < 10) { \
+ if (laneCount < 9) { \
+ X##go = state[ 8]; \
+ } \
+ else { \
+ X##go = state[ 8]^input[ 8]; \
+ } \
+ X##gu = state[ 9]; \
+ X##ka = state[10]; \
+ } \
+ else { \
+ X##go = state[ 8]^input[ 8]; \
+ X##gu = state[ 9]^input[ 9]; \
+ if (laneCount < 11) { \
+ X##ka = state[10]; \
+ } \
+ else { \
+ X##ka = state[10]^input[10]; \
+ } \
+ } \
+ X##ke = state[11]; \
+ X##ki = state[12]; \
+ X##ko = state[13]; \
+ X##ku = state[14]; \
+ } \
+ else { \
+ X##go = state[ 8]^input[ 8]; \
+ X##gu = state[ 9]^input[ 9]; \
+ X##ka = state[10]^input[10]; \
+ X##ke = state[11]^input[11]; \
+ if (laneCount < 14) { \
+ if (laneCount < 13) { \
+ X##ki = state[12]; \
+ } \
+ else { \
+ X##ki = state[12]^input[12]; \
+ } \
+ X##ko = state[13]; \
+ X##ku = state[14]; \
+ } \
+ else { \
+ X##ki = state[12]^input[12]; \
+ X##ko = state[13]^input[13]; \
+ if (laneCount < 15) { \
+ X##ku = state[14]; \
+ } \
+ else { \
+ X##ku = state[14]^input[14]; \
+ } \
+ } \
+ } \
+ } \
+ X##ma = state[15]; \
+ X##me = state[16]; \
+ X##mi = state[17]; \
+ X##mo = state[18]; \
+ X##mu = state[19]; \
+ X##sa = state[20]; \
+ X##se = state[21]; \
+ X##si = state[22]; \
+ X##so = state[23]; \
+ X##su = state[24]; \
+ } \
+ else { \
+ X##ba = state[ 0]^input[ 0]; \
+ X##be = state[ 1]^input[ 1]; \
+ X##bi = state[ 2]^input[ 2]; \
+ X##bo = state[ 3]^input[ 3]; \
+ X##bu = state[ 4]^input[ 4]; \
+ X##ga = state[ 5]^input[ 5]; \
+ X##ge = state[ 6]^input[ 6]; \
+ X##gi = state[ 7]^input[ 7]; \
+ X##go = state[ 8]^input[ 8]; \
+ X##gu = state[ 9]^input[ 9]; \
+ X##ka = state[10]^input[10]; \
+ X##ke = state[11]^input[11]; \
+ X##ki = state[12]^input[12]; \
+ X##ko = state[13]^input[13]; \
+ X##ku = state[14]^input[14]; \
+ X##ma = state[15]^input[15]; \
+ if (laneCount < 24) { \
+ if (laneCount < 20) { \
+ if (laneCount < 18) { \
+ if (laneCount < 17) { \
+ X##me = state[16]; \
+ } \
+ else { \
+ X##me = state[16]^input[16]; \
+ } \
+ X##mi = state[17]; \
+ X##mo = state[18]; \
+ } \
+ else { \
+ X##me = state[16]^input[16]; \
+ X##mi = state[17]^input[17]; \
+ if (laneCount < 19) { \
+ X##mo = state[18]; \
+ } \
+ else { \
+ X##mo = state[18]^input[18]; \
+ } \
+ } \
+ X##mu = state[19]; \
+ X##sa = state[20]; \
+ X##se = state[21]; \
+ X##si = state[22]; \
+ } \
+ else { \
+ X##me = state[16]^input[16]; \
+ X##mi = state[17]^input[17]; \
+ X##mo = state[18]^input[18]; \
+ X##mu = state[19]^input[19]; \
+ if (laneCount < 22) { \
+ if (laneCount < 21) { \
+ X##sa = state[20]; \
+ } \
+ else { \
+ X##sa = state[20]^input[20]; \
+ } \
+ X##se = state[21]; \
+ X##si = state[22]; \
+ } \
+ else { \
+ X##sa = state[20]^input[20]; \
+ X##se = state[21]^input[21]; \
+ if (laneCount < 23) { \
+ X##si = state[22]; \
+ } \
+ else { \
+ X##si = state[22]^input[22]; \
+ } \
+ } \
+ } \
+ X##so = state[23]; \
+ X##su = state[24]; \
+ } \
+ else { \
+ X##me = state[16]^input[16]; \
+ X##mi = state[17]^input[17]; \
+ X##mo = state[18]^input[18]; \
+ X##mu = state[19]^input[19]; \
+ X##sa = state[20]^input[20]; \
+ X##se = state[21]^input[21]; \
+ X##si = state[22]^input[22]; \
+ X##so = state[23]^input[23]; \
+ if (laneCount < 25) { \
+ X##su = state[24]; \
+ } \
+ else { \
+ X##su = state[24]^input[24]; \
+ } \
+ } \
+ }
+
+#define addInput(X, input, laneCount) /* XORs input[k] into the X lane variables for every k < laneCount; the other lanes are left untouched */ \
+ if (laneCount == 21) { /* tested first; XORs input[0..20], the same lanes the general tree below would pick for 21 */ \
+ X##ba ^= input[ 0]; \
+ X##be ^= input[ 1]; \
+ X##bi ^= input[ 2]; \
+ X##bo ^= input[ 3]; \
+ X##bu ^= input[ 4]; \
+ X##ga ^= input[ 5]; \
+ X##ge ^= input[ 6]; \
+ X##gi ^= input[ 7]; \
+ X##go ^= input[ 8]; \
+ X##gu ^= input[ 9]; \
+ X##ka ^= input[10]; \
+ X##ke ^= input[11]; \
+ X##ki ^= input[12]; \
+ X##ko ^= input[13]; \
+ X##ku ^= input[14]; \
+ X##ma ^= input[15]; \
+ X##me ^= input[16]; \
+ X##mi ^= input[17]; \
+ X##mo ^= input[18]; \
+ X##mu ^= input[19]; \
+ X##sa ^= input[20]; \
+ } \
+ else if (laneCount < 16) { /* general case: decision tree on laneCount; empty branches mean "nothing more to add" */ \
+ if (laneCount < 8) { \
+ if (laneCount < 4) { \
+ if (laneCount < 2) { \
+ if (laneCount < 1) { \
+ } \
+ else { \
+ X##ba ^= input[ 0]; \
+ } \
+ } \
+ else { \
+ X##ba ^= input[ 0]; \
+ X##be ^= input[ 1]; \
+ if (laneCount < 3) { \
+ } \
+ else { \
+ X##bi ^= input[ 2]; \
+ } \
+ } \
+ } \
+ else { \
+ X##ba ^= input[ 0]; \
+ X##be ^= input[ 1]; \
+ X##bi ^= input[ 2]; \
+ X##bo ^= input[ 3]; \
+ if (laneCount < 6) { \
+ if (laneCount < 5) { \
+ } \
+ else { \
+ X##bu ^= input[ 4]; \
+ } \
+ } \
+ else { \
+ X##bu ^= input[ 4]; \
+ X##ga ^= input[ 5]; \
+ if (laneCount < 7) { \
+ } \
+ else { \
+ X##ge ^= input[ 6]; \
+ } \
+ } \
+ } \
+ } \
+ else { \
+ X##ba ^= input[ 0]; \
+ X##be ^= input[ 1]; \
+ X##bi ^= input[ 2]; \
+ X##bo ^= input[ 3]; \
+ X##bu ^= input[ 4]; \
+ X##ga ^= input[ 5]; \
+ X##ge ^= input[ 6]; \
+ X##gi ^= input[ 7]; \
+ if (laneCount < 12) { \
+ if (laneCount < 10) { \
+ if (laneCount < 9) { \
+ } \
+ else { \
+ X##go ^= input[ 8]; \
+ } \
+ } \
+ else { \
+ X##go ^= input[ 8]; \
+ X##gu ^= input[ 9]; \
+ if (laneCount < 11) { \
+ } \
+ else { \
+ X##ka ^= input[10]; \
+ } \
+ } \
+ } \
+ else { \
+ X##go ^= input[ 8]; \
+ X##gu ^= input[ 9]; \
+ X##ka ^= input[10]; \
+ X##ke ^= input[11]; \
+ if (laneCount < 14) { \
+ if (laneCount < 13) { \
+ } \
+ else { \
+ X##ki ^= input[12]; \
+ } \
+ } \
+ else { \
+ X##ki ^= input[12]; \
+ X##ko ^= input[13]; \
+ if (laneCount < 15) { \
+ } \
+ else { \
+ X##ku ^= input[14]; \
+ } \
+ } \
+ } \
+ } \
+ } \
+ else { \
+ X##ba ^= input[ 0]; \
+ X##be ^= input[ 1]; \
+ X##bi ^= input[ 2]; \
+ X##bo ^= input[ 3]; \
+ X##bu ^= input[ 4]; \
+ X##ga ^= input[ 5]; \
+ X##ge ^= input[ 6]; \
+ X##gi ^= input[ 7]; \
+ X##go ^= input[ 8]; \
+ X##gu ^= input[ 9]; \
+ X##ka ^= input[10]; \
+ X##ke ^= input[11]; \
+ X##ki ^= input[12]; \
+ X##ko ^= input[13]; \
+ X##ku ^= input[14]; \
+ X##ma ^= input[15]; \
+ if (laneCount < 24) { \
+ if (laneCount < 20) { \
+ if (laneCount < 18) { \
+ if (laneCount < 17) { \
+ } \
+ else { \
+ X##me ^= input[16]; \
+ } \
+ } \
+ else { \
+ X##me ^= input[16]; \
+ X##mi ^= input[17]; \
+ if (laneCount < 19) { \
+ } \
+ else { \
+ X##mo ^= input[18]; \
+ } \
+ } \
+ } \
+ else { \
+ X##me ^= input[16]; \
+ X##mi ^= input[17]; \
+ X##mo ^= input[18]; \
+ X##mu ^= input[19]; \
+ if (laneCount < 22) { \
+ if (laneCount < 21) { \
+ } \
+ else { \
+ X##sa ^= input[20]; \
+ } \
+ } \
+ else { \
+ X##sa ^= input[20]; \
+ X##se ^= input[21]; \
+ if (laneCount < 23) { \
+ } \
+ else { \
+ X##si ^= input[22]; \
+ } \
+ } \
+ } \
+ } \
+ else { \
+ X##me ^= input[16]; \
+ X##mi ^= input[17]; \
+ X##mo ^= input[18]; \
+ X##mu ^= input[19]; \
+ X##sa ^= input[20]; \
+ X##se ^= input[21]; \
+ X##si ^= input[22]; \
+ X##so ^= input[23]; \
+ if (laneCount < 25) { \
+ } \
+ else { \
+ X##su ^= input[24]; \
+ } \
+ } \
+ }
+
+#ifdef UseBebigokimisa
+
+#define copyToStateAndOutput(X, state, output, laneCount) /* stores all 25 X lanes to state[0..24] unchanged; also writes lanes k < laneCount to output[k] */ \
+ if (laneCount < 16) { /* in output only, lanes 1, 2, 8, 12, 17 and 20 (be, bi, go, ki, mi, sa) are written bitwise-complemented */ \
+ if (laneCount < 8) { \
+ if (laneCount < 4) { \
+ if (laneCount < 2) { \
+ state[ 0] = X##ba; \
+ if (laneCount >= 1) { \
+ output[ 0] = X##ba; \
+ } \
+ state[ 1] = X##be; \
+ state[ 2] = X##bi; \
+ } \
+ else { \
+ state[ 0] = X##ba; \
+ output[ 0] = X##ba; \
+ state[ 1] = X##be; \
+ output[ 1] = ~X##be; \
+ state[ 2] = X##bi; \
+ if (laneCount >= 3) { \
+ output[ 2] = ~X##bi; \
+ } \
+ } \
+ state[ 3] = X##bo; \
+ state[ 4] = X##bu; \
+ state[ 5] = X##ga; \
+ state[ 6] = X##ge; \
+ } \
+ else { \
+ state[ 0] = X##ba; \
+ output[ 0] = X##ba; \
+ state[ 1] = X##be; \
+ output[ 1] = ~X##be; \
+ state[ 2] = X##bi; \
+ output[ 2] = ~X##bi; \
+ state[ 3] = X##bo; \
+ output[ 3] = X##bo; \
+ if (laneCount < 6) { \
+ state[ 4] = X##bu; \
+ if (laneCount >= 5) { \
+ output[ 4] = X##bu; \
+ } \
+ state[ 5] = X##ga; \
+ state[ 6] = X##ge; \
+ } \
+ else { \
+ state[ 4] = X##bu; \
+ output[ 4] = X##bu; \
+ state[ 5] = X##ga; \
+ output[ 5] = X##ga; \
+ state[ 6] = X##ge; \
+ if (laneCount >= 7) { \
+ output[ 6] = X##ge; \
+ } \
+ } \
+ } \
+ state[ 7] = X##gi; \
+ state[ 8] = X##go; \
+ state[ 9] = X##gu; \
+ state[10] = X##ka; \
+ state[11] = X##ke; \
+ state[12] = X##ki; \
+ state[13] = X##ko; \
+ state[14] = X##ku; \
+ } \
+ else { \
+ state[ 0] = X##ba; \
+ output[ 0] = X##ba; \
+ state[ 1] = X##be; \
+ output[ 1] = ~X##be; \
+ state[ 2] = X##bi; \
+ output[ 2] = ~X##bi; \
+ state[ 3] = X##bo; \
+ output[ 3] = X##bo; \
+ state[ 4] = X##bu; \
+ output[ 4] = X##bu; \
+ state[ 5] = X##ga; \
+ output[ 5] = X##ga; \
+ state[ 6] = X##ge; \
+ output[ 6] = X##ge; \
+ state[ 7] = X##gi; \
+ output[ 7] = X##gi; \
+ if (laneCount < 12) { \
+ if (laneCount < 10) { \
+ state[ 8] = X##go; \
+ if (laneCount >= 9) { \
+ output[ 8] = ~X##go; \
+ } \
+ state[ 9] = X##gu; \
+ state[10] = X##ka; \
+ } \
+ else { \
+ state[ 8] = X##go; \
+ output[ 8] = ~X##go; \
+ state[ 9] = X##gu; \
+ output[ 9] = X##gu; \
+ state[10] = X##ka; \
+ if (laneCount >= 11) { \
+ output[10] = X##ka; \
+ } \
+ } \
+ state[11] = X##ke; \
+ state[12] = X##ki; \
+ state[13] = X##ko; \
+ state[14] = X##ku; \
+ } \
+ else { \
+ state[ 8] = X##go; \
+ output[ 8] = ~X##go; \
+ state[ 9] = X##gu; \
+ output[ 9] = X##gu; \
+ state[10] = X##ka; \
+ output[10] = X##ka; \
+ state[11] = X##ke; \
+ output[11] = X##ke; \
+ if (laneCount < 14) { \
+ state[12] = X##ki; \
+ if (laneCount >= 13) { \
+ output[12] = ~X##ki; \
+ } \
+ state[13] = X##ko; \
+ state[14] = X##ku; \
+ } \
+ else { \
+ state[12] = X##ki; \
+ output[12] = ~X##ki; \
+ state[13] = X##ko; \
+ output[13] = X##ko; \
+ state[14] = X##ku; \
+ if (laneCount >= 15) { \
+ output[14] = X##ku; \
+ } \
+ } \
+ } \
+ } \
+ state[15] = X##ma; \
+ state[16] = X##me; \
+ state[17] = X##mi; \
+ state[18] = X##mo; \
+ state[19] = X##mu; \
+ state[20] = X##sa; \
+ state[21] = X##se; \
+ state[22] = X##si; \
+ state[23] = X##so; \
+ state[24] = X##su; \
+ } \
+ else { \
+ state[ 0] = X##ba; \
+ output[ 0] = X##ba; \
+ state[ 1] = X##be; \
+ output[ 1] = ~X##be; \
+ state[ 2] = X##bi; \
+ output[ 2] = ~X##bi; \
+ state[ 3] = X##bo; \
+ output[ 3] = X##bo; \
+ state[ 4] = X##bu; \
+ output[ 4] = X##bu; \
+ state[ 5] = X##ga; \
+ output[ 5] = X##ga; \
+ state[ 6] = X##ge; \
+ output[ 6] = X##ge; \
+ state[ 7] = X##gi; \
+ output[ 7] = X##gi; \
+ state[ 8] = X##go; \
+ output[ 8] = ~X##go; \
+ state[ 9] = X##gu; \
+ output[ 9] = X##gu; \
+ state[10] = X##ka; \
+ output[10] = X##ka; \
+ state[11] = X##ke; \
+ output[11] = X##ke; \
+ state[12] = X##ki; \
+ output[12] = ~X##ki; \
+ state[13] = X##ko; \
+ output[13] = X##ko; \
+ state[14] = X##ku; \
+ output[14] = X##ku; \
+ state[15] = X##ma; \
+ output[15] = X##ma; \
+ if (laneCount < 24) { \
+ if (laneCount < 20) { \
+ if (laneCount < 18) { \
+ state[16] = X##me; \
+ if (laneCount >= 17) { \
+ output[16] = X##me; \
+ } \
+ state[17] = X##mi; \
+ state[18] = X##mo; \
+ } \
+ else { \
+ state[16] = X##me; \
+ output[16] = X##me; \
+ state[17] = X##mi; \
+ output[17] = ~X##mi; \
+ state[18] = X##mo; \
+ if (laneCount >= 19) { \
+ output[18] = X##mo; \
+ } \
+ } \
+ state[19] = X##mu; \
+ state[20] = X##sa; \
+ state[21] = X##se; \
+ state[22] = X##si; \
+ } \
+ else { \
+ state[16] = X##me; \
+ output[16] = X##me; \
+ state[17] = X##mi; \
+ output[17] = ~X##mi; \
+ state[18] = X##mo; \
+ output[18] = X##mo; \
+ state[19] = X##mu; \
+ output[19] = X##mu; \
+ if (laneCount < 22) { \
+ state[20] = X##sa; \
+ if (laneCount >= 21) { \
+ output[20] = ~X##sa; \
+ } \
+ state[21] = X##se; \
+ state[22] = X##si; \
+ } \
+ else { \
+ state[20] = X##sa; \
+ output[20] = ~X##sa; \
+ state[21] = X##se; \
+ output[21] = X##se; \
+ state[22] = X##si; \
+ if (laneCount >= 23) { \
+ output[22] = X##si; \
+ } \
+ } \
+ } \
+ state[23] = X##so; \
+ state[24] = X##su; \
+ } \
+ else { \
+ state[16] = X##me; \
+ output[16] = X##me; \
+ state[17] = X##mi; \
+ output[17] = ~X##mi; \
+ state[18] = X##mo; \
+ output[18] = X##mo; \
+ state[19] = X##mu; \
+ output[19] = X##mu; \
+ state[20] = X##sa; \
+ output[20] = ~X##sa; \
+ state[21] = X##se; \
+ output[21] = X##se; \
+ state[22] = X##si; \
+ output[22] = X##si; \
+ state[23] = X##so; \
+ output[23] = X##so; \
+ state[24] = X##su; \
+ if (laneCount >= 25) { \
+ output[24] = X##su; \
+ } \
+ } \
+ }
+
+#define output(X, output, laneCount) /* writes the X lanes k < laneCount to output[k]; nothing is written for k >= laneCount */ \
+ if (laneCount < 16) { /* lanes 1, 2, 8, 12, 17 and 20 (be, bi, go, ki, mi, sa) are written bitwise-complemented */ \
+ if (laneCount < 8) { \
+ if (laneCount < 4) { \
+ if (laneCount < 2) { \
+ if (laneCount >= 1) { \
+ output[ 0] = X##ba; \
+ } \
+ } \
+ else { \
+ output[ 0] = X##ba; \
+ output[ 1] = ~X##be; \
+ if (laneCount >= 3) { \
+ output[ 2] = ~X##bi; \
+ } \
+ } \
+ } \
+ else { \
+ output[ 0] = X##ba; \
+ output[ 1] = ~X##be; \
+ output[ 2] = ~X##bi; \
+ output[ 3] = X##bo; \
+ if (laneCount < 6) { \
+ if (laneCount >= 5) { \
+ output[ 4] = X##bu; \
+ } \
+ } \
+ else { \
+ output[ 4] = X##bu; \
+ output[ 5] = X##ga; \
+ if (laneCount >= 7) { \
+ output[ 6] = X##ge; \
+ } \
+ } \
+ } \
+ } \
+ else { \
+ output[ 0] = X##ba; \
+ output[ 1] = ~X##be; \
+ output[ 2] = ~X##bi; \
+ output[ 3] = X##bo; \
+ output[ 4] = X##bu; \
+ output[ 5] = X##ga; \
+ output[ 6] = X##ge; \
+ output[ 7] = X##gi; \
+ if (laneCount < 12) { \
+ if (laneCount < 10) { \
+ if (laneCount >= 9) { \
+ output[ 8] = ~X##go; \
+ } \
+ } \
+ else { \
+ output[ 8] = ~X##go; \
+ output[ 9] = X##gu; \
+ if (laneCount >= 11) { \
+ output[10] = X##ka; \
+ } \
+ } \
+ } \
+ else { \
+ output[ 8] = ~X##go; \
+ output[ 9] = X##gu; \
+ output[10] = X##ka; \
+ output[11] = X##ke; \
+ if (laneCount < 14) { \
+ if (laneCount >= 13) { \
+ output[12] = ~X##ki; \
+ } \
+ } \
+ else { \
+ output[12] = ~X##ki; \
+ output[13] = X##ko; \
+ if (laneCount >= 15) { \
+ output[14] = X##ku; \
+ } \
+ } \
+ } \
+ } \
+ } \
+ else { \
+ output[ 0] = X##ba; \
+ output[ 1] = ~X##be; \
+ output[ 2] = ~X##bi; \
+ output[ 3] = X##bo; \
+ output[ 4] = X##bu; \
+ output[ 5] = X##ga; \
+ output[ 6] = X##ge; \
+ output[ 7] = X##gi; \
+ output[ 8] = ~X##go; \
+ output[ 9] = X##gu; \
+ output[10] = X##ka; \
+ output[11] = X##ke; \
+ output[12] = ~X##ki; \
+ output[13] = X##ko; \
+ output[14] = X##ku; \
+ output[15] = X##ma; \
+ if (laneCount < 24) { \
+ if (laneCount < 20) { \
+ if (laneCount < 18) { \
+ if (laneCount >= 17) { \
+ output[16] = X##me; \
+ } \
+ } \
+ else { \
+ output[16] = X##me; \
+ output[17] = ~X##mi; \
+ if (laneCount >= 19) { \
+ output[18] = X##mo; \
+ } \
+ } \
+ } \
+ else { \
+ output[16] = X##me; \
+ output[17] = ~X##mi; \
+ output[18] = X##mo; \
+ output[19] = X##mu; \
+ if (laneCount < 22) { \
+ if (laneCount >= 21) { \
+ output[20] = ~X##sa; \
+ } \
+ } \
+ else { \
+ output[20] = ~X##sa; \
+ output[21] = X##se; \
+ if (laneCount >= 23) { \
+ output[22] = X##si; \
+ } \
+ } \
+ } \
+ } \
+ else { \
+ output[16] = X##me; \
+ output[17] = ~X##mi; \
+ output[18] = X##mo; \
+ output[19] = X##mu; \
+ output[20] = ~X##sa; \
+ output[21] = X##se; \
+ output[22] = X##si; \
+ output[23] = X##so; \
+ if (laneCount >= 25) { \
+ output[24] = X##su; \
+ } \
+ } \
+ }
+
+#define wrapOne(X, input, output, index, name) /* XORs input[index] into lane X##name, then writes the updated lane to output[index] */ \
+ X##name ^= input[index]; \
+ output[index] = X##name;
+
+#define wrapOneInvert(X, input, output, index, name) /* XORs input[index] into lane X##name, then writes the complement of the updated lane to output[index] */ \
+ X##name ^= input[index]; \
+ output[index] = ~X##name;
+
+#define unwrapOne(X, input, output, index, name) /* output[index] = input[index] ^ lane; the lane is then XORed with that output value */ \
+ output[index] = input[index] ^ X##name; \
+ X##name ^= output[index];
+
+#define unwrapOneInvert(X, input, output, index, name) /* output[index] = ~(input[index] ^ lane); the lane is then XORed with that output value */ \
+ output[index] = ~(input[index] ^ X##name); \
+ X##name ^= output[index]; \
+
+#else /* UseBebigokimisa */
+
+/* Variant used when UseBebigokimisa is not defined (no complementing applied).
+ * Stores all 25 lanes X##ba .. X##su into state[0..24] and additionally copies
+ * the first laneCount lanes (indices 0 .. laneCount-1) into output[]. A
+ * laneCount of 25 or more copies every lane to output; a laneCount of 0 writes
+ * nothing to output. The nested comparisons form a binary decision tree on
+ * laneCount; presumably this lets a compile-time-constant laneCount collapse
+ * to straight-line stores. The expansion is a bare if/else statement, so it
+ * must be used where a statement is allowed. */
+#define copyToStateAndOutput(X, state, output, laneCount) \
+ if (laneCount < 16) { \
+ if (laneCount < 8) { \
+ if (laneCount < 4) { \
+ if (laneCount < 2) { \
+ state[ 0] = X##ba; \
+ if (laneCount >= 1) { \
+ output[ 0] = X##ba; \
+ } \
+ state[ 1] = X##be; \
+ state[ 2] = X##bi; \
+ } \
+ else { \
+ state[ 0] = X##ba; \
+ output[ 0] = X##ba; \
+ state[ 1] = X##be; \
+ output[ 1] = X##be; \
+ state[ 2] = X##bi; \
+ if (laneCount >= 3) { \
+ output[ 2] = X##bi; \
+ } \
+ } \
+ state[ 3] = X##bo; \
+ state[ 4] = X##bu; \
+ state[ 5] = X##ga; \
+ state[ 6] = X##ge; \
+ } \
+ else { \
+ state[ 0] = X##ba; \
+ output[ 0] = X##ba; \
+ state[ 1] = X##be; \
+ output[ 1] = X##be; \
+ state[ 2] = X##bi; \
+ output[ 2] = X##bi; \
+ state[ 3] = X##bo; \
+ output[ 3] = X##bo; \
+ if (laneCount < 6) { \
+ state[ 4] = X##bu; \
+ if (laneCount >= 5) { \
+ output[ 4] = X##bu; \
+ } \
+ state[ 5] = X##ga; \
+ state[ 6] = X##ge; \
+ } \
+ else { \
+ state[ 4] = X##bu; \
+ output[ 4] = X##bu; \
+ state[ 5] = X##ga; \
+ output[ 5] = X##ga; \
+ state[ 6] = X##ge; \
+ if (laneCount >= 7) { \
+ output[ 6] = X##ge; \
+ } \
+ } \
+ } \
+ state[ 7] = X##gi; \
+ state[ 8] = X##go; \
+ state[ 9] = X##gu; \
+ state[10] = X##ka; \
+ state[11] = X##ke; \
+ state[12] = X##ki; \
+ state[13] = X##ko; \
+ state[14] = X##ku; \
+ } \
+ else { \
+ state[ 0] = X##ba; \
+ output[ 0] = X##ba; \
+ state[ 1] = X##be; \
+ output[ 1] = X##be; \
+ state[ 2] = X##bi; \
+ output[ 2] = X##bi; \
+ state[ 3] = X##bo; \
+ output[ 3] = X##bo; \
+ state[ 4] = X##bu; \
+ output[ 4] = X##bu; \
+ state[ 5] = X##ga; \
+ output[ 5] = X##ga; \
+ state[ 6] = X##ge; \
+ output[ 6] = X##ge; \
+ state[ 7] = X##gi; \
+ output[ 7] = X##gi; \
+ if (laneCount < 12) { \
+ if (laneCount < 10) { \
+ state[ 8] = X##go; \
+ if (laneCount >= 9) { \
+ output[ 8] = X##go; \
+ } \
+ state[ 9] = X##gu; \
+ state[10] = X##ka; \
+ } \
+ else { \
+ state[ 8] = X##go; \
+ output[ 8] = X##go; \
+ state[ 9] = X##gu; \
+ output[ 9] = X##gu; \
+ state[10] = X##ka; \
+ if (laneCount >= 11) { \
+ output[10] = X##ka; \
+ } \
+ } \
+ state[11] = X##ke; \
+ state[12] = X##ki; \
+ state[13] = X##ko; \
+ state[14] = X##ku; \
+ } \
+ else { \
+ state[ 8] = X##go; \
+ output[ 8] = X##go; \
+ state[ 9] = X##gu; \
+ output[ 9] = X##gu; \
+ state[10] = X##ka; \
+ output[10] = X##ka; \
+ state[11] = X##ke; \
+ output[11] = X##ke; \
+ if (laneCount < 14) { \
+ state[12] = X##ki; \
+ if (laneCount >= 13) { \
+ output[12]= X##ki; \
+ } \
+ state[13] = X##ko; \
+ state[14] = X##ku; \
+ } \
+ else { \
+ state[12] = X##ki; \
+ output[12]= X##ki; \
+ state[13] = X##ko; \
+ output[13] = X##ko; \
+ state[14] = X##ku; \
+ if (laneCount >= 15) { \
+ output[14] = X##ku; \
+ } \
+ } \
+ } \
+ } \
+ state[15] = X##ma; \
+ state[16] = X##me; \
+ state[17] = X##mi; \
+ state[18] = X##mo; \
+ state[19] = X##mu; \
+ state[20] = X##sa; \
+ state[21] = X##se; \
+ state[22] = X##si; \
+ state[23] = X##so; \
+ state[24] = X##su; \
+ } \
+ else { \
+ state[ 0] = X##ba; \
+ output[ 0] = X##ba; \
+ state[ 1] = X##be; \
+ output[ 1] = X##be; \
+ state[ 2] = X##bi; \
+ output[ 2] = X##bi; \
+ state[ 3] = X##bo; \
+ output[ 3] = X##bo; \
+ state[ 4] = X##bu; \
+ output[ 4] = X##bu; \
+ state[ 5] = X##ga; \
+ output[ 5] = X##ga; \
+ state[ 6] = X##ge; \
+ output[ 6] = X##ge; \
+ state[ 7] = X##gi; \
+ output[ 7] = X##gi; \
+ state[ 8] = X##go; \
+ output[ 8] = X##go; \
+ state[ 9] = X##gu; \
+ output[ 9] = X##gu; \
+ state[10] = X##ka; \
+ output[10] = X##ka; \
+ state[11] = X##ke; \
+ output[11] = X##ke; \
+ state[12] = X##ki; \
+ output[12]= X##ki; \
+ state[13] = X##ko; \
+ output[13] = X##ko; \
+ state[14] = X##ku; \
+ output[14] = X##ku; \
+ state[15] = X##ma; \
+ output[15] = X##ma; \
+ if (laneCount < 24) { \
+ if (laneCount < 20) { \
+ if (laneCount < 18) { \
+ state[16] = X##me; \
+ if (laneCount >= 17) { \
+ output[16] = X##me; \
+ } \
+ state[17] = X##mi; \
+ state[18] = X##mo; \
+ } \
+ else { \
+ state[16] = X##me; \
+ output[16] = X##me; \
+ state[17] = X##mi; \
+ output[17] = X##mi; \
+ state[18] = X##mo; \
+ if (laneCount >= 19) { \
+ output[18] = X##mo; \
+ } \
+ } \
+ state[19] = X##mu; \
+ state[20] = X##sa; \
+ state[21] = X##se; \
+ state[22] = X##si; \
+ } \
+ else { \
+ state[16] = X##me; \
+ output[16] = X##me; \
+ state[17] = X##mi; \
+ output[17] = X##mi; \
+ state[18] = X##mo; \
+ output[18] = X##mo; \
+ state[19] = X##mu; \
+ output[19] = X##mu; \
+ if (laneCount < 22) { \
+ state[20] = X##sa; \
+ if (laneCount >= 21) { \
+ output[20] = X##sa; \
+ } \
+ state[21] = X##se; \
+ state[22] = X##si; \
+ } \
+ else { \
+ state[20] = X##sa; \
+ output[20] = X##sa; \
+ state[21] = X##se; \
+ output[21] = X##se; \
+ state[22] = X##si; \
+ if (laneCount >= 23) { \
+ output[22] = X##si; \
+ } \
+ } \
+ } \
+ state[23] = X##so; \
+ state[24] = X##su; \
+ } \
+ else { \
+ state[16] = X##me; \
+ output[16] = X##me; \
+ state[17] = X##mi; \
+ output[17] = X##mi; \
+ state[18] = X##mo; \
+ output[18] = X##mo; \
+ state[19] = X##mu; \
+ output[19] = X##mu; \
+ state[20] = X##sa; \
+ output[20] = X##sa; \
+ state[21] = X##se; \
+ output[21] = X##se; \
+ state[22] = X##si; \
+ output[22] = X##si; \
+ state[23] = X##so; \
+ output[23] = X##so; \
+ state[24] = X##su; \
+ if (laneCount >= 25) { \
+ output[24] = X##su; \
+ } \
+ } \
+ }
+
+/* Variant used when UseBebigokimisa is not defined (no complementing applied).
+ * Copies the first laneCount lanes of X (X##ba, X##be, ... in lane order) into
+ * output[0 .. laneCount-1]; a laneCount of 25 or more copies all 25 lanes and
+ * a laneCount of 0 writes nothing. Unlike copyToStateAndOutput, nothing is
+ * stored to a state array. Note that the macro and its second parameter share
+ * the name "output"; inside the body the identifier refers to the parameter. */
+#define output(X, output, laneCount) \
+ if (laneCount < 16) { \
+ if (laneCount < 8) { \
+ if (laneCount < 4) { \
+ if (laneCount < 2) { \
+ if (laneCount >= 1) { \
+ output[ 0] = X##ba; \
+ } \
+ } \
+ else { \
+ output[ 0] = X##ba; \
+ output[ 1] = X##be; \
+ if (laneCount >= 3) { \
+ output[ 2] = X##bi; \
+ } \
+ } \
+ } \
+ else { \
+ output[ 0] = X##ba; \
+ output[ 1] = X##be; \
+ output[ 2] = X##bi; \
+ output[ 3] = X##bo; \
+ if (laneCount < 6) { \
+ if (laneCount >= 5) { \
+ output[ 4] = X##bu; \
+ } \
+ } \
+ else { \
+ output[ 4] = X##bu; \
+ output[ 5] = X##ga; \
+ if (laneCount >= 7) { \
+ output[ 6] = X##ge; \
+ } \
+ } \
+ } \
+ } \
+ else { \
+ output[ 0] = X##ba; \
+ output[ 1] = X##be; \
+ output[ 2] = X##bi; \
+ output[ 3] = X##bo; \
+ output[ 4] = X##bu; \
+ output[ 5] = X##ga; \
+ output[ 6] = X##ge; \
+ output[ 7] = X##gi; \
+ if (laneCount < 12) { \
+ if (laneCount < 10) { \
+ if (laneCount >= 9) { \
+ output[ 8] = X##go; \
+ } \
+ } \
+ else { \
+ output[ 8] = X##go; \
+ output[ 9] = X##gu; \
+ if (laneCount >= 11) { \
+ output[10] = X##ka; \
+ } \
+ } \
+ } \
+ else { \
+ output[ 8] = X##go; \
+ output[ 9] = X##gu; \
+ output[10] = X##ka; \
+ output[11] = X##ke; \
+ if (laneCount < 14) { \
+ if (laneCount >= 13) { \
+ output[12] = X##ki; \
+ } \
+ } \
+ else { \
+ output[12] = X##ki; \
+ output[13] = X##ko; \
+ if (laneCount >= 15) { \
+ output[14] = X##ku; \
+ } \
+ } \
+ } \
+ } \
+ } \
+ else { \
+ output[ 0] = X##ba; \
+ output[ 1] = X##be; \
+ output[ 2] = X##bi; \
+ output[ 3] = X##bo; \
+ output[ 4] = X##bu; \
+ output[ 5] = X##ga; \
+ output[ 6] = X##ge; \
+ output[ 7] = X##gi; \
+ output[ 8] = X##go; \
+ output[ 9] = X##gu; \
+ output[10] = X##ka; \
+ output[11] = X##ke; \
+ output[12] = X##ki; \
+ output[13] = X##ko; \
+ output[14] = X##ku; \
+ output[15] = X##ma; \
+ if (laneCount < 24) { \
+ if (laneCount < 20) { \
+ if (laneCount < 18) { \
+ if (laneCount >= 17) { \
+ output[16] = X##me; \
+ } \
+ } \
+ else { \
+ output[16] = X##me; \
+ output[17] = X##mi; \
+ if (laneCount >= 19) { \
+ output[18] = X##mo; \
+ } \
+ } \
+ } \
+ else { \
+ output[16] = X##me; \
+ output[17] = X##mi; \
+ output[18] = X##mo; \
+ output[19] = X##mu; \
+ if (laneCount < 22) { \
+ if (laneCount >= 21) { \
+ output[20] = X##sa; \
+ } \
+ } \
+ else { \
+ output[20] = X##sa; \
+ output[21] = X##se; \
+ if (laneCount >= 23) { \
+ output[22] = X##si; \
+ } \
+ } \
+ } \
+ } \
+ else { \
+ output[16] = X##me; \
+ output[17] = X##mi; \
+ output[18] = X##mo; \
+ output[19] = X##mu; \
+ output[20] = X##sa; \
+ output[21] = X##se; \
+ output[22] = X##si; \
+ output[23] = X##so; \
+ if (laneCount >= 25) { \
+ output[24] = X##su; \
+ } \
+ } \
+ }
+
+/* Non-complementing variant: XOR input[index] into lane X##name, then copy the
+ * updated lane to output[index]. */
+#define wrapOne(X, input, output, index, name) \
+ X##name ^= input[index]; \
+ output[index] = X##name;
+
+/* Non-complementing variant: without UseBebigokimisa no lane is stored
+ * inverted, so the body is the same as wrapOne. */
+#define wrapOneInvert(X, input, output, index, name) \
+ X##name ^= input[index]; \
+ output[index] = X##name;
+
+/* Non-complementing variant: output[index] receives input[index] XOR lane
+ * X##name; the lane is then XORed with that output, leaving it equal to
+ * input[index]. */
+#define unwrapOne(X, input, output, index, name) \
+ output[index] = input[index] ^ X##name; \
+ X##name ^= output[index];
+
+/* Non-complementing variant: without UseBebigokimisa no lane is stored
+ * inverted, so the body is the same as unwrapOne. */
+#define unwrapOneInvert(X, input, output, index, name) \
+ output[index] = input[index] ^ X##name; \
+ X##name ^= output[index];
+
+#endif
+
+/* Applies the per-lane wrap step to the first laneCount lanes of X and then
+ * XORs trailingBits into the lane that follows them.
+ * For every lane index i < laneCount (capped at 25 lanes) the macro expands
+ * wrapOne(X, input, output, i, <lane>), except for lanes 1 (be), 2 (bi),
+ * 8 (go), 12 (ki), 17 (mi) and 20 (sa), which use wrapOneInvert instead.
+ * When laneCount < 25 the lane at index laneCount is XORed with trailingBits;
+ * when laneCount >= 25 trailingBits is not used.
+ * wrapOne / wrapOneInvert come from whichever UseBebigokimisa branch is
+ * active above. The expansion is a bare if/else statement. */
+#define wrap(X, input, output, laneCount, trailingBits) \
+ if (laneCount < 16) { \
+ if (laneCount < 8) { \
+ if (laneCount < 4) { \
+ if (laneCount < 2) { \
+ if (laneCount < 1) { \
+ X##ba ^= trailingBits; \
+ } \
+ else { \
+ wrapOne(X, input, output, 0, ba) \
+ X##be ^= trailingBits; \
+ } \
+ } \
+ else { \
+ wrapOne(X, input, output, 0, ba) \
+ wrapOneInvert(X, input, output, 1, be) \
+ if (laneCount < 3) { \
+ X##bi ^= trailingBits; \
+ } \
+ else { \
+ wrapOneInvert(X, input, output, 2, bi) \
+ X##bo ^= trailingBits; \
+ } \
+ } \
+ } \
+ else { \
+ wrapOne(X, input, output, 0, ba) \
+ wrapOneInvert(X, input, output, 1, be) \
+ wrapOneInvert(X, input, output, 2, bi) \
+ wrapOne(X, input, output, 3, bo) \
+ if (laneCount < 6) { \
+ if (laneCount < 5) { \
+ X##bu ^= trailingBits; \
+ } \
+ else { \
+ wrapOne(X, input, output, 4, bu) \
+ X##ga ^= trailingBits; \
+ } \
+ } \
+ else { \
+ wrapOne(X, input, output, 4, bu) \
+ wrapOne(X, input, output, 5, ga) \
+ if (laneCount < 7) { \
+ X##ge ^= trailingBits; \
+ } \
+ else { \
+ wrapOne(X, input, output, 6, ge) \
+ X##gi ^= trailingBits; \
+ } \
+ } \
+ } \
+ } \
+ else { \
+ wrapOne(X, input, output, 0, ba) \
+ wrapOneInvert(X, input, output, 1, be) \
+ wrapOneInvert(X, input, output, 2, bi) \
+ wrapOne(X, input, output, 3, bo) \
+ wrapOne(X, input, output, 4, bu) \
+ wrapOne(X, input, output, 5, ga) \
+ wrapOne(X, input, output, 6, ge) \
+ wrapOne(X, input, output, 7, gi) \
+ if (laneCount < 12) { \
+ if (laneCount < 10) { \
+ if (laneCount < 9) { \
+ X##go ^= trailingBits; \
+ } \
+ else { \
+ wrapOneInvert(X, input, output, 8, go) \
+ X##gu ^= trailingBits; \
+ } \
+ } \
+ else { \
+ wrapOneInvert(X, input, output, 8, go) \
+ wrapOne(X, input, output, 9, gu) \
+ if (laneCount < 11) { \
+ X##ka ^= trailingBits; \
+ } \
+ else { \
+ wrapOne(X, input, output, 10, ka) \
+ X##ke ^= trailingBits; \
+ } \
+ } \
+ } \
+ else { \
+ wrapOneInvert(X, input, output, 8, go) \
+ wrapOne(X, input, output, 9, gu) \
+ wrapOne(X, input, output, 10, ka) \
+ wrapOne(X, input, output, 11, ke) \
+ if (laneCount < 14) { \
+ if (laneCount < 13) { \
+ X##ki ^= trailingBits; \
+ } \
+ else { \
+ wrapOneInvert(X, input, output, 12, ki) \
+ X##ko ^= trailingBits; \
+ } \
+ } \
+ else { \
+ wrapOneInvert(X, input, output, 12, ki) \
+ wrapOne(X, input, output, 13, ko) \
+ if (laneCount < 15) { \
+ X##ku ^= trailingBits; \
+ } \
+ else { \
+ wrapOne(X, input, output, 14, ku) \
+ X##ma ^= trailingBits; \
+ } \
+ } \
+ } \
+ } \
+ } \
+ else { \
+ wrapOne(X, input, output, 0, ba) \
+ wrapOneInvert(X, input, output, 1, be) \
+ wrapOneInvert(X, input, output, 2, bi) \
+ wrapOne(X, input, output, 3, bo) \
+ wrapOne(X, input, output, 4, bu) \
+ wrapOne(X, input, output, 5, ga) \
+ wrapOne(X, input, output, 6, ge) \
+ wrapOne(X, input, output, 7, gi) \
+ wrapOneInvert(X, input, output, 8, go) \
+ wrapOne(X, input, output, 9, gu) \
+ wrapOne(X, input, output, 10, ka) \
+ wrapOne(X, input, output, 11, ke) \
+ wrapOneInvert(X, input, output, 12, ki) \
+ wrapOne(X, input, output, 13, ko) \
+ wrapOne(X, input, output, 14, ku) \
+ wrapOne(X, input, output, 15, ma) \
+ if (laneCount < 24) { \
+ if (laneCount < 20) { \
+ if (laneCount < 18) { \
+ if (laneCount < 17) { \
+ X##me ^= trailingBits; \
+ } \
+ else { \
+ wrapOne(X, input, output, 16, me) \
+ X##mi ^= trailingBits; \
+ } \
+ } \
+ else { \
+ wrapOne(X, input, output, 16, me) \
+ wrapOneInvert(X, input, output, 17, mi) \
+ if (laneCount < 19) { \
+ X##mo ^= trailingBits; \
+ } \
+ else { \
+ wrapOne(X, input, output, 18, mo) \
+ X##mu ^= trailingBits; \
+ } \
+ } \
+ } \
+ else { \
+ wrapOne(X, input, output, 16, me) \
+ wrapOneInvert(X, input, output, 17, mi) \
+ wrapOne(X, input, output, 18, mo) \
+ wrapOne(X, input, output, 19, mu) \
+ if (laneCount < 22) { \
+ if (laneCount < 21) { \
+ X##sa ^= trailingBits; \
+ } \
+ else { \
+ wrapOneInvert(X, input, output, 20, sa) \
+ X##se ^= trailingBits; \
+ } \
+ } \
+ else { \
+ wrapOneInvert(X, input, output, 20, sa) \
+ wrapOne(X, input, output, 21, se) \
+ if (laneCount < 23) { \
+ X##si ^= trailingBits; \
+ } \
+ else { \
+ wrapOne(X, input, output, 22, si) \
+ X##so ^= trailingBits; \
+ } \
+ } \
+ } \
+ } \
+ else { \
+ wrapOne(X, input, output, 16, me) \
+ wrapOneInvert(X, input, output, 17, mi) \
+ wrapOne(X, input, output, 18, mo) \
+ wrapOne(X, input, output, 19, mu) \
+ wrapOneInvert(X, input, output, 20, sa) \
+ wrapOne(X, input, output, 21, se) \
+ wrapOne(X, input, output, 22, si) \
+ wrapOne(X, input, output, 23, so) \
+ if (laneCount < 25) { \
+ X##su ^= trailingBits; \
+ } \
+ else { \
+ wrapOne(X, input, output, 24, su) \
+ } \
+ } \
+ }
+
+/* Counterpart of wrap built from the unwrap helpers.
+ * For every lane index i < laneCount (capped at 25 lanes) the macro expands
+ * unwrapOne(X, input, output, i, <lane>), except for lanes 1 (be), 2 (bi),
+ * 8 (go), 12 (ki), 17 (mi) and 20 (sa), which use unwrapOneInvert instead.
+ * When laneCount < 25 the lane at index laneCount is XORed with trailingBits;
+ * when laneCount >= 25 trailingBits is not used.
+ * unwrapOne / unwrapOneInvert come from whichever UseBebigokimisa branch is
+ * active above. The expansion is a bare if/else statement. */
+#define unwrap(X, input, output, laneCount, trailingBits) \
+ if (laneCount < 16) { \
+ if (laneCount < 8) { \
+ if (laneCount < 4) { \
+ if (laneCount < 2) { \
+ if (laneCount < 1) { \
+ X##ba ^= trailingBits; \
+ } \
+ else { \
+ unwrapOne(X, input, output, 0, ba) \
+ X##be ^= trailingBits; \
+ } \
+ } \
+ else { \
+ unwrapOne(X, input, output, 0, ba) \
+ unwrapOneInvert(X, input, output, 1, be) \
+ if (laneCount < 3) { \
+ X##bi ^= trailingBits; \
+ } \
+ else { \
+ unwrapOneInvert(X, input, output, 2, bi) \
+ X##bo ^= trailingBits; \
+ } \
+ } \
+ } \
+ else { \
+ unwrapOne(X, input, output, 0, ba) \
+ unwrapOneInvert(X, input, output, 1, be) \
+ unwrapOneInvert(X, input, output, 2, bi) \
+ unwrapOne(X, input, output, 3, bo) \
+ if (laneCount < 6) { \
+ if (laneCount < 5) { \
+ X##bu ^= trailingBits; \
+ } \
+ else { \
+ unwrapOne(X, input, output, 4, bu) \
+ X##ga ^= trailingBits; \
+ } \
+ } \
+ else { \
+ unwrapOne(X, input, output, 4, bu) \
+ unwrapOne(X, input, output, 5, ga) \
+ if (laneCount < 7) { \
+ X##ge ^= trailingBits; \
+ } \
+ else { \
+ unwrapOne(X, input, output, 6, ge) \
+ X##gi ^= trailingBits; \
+ } \
+ } \
+ } \
+ } \
+ else { \
+ unwrapOne(X, input, output, 0, ba) \
+ unwrapOneInvert(X, input, output, 1, be) \
+ unwrapOneInvert(X, input, output, 2, bi) \
+ unwrapOne(X, input, output, 3, bo) \
+ unwrapOne(X, input, output, 4, bu) \
+ unwrapOne(X, input, output, 5, ga) \
+ unwrapOne(X, input, output, 6, ge) \
+ unwrapOne(X, input, output, 7, gi) \
+ if (laneCount < 12) { \
+ if (laneCount < 10) { \
+ if (laneCount < 9) { \
+ X##go ^= trailingBits; \
+ } \
+ else { \
+ unwrapOneInvert(X, input, output, 8, go) \
+ X##gu ^= trailingBits; \
+ } \
+ } \
+ else { \
+ unwrapOneInvert(X, input, output, 8, go) \
+ unwrapOne(X, input, output, 9, gu) \
+ if (laneCount < 11) { \
+ X##ka ^= trailingBits; \
+ } \
+ else { \
+ unwrapOne(X, input, output, 10, ka) \
+ X##ke ^= trailingBits; \
+ } \
+ } \
+ } \
+ else { \
+ unwrapOneInvert(X, input, output, 8, go) \
+ unwrapOne(X, input, output, 9, gu) \
+ unwrapOne(X, input, output, 10, ka) \
+ unwrapOne(X, input, output, 11, ke) \
+ if (laneCount < 14) { \
+ if (laneCount < 13) { \
+ X##ki ^= trailingBits; \
+ } \
+ else { \
+ unwrapOneInvert(X, input, output, 12, ki) \
+ X##ko ^= trailingBits; \
+ } \
+ } \
+ else { \
+ unwrapOneInvert(X, input, output, 12, ki) \
+ unwrapOne(X, input, output, 13, ko) \
+ if (laneCount < 15) { \
+ X##ku ^= trailingBits; \
+ } \
+ else { \
+ unwrapOne(X, input, output, 14, ku) \
+ X##ma ^= trailingBits; \
+ } \
+ } \
+ } \
+ } \
+ } \
+ else { \
+ unwrapOne(X, input, output, 0, ba) \
+ unwrapOneInvert(X, input, output, 1, be) \
+ unwrapOneInvert(X, input, output, 2, bi) \
+ unwrapOne(X, input, output, 3, bo) \
+ unwrapOne(X, input, output, 4, bu) \
+ unwrapOne(X, input, output, 5, ga) \
+ unwrapOne(X, input, output, 6, ge) \
+ unwrapOne(X, input, output, 7, gi) \
+ unwrapOneInvert(X, input, output, 8, go) \
+ unwrapOne(X, input, output, 9, gu) \
+ unwrapOne(X, input, output, 10, ka) \
+ unwrapOne(X, input, output, 11, ke) \
+ unwrapOneInvert(X, input, output, 12, ki) \
+ unwrapOne(X, input, output, 13, ko) \
+ unwrapOne(X, input, output, 14, ku) \
+ unwrapOne(X, input, output, 15, ma) \
+ if (laneCount < 24) { \
+ if (laneCount < 20) { \
+ if (laneCount < 18) { \
+ if (laneCount < 17) { \
+ X##me ^= trailingBits; \
+ } \
+ else { \
+ unwrapOne(X, input, output, 16, me) \
+ X##mi ^= trailingBits; \
+ } \
+ } \
+ else { \
+ unwrapOne(X, input, output, 16, me) \
+ unwrapOneInvert(X, input, output, 17, mi) \
+ if (laneCount < 19) { \
+ X##mo ^= trailingBits; \
+ } \
+ else { \
+ unwrapOne(X, input, output, 18, mo) \
+ X##mu ^= trailingBits; \
+ } \
+ } \
+ } \
+ else { \
+ unwrapOne(X, input, output, 16, me) \
+ unwrapOneInvert(X, input, output, 17, mi) \
+ unwrapOne(X, input, output, 18, mo) \
+ unwrapOne(X, input, output, 19, mu) \
+ if (laneCount < 22) { \
+ if (laneCount < 21) { \
+ X##sa ^= trailingBits; \
+ } \
+ else { \
+ unwrapOneInvert(X, input, output, 20, sa) \
+ X##se ^= trailingBits; \
+ } \
+ } \
+ else { \
+ unwrapOneInvert(X, input, output, 20, sa) \
+ unwrapOne(X, input, output, 21, se) \
+ if (laneCount < 23) { \
+ X##si ^= trailingBits; \
+ } \
+ else { \
+ unwrapOne(X, input, output, 22, si) \
+ X##so ^= trailingBits; \
+ } \
+ } \
+ } \
+ } \
+ else { \
+ unwrapOne(X, input, output, 16, me) \
+ unwrapOneInvert(X, input, output, 17, mi) \
+ unwrapOne(X, input, output, 18, mo) \
+ unwrapOne(X, input, output, 19, mu) \
+ unwrapOneInvert(X, input, output, 20, sa) \
+ unwrapOne(X, input, output, 21, se) \
+ unwrapOne(X, input, output, 22, si) \
+ unwrapOne(X, input, output, 23, so) \
+ if (laneCount < 25) { \
+ X##su ^= trailingBits; \
+ } \
+ else { \
+ unwrapOne(X, input, output, 24, su) \
+ } \
+ } \
+ }
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _KeccakP_1600_SnP_h_
+#define _KeccakP_1600_SnP_h_
+
+/** For the documentation, see SnP-documentation.h.
+ */
+
+#include "brg_endian.h"
+#include "KeccakP-1600-opt64-config.h"
+
+/* Description string; the parenthesised part is the string defined as
+ * KeccakP1600_implementation_config in KeccakP-1600-opt64-config.h. */
+#define KeccakP1600_implementation "generic 64-bit optimized implementation (" KeccakP1600_implementation_config ")"
+/* Size of the state buffer in bytes and the alignment declared for it. */
+#define KeccakP1600_stateSizeInBytes 200
+#define KeccakP1600_stateAlignment 8
+/* Signals that KeccakF1600_FastLoop_Absorb (declared below) is provided. */
+#define KeccakF1600_FastLoop_supported
+
+#include <stddef.h>
+
+/* Expands to nothing: this implementation has no static set-up step. */
+#define KeccakP1600_StaticInitialize()
+void KeccakP1600_Initialize(void *state);
+/* On little-endian builds AddByte is a macro that XORs `byte` directly into
+ * the state byte at `offset`; otherwise it is an ordinary function. */
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+#define KeccakP1600_AddByte(state, byte, offset) \
+ ((unsigned char*)(state))[(offset)] ^= (byte)
+#else
+void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset);
+#endif
+/* Byte-range and permutation entry points; their contract is described in
+ * SnP-documentation.h (not part of this header). */
+void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
+void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
+void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
+void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds);
+void KeccakP1600_Permute_12rounds(void *state);
+void KeccakP1600_Permute_24rounds(void *state);
+void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
+void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
+size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
+
+#endif
--- /dev/null
+/* Build-time configuration for the 64-bit KeccakP-1600 implementation.
+ * - KeccakP1600_implementation_config: text spliced into the
+ *   KeccakP1600_implementation description string.
+ * - KeccakP1600_fullUnrolling: makes KeccakP-1600-opt64.c define FullUnrolling.
+ * - KeccakP1600_useLaneComplementing: makes KeccakP-1600-opt64.c define
+ *   UseBebigokimisa and keep lanes 1, 2, 8, 12, 17 and 20 complemented in the
+ *   stored state. */
+#define KeccakP1600_implementation_config "lane complementing, all rounds unrolled"
+#define KeccakP1600_fullUnrolling
+#define KeccakP1600_useLaneComplementing
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include "brg_endian.h"
+#include "KeccakP-1600-opt64-config.h"
+
+/* Integer aliases used throughout this file: UINT8 for single bytes, UINT64
+ * for one state lane. */
+typedef unsigned char UINT8;
+typedef unsigned long long int UINT64;
+
+/* Map the public configuration switch onto UseBebigokimisa, the name this
+ * file's lane-complementing switch is translated to before the macro files
+ * are included below. */
+#if defined(KeccakP1600_useLaneComplementing)
+#define UseBebigokimisa
+#endif
+
+/* ROL64(a, offset): rotate the 64-bit value `a` left by `offset` bits.
+ * - MSVC: uses the _rotl64 intrinsic.
+ * - KeccakP1600_useSHLD: uses an inline-assembly shld instruction; the
+ *   rotation count must be a compile-time constant ("i" constraint).
+ * - otherwise: portable shift/xor form. It shifts right by (64 - offset), so
+ *   offset must lie in 1..63 there.
+ * Macro parameters are parenthesised so that expression arguments such as
+ * ROL64(x ^ y, n + 1) are grouped as the caller intends. */
+#if defined(_MSC_VER)
+#define ROL64(a, offset) _rotl64((a), (offset))
+#elif defined(KeccakP1600_useSHLD)
+    #define ROL64(x,N) ({ \
+    register UINT64 __out; \
+    register UINT64 __in = (x); \
+    __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \
+    __out; \
+    })
+#else
+#define ROL64(a, offset) ((((UINT64)(a)) << (offset)) ^ (((UINT64)(a)) >> (64-(offset))))
+#endif
+
+#include "KeccakP-1600-64.macros"
+/* Translate the configuration switch into the names tested by
+ * KeccakP-1600-unrolling.macros (included right after): FullUnrolling when
+ * every round is unrolled, otherwise Unrolling set to the configured
+ * KeccakP1600_unrolling value. */
+#ifdef KeccakP1600_fullUnrolling
+#define FullUnrolling
+#else
+#define Unrolling KeccakP1600_unrolling
+#endif
+#include "KeccakP-1600-unrolling.macros"
+#include "SnP-Relaned.h"
+
+/* The 24 64-bit round constants of this file, in table order. The table is not
+ * referenced directly in this file's functions; presumably the round macros
+ * pulled in from the .macros files index it by round number. */
+static const UINT64 KeccakF1600RoundConstants[24] = {
+    0x0000000000000001ULL,  /* [ 0] */
+    0x0000000000008082ULL,  /* [ 1] */
+    0x800000000000808aULL,  /* [ 2] */
+    0x8000000080008000ULL,  /* [ 3] */
+    0x000000000000808bULL,  /* [ 4] */
+    0x0000000080000001ULL,  /* [ 5] */
+    0x8000000080008081ULL,  /* [ 6] */
+    0x8000000000008009ULL,  /* [ 7] */
+    0x000000000000008aULL,  /* [ 8] */
+    0x0000000000000088ULL,  /* [ 9] */
+    0x0000000080008009ULL,  /* [10] */
+    0x000000008000000aULL,  /* [11] */
+    0x000000008000808bULL,  /* [12] */
+    0x800000000000008bULL,  /* [13] */
+    0x8000000000008089ULL,  /* [14] */
+    0x8000000000008003ULL,  /* [15] */
+    0x8000000000008002ULL,  /* [16] */
+    0x8000000000000080ULL,  /* [17] */
+    0x000000000000800aULL,  /* [18] */
+    0x800000008000000aULL,  /* [19] */
+    0x8000000080008081ULL,  /* [20] */
+    0x8000000000008080ULL,  /* [21] */
+    0x0000000080000001ULL,  /* [22] */
+    0x8000000080008008ULL   /* [23] */
+};
+
+/* ---------------------------------------------------------------- */
+
+/* Resets the 200-byte state to its initial value: all bytes zero, and, when
+ * lane complementing is enabled, lanes 1, 2, 8, 12, 17 and 20 set to all ones
+ * (the complemented representation of zero).
+ *
+ * state: buffer of at least 200 bytes, accessed as an array of UINT64 lanes. */
+void KeccakP1600_Initialize(void *state)
+{
+    memset(state, 0, 200);
+#ifdef KeccakP1600_useLaneComplementing
+    {
+        static const unsigned int complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
+        UINT64 *lanes = (UINT64*)state;
+        unsigned int k;
+
+        for (k = 0; k < 6; k++)
+            lanes[complementedLanes[k]] = ~(UINT64)0;
+    }
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+/* XORs `length` bytes from `data` into a single lane of the state.
+ *
+ * state:        state buffer, accessed as an array of UINT64 lanes.
+ * lanePosition: index of the lane to modify.
+ * data:         source bytes; data[0] lands at byte `offset` of the lane,
+ *               counting from the least significant byte.
+ * offset:       first byte position inside the lane.
+ * length:       number of bytes to add; the little-endian path returns at once
+ *               when it is 0. The bytes are gathered into one UINT64, so
+ *               offset + length is expected not to exceed 8. */
+void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
+{
+    UINT64 *lanes = (UINT64*)state;
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+    UINT64 chunk = 0;
+
+    if (length == 0)
+        return;
+    if (length == 1)
+        chunk = data[0];
+    else
+        memcpy(&chunk, data, length);
+    chunk <<= offset*8;
+#else
+    UINT64 chunk = 0;
+    unsigned int k;
+
+    /* Assemble the bytes explicitly so the result does not depend on the
+     * host byte order. */
+    for (k = 0; k < length; k++)
+        chunk |= ((UINT64)data[k]) << ((k+offset)*8);
+#endif
+    lanes[lanePosition] ^= chunk;
+}
+
+/* ---------------------------------------------------------------- */
+
+/* XORs the first `laneCount` lanes (laneCount * 8 bytes) of `data` into the
+ * start of the state.
+ *
+ * state:     state buffer, accessed as an array of UINT64 lanes.
+ * data:      input bytes; each group of 8 bytes is one lane, least significant
+ *            byte first. It is only read, never written.
+ * laneCount: number of whole lanes to add.
+ *
+ * Little-endian builds XOR lane by lane through UINT64 pointers. When
+ * NO_MISALIGNED_ACCESSES is defined and either pointer is not 8-byte aligned,
+ * a byte-wise XOR is used instead (uintptr_t comes from <stdint.h>).
+ * Other byte orders assemble each lane from its bytes before XORing. */
+void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount)
+{
+    unsigned int i;
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+#ifdef NO_MISALIGNED_ACCESSES
+    /* If either pointer is misaligned, fall back to byte-wise xor. */
+    if (((((uintptr_t)state) & 7) != 0) || ((((uintptr_t)data) & 7) != 0)) {
+        for (i = 0; i < laneCount * 8; i++) {
+            ((unsigned char*)state)[i] ^= data[i];
+        }
+    }
+    else
+#endif
+    {
+        UINT64 *stateAsLanes = (UINT64*)state;
+        /* Keep the const qualifier: the input is only read. */
+        const UINT64 *dataAsLanes = (const UINT64*)data;
+
+        for (i = 0; i < laneCount; i++)
+            stateAsLanes[i] ^= dataAsLanes[i];
+    }
+#else
+    const UINT8 *curData = data;
+
+    for (i = 0; i < laneCount; i++, curData += 8) {
+        UINT64 lane = (UINT64)curData[0]
+            | ((UINT64)curData[1] << 8)
+            | ((UINT64)curData[2] << 16)
+            | ((UINT64)curData[3] << 24)
+            | ((UINT64)curData[4] << 32)
+            | ((UINT64)curData[5] << 40)
+            | ((UINT64)curData[6] << 48)
+            | ((UINT64)curData[7] << 56);
+        ((UINT64*)state)[i] ^= lane;
+    }
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
+/* Function form of AddByte, compiled only when the platform is not
+ * little-endian: XORs `byte` into the lane offset/8 at byte position offset%8,
+ * counted from the least significant byte of that lane. */
+void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset)
+{
+    UINT64 *lanes = (UINT64*)state;
+
+    lanes[offset/8] ^= ((UINT64)byte) << ((offset%8)*8);
+}
+#endif
+
+/* ---------------------------------------------------------------- */
+
+/* Byte-range "add" entry point. Expands the SnP_AddBytes helper macro
+ * (presumably from SnP-Relaned.h, included above), handing it the whole-lane
+ * routine KeccakP1600_AddLanes, the partial-lane routine
+ * KeccakP1600_AddBytesInLane and the lane size of 8 bytes.
+ * NOTE(review): how [offset, offset+length) is split into lanes is decided
+ * inside SnP_AddBytes, which is not visible here. */
+void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
+{
+ SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8);
+}
+
+/* ---------------------------------------------------------------- */
+
+/* Replaces `length` bytes inside one lane of the state with bytes from `data`.
+ *
+ * state:        state buffer.
+ * lanePosition: index of the lane being written.
+ * data:         replacement bytes.
+ * offset:       first byte position inside the lane.
+ * length:       number of bytes to overwrite.
+ *
+ * With lane complementing enabled, lanes 1, 2, 8, 12, 17 and 20 are kept
+ * inverted in memory, so the bytes written to them are complemented.
+ * Only little-endian platforms are supported; others fail at compile time. */
+void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+    unsigned char *target = (unsigned char*)state + lanePosition*8 + offset;
+#ifdef KeccakP1600_useLaneComplementing
+    unsigned int k;
+
+    switch (lanePosition) {
+    case 1: case 2: case 8: case 12: case 17: case 20:
+        for (k = 0; k < length; k++)
+            target[k] = ~data[k];
+        return;
+    default:
+        break;
+    }
+#endif
+    memcpy(target, data, length);
+#else
+#error "Not yet implemented"
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+/* Replaces the first `laneCount` lanes of the state with the lanes in `data`.
+ *
+ * state:     state buffer, accessed as an array of UINT64 lanes.
+ * data:      laneCount * 8 source bytes, read as UINT64 lanes.
+ * laneCount: number of whole lanes to overwrite.
+ *
+ * With lane complementing enabled, lanes 1, 2, 8, 12, 17 and 20 are stored
+ * inverted; without it the call is a plain memcpy.
+ * Only little-endian platforms are supported; others fail at compile time. */
+void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+#ifdef KeccakP1600_useLaneComplementing
+    UINT64 *dst = (UINT64*)state;
+    const UINT64 *src = (const UINT64*)data;
+    unsigned int k;
+
+    for (k = 0; k < laneCount; k++) {
+        switch (k) {
+        case 1: case 2: case 8: case 12: case 17: case 20:
+            dst[k] = ~src[k];
+            break;
+        default:
+            dst[k] = src[k];
+            break;
+        }
+    }
+#else
+    memcpy(state, data, laneCount*8);
+#endif
+#else
+#error "Not yet implemented"
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+/* Byte-range "overwrite" entry point. Expands the SnP_OverwriteBytes helper
+ * macro (presumably from SnP-Relaned.h), handing it the whole-lane routine
+ * KeccakP1600_OverwriteLanes, the partial-lane routine
+ * KeccakP1600_OverwriteBytesInLane and the lane size of 8 bytes.
+ * NOTE(review): the range splitting itself lives in SnP_OverwriteBytes, which
+ * is not visible here. */
+void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
+{
+ SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8);
+}
+
+/* ---------------------------------------------------------------- */
+
+/* Sets the first `byteCount` bytes of the state to the representation of zero.
+ *
+ * state:     state buffer.
+ * byteCount: number of leading bytes to clear.
+ *
+ * With lane complementing enabled, "zero" for lanes 1, 2, 8, 12, 17 and 20 is
+ * all-ones: whole lanes are written as UINT64 values, and a trailing partial
+ * lane is filled byte-wise with 0xFF or 0 accordingly. Without it the call is
+ * a plain memset to 0.
+ * Only little-endian platforms are supported; others fail at compile time. */
+void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+#ifdef KeccakP1600_useLaneComplementing
+    const unsigned int fullLanes = byteCount/8;
+    const unsigned int tailBytes = byteCount%8;
+    UINT64 *lanes = (UINT64*)state;
+    unsigned int k;
+
+    for (k = 0; k < fullLanes; k++) {
+        switch (k) {
+        case 1: case 2: case 8: case 12: case 17: case 20:
+            lanes[k] = ~(UINT64)0;
+            break;
+        default:
+            lanes[k] = 0;
+            break;
+        }
+    }
+    if (tailBytes != 0) {
+        int fill = 0;
+
+        /* The partial lane is the one right after the last full lane. */
+        switch (fullLanes) {
+        case 1: case 2: case 8: case 12: case 17: case 20:
+            fill = 0xFF;
+            break;
+        default:
+            break;
+        }
+        memset((unsigned char*)state + fullLanes*8, fill, tailBytes);
+    }
+#else
+    memset(state, 0, byteCount);
+#endif
+#else
+#error "Not yet implemented"
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+/* Runs the permutation with a caller-chosen number of rounds on `state`.
+ * The body is assembled from macros defined in the included .macros files
+ * (not visible here): declareABCDE declares the working variables,
+ * copyFromState loads them from the state lanes, roundsN(nr) performs the
+ * rounds and copyToState stores the result back.
+ *
+ * state: state buffer, accessed as an array of UINT64 lanes.
+ * nr:    number of rounds, passed straight to roundsN. */
+void KeccakP1600_Permute_Nrounds(void *state, unsigned int nr)
+{
+ declareABCDE
+ /* presumably the loop counter used inside roundsN -- TODO confirm */
+ unsigned int i;
+ UINT64 *stateAsLanes = (UINT64*)state;
+
+ copyFromState(A, stateAsLanes)
+ roundsN(nr)
+ copyToState(stateAsLanes, A)
+
+}
+
+/* ---------------------------------------------------------------- */
+
+/* Runs the rounds24 macro on `state`: the working variables are loaded with
+ * copyFromState, rounds24 is expanded, and the result is written back with
+ * copyToState. All three macros, and declareABCDE, come from the included
+ * .macros files.
+ *
+ * state: state buffer, accessed as an array of UINT64 lanes. */
+void KeccakP1600_Permute_24rounds(void *state)
+{
+ declareABCDE
+ /* `i` is only declared when the rounds are not fully unrolled; presumably
+  * it is the loop counter of the partially unrolled round macros. */
+ #ifndef KeccakP1600_fullUnrolling
+ unsigned int i;
+ #endif
+ UINT64 *stateAsLanes = (UINT64*)state;
+
+ copyFromState(A, stateAsLanes)
+ rounds24
+ copyToState(stateAsLanes, A)
+}
+
+/* ---------------------------------------------------------------- */
+
+/* Runs the rounds12 macro on `state`: the working variables are loaded with
+ * copyFromState, rounds12 is expanded, and the result is written back with
+ * copyToState. All three macros, and declareABCDE, come from the included
+ * .macros files.
+ *
+ * state: state buffer, accessed as an array of UINT64 lanes. */
+void KeccakP1600_Permute_12rounds(void *state)
+{
+ declareABCDE
+ /* `i` is only declared when the rounds are not fully unrolled; presumably
+  * it is the loop counter of the partially unrolled round macros. */
+ #ifndef KeccakP1600_fullUnrolling
+ unsigned int i;
+ #endif
+ UINT64 *stateAsLanes = (UINT64*)state;
+
+ copyFromState(A, stateAsLanes)
+ rounds12
+ copyToState(stateAsLanes, A)
+}
+
+/* ---------------------------------------------------------------- */
+
+/* Copies `length` bytes out of one lane of the state into `data`.
+ *
+ * state:        state buffer, accessed as an array of UINT64 lanes.
+ * lanePosition: index of the lane to read.
+ * data:         destination for the extracted bytes.
+ * offset:       first byte position inside the lane, counted from the least
+ *               significant byte.
+ * length:       number of bytes to copy.
+ *
+ * With lane complementing enabled, lanes 1, 2, 8, 12, 17 and 20 are inverted
+ * before extraction, undoing their stored representation. */
+void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length)
+{
+    UINT64 value = ((const UINT64*)state)[lanePosition];
+#ifdef KeccakP1600_useLaneComplementing
+    switch (lanePosition) {
+    case 1: case 2: case 8: case 12: case 17: case 20:
+        value = ~value;
+        break;
+    default:
+        break;
+    }
+#endif
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+    /* The in-memory byte order of the local copy already matches the output. */
+    memcpy(data, (const UINT8*)&value + offset, length);
+#else
+    {
+        unsigned int k;
+
+        value >>= offset*8;
+        for (k = 0; k < length; k++) {
+            data[k] = value & 0xFF;
+            value >>= 8;
+        }
+    }
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
+/* Serialises a 64-bit word into 8 bytes, least significant byte first.
+ * Compiled only when the platform is not little-endian.
+ *
+ * bytes: destination, at least 8 bytes.
+ * word:  value to serialise. */
+void fromWordToBytes(UINT8 *bytes, const UINT64 word)
+{
+    UINT64 rest = word;
+    unsigned int k = 0;
+
+    while (k < (64/8)) {
+        bytes[k] = rest & 0xFF;
+        rest >>= 8;
+        k++;
+    }
+}
+#endif
+
+/* Copies the first `laneCount` lanes of the state into `data`.
+ *
+ * state:     state buffer, accessed as an array of UINT64 lanes.
+ * data:      destination, laneCount * 8 bytes; each lane is written least
+ *            significant byte first.
+ * laneCount: number of whole lanes to extract.
+ *
+ * With lane complementing enabled, the copies of lanes 1, 2, 8, 12, 17 and 20
+ * that fall inside the extracted range are inverted in `data` afterwards
+ * (through UINT64 accesses to `data`). */
+void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount)
+{
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+    memcpy(data, state, laneCount*8);
+#else
+    unsigned int i;
+
+    for (i = 0; i < laneCount; i++)
+        fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
+#endif
+#ifdef KeccakP1600_useLaneComplementing
+    {
+        /* Ascending order: the walk stops at the first lane outside the range. */
+        static const unsigned int complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
+        UINT64 *outLanes = (UINT64*)data;
+        unsigned int k;
+
+        for (k = 0; k < 6 && laneCount > complementedLanes[k]; k++)
+            outLanes[complementedLanes[k]] = ~outLanes[complementedLanes[k]];
+    }
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+/* Byte-range "extract" entry point. Expands the SnP_ExtractBytes helper macro
+ * (presumably from SnP-Relaned.h), handing it the whole-lane routine
+ * KeccakP1600_ExtractLanes, the partial-lane routine
+ * KeccakP1600_ExtractBytesInLane and the lane size of 8 bytes.
+ * NOTE(review): the range splitting itself lives in SnP_ExtractBytes, which is
+ * not visible here. */
+void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
+{
+ SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8);
+}
+
+/* ---------------------------------------------------------------- */
+
+/* Writes to `output` the XOR of `input` with `length` bytes taken from one
+ * lane of the state.
+ *
+ * state:        state buffer, accessed as an array of UINT64 lanes.
+ * lanePosition: index of the lane to read.
+ * input:        bytes to combine with the lane bytes.
+ * output:       destination for input[k] XOR lane byte (offset + k).
+ * offset:       first byte position inside the lane, counted from the least
+ *               significant byte.
+ * length:       number of bytes to produce.
+ *
+ * With lane complementing enabled, lanes 1, 2, 8, 12, 17 and 20 are inverted
+ * before use, undoing their stored representation. */
+void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
+{
+    UINT64 value = ((const UINT64*)state)[lanePosition];
+    unsigned int k;
+#ifdef KeccakP1600_useLaneComplementing
+    switch (lanePosition) {
+    case 1: case 2: case 8: case 12: case 17: case 20:
+        value = ~value;
+        break;
+    default:
+        break;
+    }
+#endif
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+    {
+        const UINT8 *laneBytes = (const UINT8*)&value;
+
+        for (k = 0; k < length; k++)
+            output[k] = input[k] ^ laneBytes[offset+k];
+    }
+#else
+    value >>= offset*8;
+    for (k = 0; k < length; k++) {
+        output[k] = input[k] ^ (value & 0xFF);
+        value >>= 8;
+    }
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+/* Writes to `output` the XOR of `input` with the first `laneCount` lanes of
+ * the state.
+ *
+ * state:     state buffer, accessed as an array of UINT64 lanes.
+ * input:     laneCount * 8 bytes to combine with the state.
+ * output:    destination, laneCount * 8 bytes.
+ * laneCount: number of whole lanes to process.
+ *
+ * With lane complementing enabled, output lanes 1, 2, 8, 12, 17 and 20 that
+ * fall inside the range are inverted afterwards (through UINT64 accesses to
+ * `output`), which cancels the inverted storage of those state lanes. */
+void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount)
+{
+    const UINT64 *stateLanes = (const UINT64*)state;
+    unsigned int i;
+#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
+    unsigned char temp[8];
+    unsigned int j;
+#endif
+
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+    for (i = 0; i < laneCount; i++)
+        ((UINT64*)output)[i] = ((UINT64*)input)[i] ^ stateLanes[i];
+#else
+    for (i = 0; i < laneCount; i++) {
+        fromWordToBytes(temp, stateLanes[i]);
+        for (j = 0; j < 8; j++)
+            output[i*8+j] = input[i*8+j] ^ temp[j];
+    }
+#endif
+#ifdef KeccakP1600_useLaneComplementing
+    {
+        /* Ascending order: the walk stops at the first lane outside the range. */
+        static const unsigned int complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
+        UINT64 *outLanes = (UINT64*)output;
+        unsigned int k;
+
+        for (k = 0; k < 6 && laneCount > complementedLanes[k]; k++)
+            outLanes[complementedLanes[k]] = ~outLanes[complementedLanes[k]];
+    }
+#endif
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Writes to `output` the XOR of `input` with `length` bytes of the state
+ * starting at byte `offset`. The work is done by the generic
+ * SnP_ExtractAndAddBytes macro, which is handed this implementation's
+ * whole-lane helper (KeccakP1600_ExtractAndAddLanes), its partial-lane helper
+ * (KeccakP1600_ExtractAndAddBytesInLane) and a lane size of 8 bytes.
+ */
+void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
+{
+ SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8);
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Absorbs as many whole blocks of `laneCount` 8-byte lanes as `data` holds.
+ * For each block the input lanes are added to the working state (addInput)
+ * and rounds24 is applied. The state is read from memory once before the loop
+ * and written back once after it.
+ *
+ * Returns the number of bytes consumed, i.e. the largest multiple of
+ * laneCount*8 that is not greater than dataByteLen.
+ *
+ * NOTE(review): with laneCount == 0 the loop condition is always true and
+ * nothing is consumed per iteration; callers appear to be expected to pass a
+ * non-zero lane count - confirm.
+ */
+size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen)
+{
+ size_t originalDataByteLen = dataByteLen;
+ declareABCDE
+ /* `i` only exists when the rounds are not fully unrolled; presumably it is
+    the loop counter used by the looped forms of rounds24. */
+ #ifndef KeccakP1600_fullUnrolling
+ unsigned int i;
+ #endif
+ UINT64 *stateAsLanes = (UINT64*)state;
+ UINT64 *inDataAsLanes = (UINT64*)data;
+
+ copyFromState(A, stateAsLanes)
+ while(dataByteLen >= laneCount*8) {
+ addInput(A, inDataAsLanes, laneCount)
+ rounds24
+ /* advance by one block: laneCount lanes == laneCount*8 bytes */
+ inDataAsLanes += laneCount;
+ dataByteLen -= laneCount*8;
+ }
+ copyToState(stateAsLanes, A)
+ return originalDataByteLen - dataByteLen;
+}
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+/*
+ * Defines the statement macros rounds24 and rounds12 according to the
+ * unrolling configuration (FullUnrolling defined, or Unrolling equal to
+ * 12, 6, 4, 3, 2 or 1; anything else is a compile-time error).
+ *
+ * rounds24 covers round indices 0..23 and rounds12 covers 12..23.
+ * The expansions rely on names supplied by the including file: prepareTheta,
+ * thetaRhoPiChiIotaPrepareTheta, thetaRhoPiChiIota, copyStateVariables, the
+ * variable sets A and E, and - for the looped forms - a variable `i`.
+ * Consecutive rounds alternate the (A, E) and (E, A) argument order.
+ */
+#if (defined(FullUnrolling))
+/* No loop: every round is written out; the final round (23) uses
+ * thetaRhoPiChiIota instead of thetaRhoPiChiIotaPrepareTheta. */
+#define rounds24 \
+ prepareTheta \
+ thetaRhoPiChiIotaPrepareTheta( 0, A, E) \
+ thetaRhoPiChiIotaPrepareTheta( 1, E, A) \
+ thetaRhoPiChiIotaPrepareTheta( 2, A, E) \
+ thetaRhoPiChiIotaPrepareTheta( 3, E, A) \
+ thetaRhoPiChiIotaPrepareTheta( 4, A, E) \
+ thetaRhoPiChiIotaPrepareTheta( 5, E, A) \
+ thetaRhoPiChiIotaPrepareTheta( 6, A, E) \
+ thetaRhoPiChiIotaPrepareTheta( 7, E, A) \
+ thetaRhoPiChiIotaPrepareTheta( 8, A, E) \
+ thetaRhoPiChiIotaPrepareTheta( 9, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(10, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(11, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(12, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(13, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(14, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(15, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(16, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(17, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(18, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(19, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(20, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(21, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(22, A, E) \
+ thetaRhoPiChiIota(23, E, A) \
+
+#define rounds12 \
+ prepareTheta \
+ thetaRhoPiChiIotaPrepareTheta(12, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(13, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(14, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(15, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(16, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(17, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(18, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(19, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(20, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(21, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(22, A, E) \
+ thetaRhoPiChiIota(23, E, A) \
+
+#elif (Unrolling == 12)
+/* rounds24 loops twice over 12 written-out rounds; rounds12 is a single
+ * straight-line pass over rounds 12..23 with no loop. */
+#define rounds24 \
+ prepareTheta \
+ for(i=0; i<24; i+=12) { \
+ thetaRhoPiChiIotaPrepareTheta(i , A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \
+ } \
+
+#define rounds12 \
+ prepareTheta \
+ thetaRhoPiChiIotaPrepareTheta(12, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(13, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(14, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(15, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(16, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(17, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(18, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(19, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(20, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(21, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(22, A, E) \
+ thetaRhoPiChiIota(23, E, A) \
+
+#elif (Unrolling == 6)
+/* Six rounds per loop iteration. */
+#define rounds24 \
+ prepareTheta \
+ for(i=0; i<24; i+=6) { \
+ thetaRhoPiChiIotaPrepareTheta(i , A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
+ } \
+
+#define rounds12 \
+ prepareTheta \
+ for(i=12; i<24; i+=6) { \
+ thetaRhoPiChiIotaPrepareTheta(i , A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
+ } \
+
+#elif (Unrolling == 4)
+/* Four rounds per loop iteration. */
+#define rounds24 \
+ prepareTheta \
+ for(i=0; i<24; i+=4) { \
+ thetaRhoPiChiIotaPrepareTheta(i , A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
+ } \
+
+#define rounds12 \
+ prepareTheta \
+ for(i=12; i<24; i+=4) { \
+ thetaRhoPiChiIotaPrepareTheta(i , A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
+ } \
+
+#elif (Unrolling == 3)
+/* Three rounds per loop iteration. An odd count ends on an (A, E) round, so
+ * copyStateVariables(A, E) follows - presumably to bring the result back into
+ * A for the next iteration; confirm against the macro's definition. */
+#define rounds24 \
+ prepareTheta \
+ for(i=0; i<24; i+=3) { \
+ thetaRhoPiChiIotaPrepareTheta(i , A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
+ copyStateVariables(A, E) \
+ } \
+
+#define rounds12 \
+ prepareTheta \
+ for(i=12; i<24; i+=3) { \
+ thetaRhoPiChiIotaPrepareTheta(i , A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
+ thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
+ copyStateVariables(A, E) \
+ } \
+
+#elif (Unrolling == 2)
+/* Two rounds per loop iteration. */
+#define rounds24 \
+ prepareTheta \
+ for(i=0; i<24; i+=2) { \
+ thetaRhoPiChiIotaPrepareTheta(i , A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
+ } \
+
+#define rounds12 \
+ prepareTheta \
+ for(i=12; i<24; i+=2) { \
+ thetaRhoPiChiIotaPrepareTheta(i , A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
+ } \
+
+#elif (Unrolling == 1)
+/* One round per loop iteration, each followed by copyStateVariables(A, E)
+ * (same odd-count situation as the Unrolling == 3 case). */
+#define rounds24 \
+ prepareTheta \
+ for(i=0; i<24; i++) { \
+ thetaRhoPiChiIotaPrepareTheta(i , A, E) \
+ copyStateVariables(A, E) \
+ } \
+
+#define rounds12 \
+ prepareTheta \
+ for(i=12; i<24; i++) { \
+ thetaRhoPiChiIotaPrepareTheta(i , A, E) \
+ copyStateVariables(A, E) \
+ } \
+
+#else
+#error "Unrolling is not correctly specified!"
+#endif
+
+/*
+ * roundsN(__nrounds): runs round indices 24-__nrounds up to 23, two rounds per
+ * loop iteration. If the starting index is odd, a single round is executed
+ * first (followed by copyStateVariables(A, E)) so that the loop begins on an
+ * even index. Requires a variable `i` in the enclosing scope; it is
+ * overwritten.
+ */
+#define roundsN(__nrounds) \
+ prepareTheta \
+ i = 24 - (__nrounds); \
+ if ((i&1) != 0) { \
+ thetaRhoPiChiIotaPrepareTheta(i, A, E) \
+ copyStateVariables(A, E) \
+ ++i; \
+ } \
+ for( /* empty */; i<24; i+=2) { \
+ thetaRhoPiChiIotaPrepareTheta(i , A, E) \
+ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
+ }
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#include "KeccakSponge.h"
+
+#ifdef KeccakReference
+ #include "displayIntermediateValues.h"
+#endif
+
+/*
+ * Width-200 instantiation: KeccakSponge.inc is included with `prefix`, `SnP`,
+ * `SnP_width` and `SnP_Permute` set for KeccakP200 (18-round permutation),
+ * which generates the KeccakWidth200_Sponge* functions. The fast absorb loop
+ * is only wired in when KeccakF200_FastLoop_supported is defined. The whole
+ * section is skipped when KeccakP200_excluded is defined. The parameter macros
+ * are undefined afterwards so the next instantiation starts clean.
+ */
+#ifndef KeccakP200_excluded
+ #include "KeccakP-200-SnP.h"
+
+ #define prefix KeccakWidth200
+ #define SnP KeccakP200
+ #define SnP_width 200
+ #define SnP_Permute KeccakP200_Permute_18rounds
+ #if defined(KeccakF200_FastLoop_supported)
+ #define SnP_FastLoop_Absorb KeccakF200_FastLoop_Absorb
+ #endif
+ #include "KeccakSponge.inc"
+ #undef prefix
+ #undef SnP
+ #undef SnP_width
+ #undef SnP_Permute
+ #undef SnP_FastLoop_Absorb
+#endif
+
+/*
+ * Width-400 instantiation: KeccakSponge.inc is included with `prefix`, `SnP`,
+ * `SnP_width` and `SnP_Permute` set for KeccakP400 (20-round permutation),
+ * which generates the KeccakWidth400_Sponge* functions. The fast absorb loop
+ * is only wired in when KeccakF400_FastLoop_supported is defined. The whole
+ * section is skipped when KeccakP400_excluded is defined.
+ */
+#ifndef KeccakP400_excluded
+ #include "KeccakP-400-SnP.h"
+
+ #define prefix KeccakWidth400
+ #define SnP KeccakP400
+ #define SnP_width 400
+ #define SnP_Permute KeccakP400_Permute_20rounds
+ #if defined(KeccakF400_FastLoop_supported)
+ #define SnP_FastLoop_Absorb KeccakF400_FastLoop_Absorb
+ #endif
+ #include "KeccakSponge.inc"
+ #undef prefix
+ #undef SnP
+ #undef SnP_width
+ #undef SnP_Permute
+ #undef SnP_FastLoop_Absorb
+#endif
+
+/*
+ * Width-800 instantiation: KeccakSponge.inc is included with `prefix`, `SnP`,
+ * `SnP_width` and `SnP_Permute` set for KeccakP800 (22-round permutation),
+ * which generates the KeccakWidth800_Sponge* functions. The fast absorb loop
+ * is only wired in when KeccakF800_FastLoop_supported is defined. The whole
+ * section is skipped when KeccakP800_excluded is defined.
+ */
+#ifndef KeccakP800_excluded
+ #include "KeccakP-800-SnP.h"
+
+ #define prefix KeccakWidth800
+ #define SnP KeccakP800
+ #define SnP_width 800
+ #define SnP_Permute KeccakP800_Permute_22rounds
+ #if defined(KeccakF800_FastLoop_supported)
+ #define SnP_FastLoop_Absorb KeccakF800_FastLoop_Absorb
+ #endif
+ #include "KeccakSponge.inc"
+ #undef prefix
+ #undef SnP
+ #undef SnP_width
+ #undef SnP_Permute
+ #undef SnP_FastLoop_Absorb
+#endif
+
+/*
+ * Width-1600 instantiation: KeccakSponge.inc is included with `prefix`, `SnP`,
+ * `SnP_width` and `SnP_Permute` set for KeccakP1600 (24-round permutation),
+ * which generates the KeccakWidth1600_Sponge* functions. The fast absorb loop
+ * (KeccakF1600_FastLoop_Absorb) is only wired in when
+ * KeccakF1600_FastLoop_supported is defined. The whole section is skipped when
+ * KeccakP1600_excluded is defined.
+ */
+#ifndef KeccakP1600_excluded
+ #include "KeccakP-1600-SnP.h"
+
+ #define prefix KeccakWidth1600
+ #define SnP KeccakP1600
+ #define SnP_width 1600
+ #define SnP_Permute KeccakP1600_Permute_24rounds
+ #if defined(KeccakF1600_FastLoop_supported)
+ #define SnP_FastLoop_Absorb KeccakF1600_FastLoop_Absorb
+ #endif
+ #include "KeccakSponge.inc"
+ #undef prefix
+ #undef SnP
+ #undef SnP_width
+ #undef SnP_Permute
+ #undef SnP_FastLoop_Absorb
+#endif
+
+/*
+ * Second width-1600 instantiation, using the 12-round permutation
+ * (KeccakP1600_Permute_12rounds) and the prefix KeccakWidth1600_12rounds.
+ * It shares the KeccakP1600 state primitives with the 24-round variant above;
+ * only the permutation and the optional fast absorb loop
+ * (KeccakP1600_12rounds_FastLoop_Absorb, when
+ * KeccakP1600_12rounds_FastLoop_supported is defined) differ.
+ */
+#ifndef KeccakP1600_excluded
+ #include "KeccakP-1600-SnP.h"
+
+ #define prefix KeccakWidth1600_12rounds
+ #define SnP KeccakP1600
+ #define SnP_width 1600
+ #define SnP_Permute KeccakP1600_Permute_12rounds
+ #if defined(KeccakP1600_12rounds_FastLoop_supported)
+ #define SnP_FastLoop_Absorb KeccakP1600_12rounds_FastLoop_Absorb
+ #endif
+ #include "KeccakSponge.inc"
+ #undef prefix
+ #undef SnP
+ #undef SnP_width
+ #undef SnP_Permute
+ #undef SnP_FastLoop_Absorb
+#endif
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _KeccakSponge_h_
+#define _KeccakSponge_h_
+
+/** General information
+ *
+ * The following type and functions are not actually implemented. Their
+ * documentation is generic, with the prefix Prefix replaced by
+ * - KeccakWidth200 for a sponge function based on Keccak-f[200]
+ * - KeccakWidth400 for a sponge function based on Keccak-f[400]
+ * - KeccakWidth800 for a sponge function based on Keccak-f[800]
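+ * - KeccakWidth1600 for a sponge function based on Keccak-f[1600]
+ * - KeccakWidth1600_12rounds for a sponge function based on the 12-round
+ *   variant of the 1600-bit permutation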
+ *
+ * In all these functions, the rate and capacity must sum to the width of the
+ * chosen permutation. For instance, to use the sponge function
+ * Keccak[r=1344, c=256], one must use KeccakWidth1600_Sponge() or a combination
+ * of KeccakWidth1600_SpongeInitialize(), KeccakWidth1600_SpongeAbsorb(),
+ * KeccakWidth1600_SpongeAbsorbLastFewBits() and
+ * KeccakWidth1600_SpongeSqueeze().
+ *
+ * The Prefix_SpongeInstance contains the sponge instance attributes for use
+ * with the Prefix_Sponge* functions.
+ * It gathers the state processed by the permutation as well as the rate,
+ * the position of input/output bytes in the state and the phase
+ * (absorbing or squeezing).
+ */
+
+#ifdef DontReallyInclude_DocumentationOnly
+/** Function to evaluate the sponge function Keccak[r, c] in a single call.
+ * @param rate The value of the rate r.
+ * @param capacity The value of the capacity c.
+ * @param input Pointer to the input message (before the suffix).
+ * @param inputByteLen The length of the input message in bytes.
+ * @param suffix Byte containing from 0 to 7 suffix bits
+ * that must be absorbed after @a input.
+ * These <i>n</i> bits must be in the least significant bit positions.
+ * These bits must be delimited with a bit 1 at position <i>n</i>
+ * (counting from 0=LSB to 7=MSB) and followed by bits 0
+ * from position <i>n</i>+1 to position 7.
+ * Some examples:
+ * - If no bits are to be absorbed, then @a suffix must be 0x01.
+ * - If the 2-bit sequence 0,0 is to be absorbed, @a suffix must be 0x04.
+ * - If the 5-bit sequence 0,1,0,0,1 is to be absorbed, @a suffix must be 0x32.
+ * - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a suffix must be 0x8B.
+ * .
+ * @param output Pointer to the output buffer.
+ * @param outputByteLen The desired number of output bytes.
+ * @pre One must have r+c equal to the supported width of this implementation
+ * and the rate a multiple of 8 bits (one byte) in this implementation.
+ * @pre @a suffix ≠ 0x00
+ * @return Zero if successful, 1 otherwise.
+ */
+int Prefix_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen);
+
+/**
+ * Function to initialize the state of the Keccak[r, c] sponge function.
+ * The phase of the sponge function is set to absorbing.
+ * @param spongeInstance Pointer to the sponge instance to be initialized.
+ * @param rate The value of the rate r.
+ * @param capacity The value of the capacity c.
+ * @pre One must have r+c equal to the supported width of this implementation
+ * and the rate a multiple of 8 bits (one byte) in this implementation.
+ * @return Zero if successful, 1 otherwise.
+ */
+int Prefix_SpongeInitialize(Prefix_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity);
+
+/**
+ * Function to give input data bytes for the sponge function to absorb.
+ * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize().
+ * @param data Pointer to the input data.
+ * @param dataByteLen The number of input bytes provided in the input data.
+ * @pre The sponge function must be in the absorbing phase,
+ * i.e., Prefix_SpongeSqueeze() or Prefix_SpongeAbsorbLastFewBits()
+ * must not have been called before.
+ * @return Zero if successful, 1 otherwise.
+ */
+int Prefix_SpongeAbsorb(Prefix_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen);
+
+/**
+ * Function to give input data bits for the sponge function to absorb
+ * and then to switch to the squeezing phase.
+ * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize().
+ * @param delimitedData Byte containing from 0 to 7 trailing bits
+ * that must be absorbed.
+ * These <i>n</i> bits must be in the least significant bit positions.
+ * These bits must be delimited with a bit 1 at position <i>n</i>
+ * (counting from 0=LSB to 7=MSB) and followed by bits 0
+ * from position <i>n</i>+1 to position 7.
+ * Some examples:
+ * - If no bits are to be absorbed, then @a delimitedData must be 0x01.
+ * - If the 2-bit sequence 0,0 is to be absorbed, @a delimitedData must be 0x04.
+ * - If the 5-bit sequence 0,1,0,0,1 is to be absorbed, @a delimitedData must be 0x32.
+ * - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a delimitedData must be 0x8B.
+ * .
+ * @pre The sponge function must be in the absorbing phase,
+ * i.e., Prefix_SpongeSqueeze() or Prefix_SpongeAbsorbLastFewBits()
+ * must not have been called before.
+ * @pre @a delimitedData ≠ 0x00
+ * @return Zero if successful, 1 otherwise.
+ */
+int Prefix_SpongeAbsorbLastFewBits(Prefix_SpongeInstance *spongeInstance, unsigned char delimitedData);
+
+/**
+ * Function to squeeze output data from the sponge function.
+ * If the sponge function was in the absorbing phase, this function
+ * switches it to the squeezing phase
+ * as if Prefix_SpongeAbsorbLastFewBits(spongeInstance, 0x01) was called.
+ * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize().
+ * @param data Pointer to the buffer where to store the output data.
+ * @param dataByteLen The number of output bytes desired.
+ * @return Zero if successful, 1 otherwise.
+ */
+int Prefix_SpongeSqueeze(Prefix_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen);
+#endif
+
+#include <string.h>
+#include "align.h"
+
+/*
+ * KCP_DeclareSpongeStructure(prefix, size, alignment): declares the type
+ * prefix##_SpongeInstance (struct tag prefix##_SpongeInstanceStruct) with the
+ * requested alignment. Fields:
+ *   state       - `size` bytes holding the permutation state
+ *   rate        - the rate in bits (the sponge code divides it by 8)
+ *   byteIOIndex - position of the next input/output byte within the block
+ *   squeezing   - non-zero once the instance has switched to squeezing
+ */
+#define KCP_DeclareSpongeStructure(prefix, size, alignment) \
+ ALIGN(alignment) typedef struct prefix##_SpongeInstanceStruct { \
+ unsigned char state[size]; \
+ unsigned int rate; \
+ unsigned int byteIOIndex; \
+ int squeezing; \
+ } prefix##_SpongeInstance;
+
+/*
+ * KCP_DeclareSpongeFunctions(prefix): emits the prototypes of the five sponge
+ * entry points for one permutation width: prefix##_Sponge (one-shot),
+ * prefix##_SpongeInitialize, prefix##_SpongeAbsorb,
+ * prefix##_SpongeAbsorbLastFewBits and prefix##_SpongeSqueeze.
+ * Their contracts are described by the generic Prefix_* documentation earlier
+ * in this header.
+ */
+#define KCP_DeclareSpongeFunctions(prefix) \
+ int prefix##_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen); \
+ int prefix##_SpongeInitialize(prefix##_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); \
+ int prefix##_SpongeAbsorb(prefix##_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen); \
+ int prefix##_SpongeAbsorbLastFewBits(prefix##_SpongeInstance *spongeInstance, unsigned char delimitedData); \
+ int prefix##_SpongeSqueeze(prefix##_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen);
+
+/*
+ * Per-width declarations. For every permutation width that has not been
+ * excluded with the matching KeccakP<width>_excluded macro, include that
+ * width's SnP header and declare the instance structure and the sponge
+ * function prototypes. Width 1600 is declared twice: once with the
+ * KeccakWidth1600 prefix and once with the KeccakWidth1600_12rounds prefix.
+ */
+#ifndef KeccakP200_excluded
+ #include "KeccakP-200-SnP.h"
+ KCP_DeclareSpongeStructure(KeccakWidth200, KeccakP200_stateSizeInBytes, KeccakP200_stateAlignment)
+ KCP_DeclareSpongeFunctions(KeccakWidth200)
+#endif
+
+#ifndef KeccakP400_excluded
+ #include "KeccakP-400-SnP.h"
+ KCP_DeclareSpongeStructure(KeccakWidth400, KeccakP400_stateSizeInBytes, KeccakP400_stateAlignment)
+ KCP_DeclareSpongeFunctions(KeccakWidth400)
+#endif
+
+#ifndef KeccakP800_excluded
+ #include "KeccakP-800-SnP.h"
+ KCP_DeclareSpongeStructure(KeccakWidth800, KeccakP800_stateSizeInBytes, KeccakP800_stateAlignment)
+ KCP_DeclareSpongeFunctions(KeccakWidth800)
+#endif
+
+#ifndef KeccakP1600_excluded
+ #include "KeccakP-1600-SnP.h"
+ KCP_DeclareSpongeStructure(KeccakWidth1600, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment)
+ KCP_DeclareSpongeFunctions(KeccakWidth1600)
+#endif
+
+/* Same state size and alignment as above; only the prefix differs. */
+#ifndef KeccakP1600_excluded
+ #include "KeccakP-1600-SnP.h"
+ KCP_DeclareSpongeStructure(KeccakWidth1600_12rounds, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment)
+ KCP_DeclareSpongeFunctions(KeccakWidth1600_12rounds)
+#endif
+
+#endif
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+/*
+ * Name plumbing for this template file. The includer defines `prefix` and
+ * `SnP`; the macros below paste them onto fixed suffixes so the generic code
+ * that follows refers to the width-specific sponge functions and state
+ * primitives.
+ *
+ * JOIN goes through JOIN0 so that its arguments are macro-expanded before
+ * being pasted together with ##.
+ */
+#define JOIN0(a, b) a ## b
+#define JOIN(a, b) JOIN0(a, b)
+
+/* Public sponge API names, e.g. <prefix>_SpongeAbsorb. */
+#define Sponge JOIN(prefix, _Sponge)
+#define SpongeInstance JOIN(prefix, _SpongeInstance)
+#define SpongeInitialize JOIN(prefix, _SpongeInitialize)
+#define SpongeAbsorb JOIN(prefix, _SpongeAbsorb)
+#define SpongeAbsorbLastFewBits JOIN(prefix, _SpongeAbsorbLastFewBits)
+#define SpongeSqueeze JOIN(prefix, _SpongeSqueeze)
+
+/* State primitives of the selected permutation, e.g. <SnP>_AddBytes. */
+#define SnP_stateSizeInBytes JOIN(SnP, _stateSizeInBytes)
+#define SnP_stateAlignment JOIN(SnP, _stateAlignment)
+#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize)
+#define SnP_Initialize JOIN(SnP, _Initialize)
+#define SnP_AddByte JOIN(SnP, _AddByte)
+#define SnP_AddBytes JOIN(SnP, _AddBytes)
+#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes)
+
+/*
+ * One-shot sponge evaluation: absorbs `input` followed by the delimited
+ * `suffix` byte and the final padding bit, then squeezes `outputByteLen`
+ * bytes into `output`, all on a local state.
+ *
+ * Returns 1 without touching the output when rate+capacity differs from the
+ * permutation width, when the rate is zero, larger than the width or not a
+ * multiple of 8, or when `suffix` is zero. Returns 0 otherwise.
+ */
+int Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen)
+{
+    ALIGN(SnP_stateAlignment) unsigned char state[SnP_stateSizeInBytes];
+    const unsigned int blockBytes = rate/8;
+    const unsigned char *src = input;
+    unsigned char *dst = output;
+    unsigned int tail;
+
+    /* Reject inconsistent parameters before any state is set up. */
+    if ((rate+capacity != SnP_width)
+        || (rate == 0) || (rate > SnP_width) || ((rate % 8) != 0)
+        || (suffix == 0))
+        return 1;
+
+    /* Start from the initial state. */
+    SnP_StaticInitialize();
+    SnP_Initialize(state);
+
+    /* Absorbing phase, full blocks. */
+#ifdef SnP_FastLoop_Absorb
+    if ((inputByteLen >= blockBytes) && ((blockBytes % (SnP_width/200)) == 0)) {
+        /* The rate is a whole number of lanes: let the fast loop take the full blocks. */
+        const size_t consumed = SnP_FastLoop_Absorb(state, blockBytes/(SnP_width/200), src, inputByteLen);
+        src += consumed;
+        inputByteLen -= consumed;
+    }
+#endif
+    for (; inputByteLen >= (size_t)blockBytes; inputByteLen -= blockBytes, src += blockBytes) {
+        #ifdef KeccakReference
+        displayBytes(1, "Block to be absorbed", src, blockBytes);
+        #endif
+        SnP_AddBytes(state, src, 0, blockBytes);
+        SnP_Permute(state);
+    }
+
+    /* Absorbing phase, trailing partial block (fewer than blockBytes bytes). */
+    tail = (unsigned int)inputByteLen;
+    #ifdef KeccakReference
+    displayBytes(1, "Block to be absorbed (part)", src, tail);
+    #endif
+    SnP_AddBytes(state, src, 0, tail);
+
+    /* Suffix bits; their delimiter bit doubles as the first padding bit. */
+    #ifdef KeccakReference
+    {
+        unsigned char lastByte[1];
+        lastByte[0] = suffix;
+        displayBytes(1, "Block to be absorbed (last few bits + first bit of padding)", lastByte, 1);
+    }
+    #endif
+    SnP_AddByte(state, suffix, tail);
+    /* First padding bit landed on the very last bit of the block: the second one needs a fresh block. */
+    if ((tail == (blockBytes-1)) && (suffix >= 0x80))
+        SnP_Permute(state);
+    /* Second padding bit, at the end of the block. */
+    SnP_AddByte(state, 0x80, blockBytes-1);
+    #ifdef KeccakReference
+    {
+        unsigned char block[SnP_width/8];
+        memset(block, 0, SnP_width/8);
+        block[blockBytes-1] = 0x80;
+        displayBytes(1, "Second bit of padding", block, blockBytes);
+    }
+    #endif
+    SnP_Permute(state);
+    #ifdef KeccakReference
+    displayText(1, "--- Switching to squeezing phase ---");
+    #endif
+
+    /* Squeezing phase: full blocks while more than one block is still wanted. */
+    for (; outputByteLen > (size_t)blockBytes; outputByteLen -= blockBytes, dst += blockBytes) {
+        SnP_ExtractBytes(state, dst, 0, blockBytes);
+        SnP_Permute(state);
+        #ifdef KeccakReference
+        displayBytes(1, "Squeezed block", dst, blockBytes);
+        #endif
+    }
+
+    /* Squeezing phase: whatever is left (at most one block). */
+    tail = (unsigned int)outputByteLen;
+    SnP_ExtractBytes(state, dst, 0, tail);
+    #ifdef KeccakReference
+    displayBytes(1, "Squeezed block (part)", dst, tail);
+    #endif
+
+    return 0;
+}
+
+/* ---------------------------------------------------------------- */
+/* ---------------------------------------------------------------- */
+/* ---------------------------------------------------------------- */
+
+/*
+ * Prepares a sponge instance for absorbing: validates the parameters, resets
+ * the permutation state and stores the rate (in bits).
+ *
+ * Returns 1, leaving the instance untouched, when rate+capacity differs from
+ * the permutation width or when the rate is zero, larger than the width or
+ * not a multiple of 8. Returns 0 otherwise.
+ */
+int SpongeInitialize(SpongeInstance *instance, unsigned int rate, unsigned int capacity)
+{
+    const int parametersAreInvalid =
+        (rate+capacity != SnP_width)
+        || (rate == 0) || (rate > SnP_width) || ((rate % 8) != 0);
+
+    if (parametersAreInvalid)
+        return 1;
+
+    SnP_StaticInitialize();
+    SnP_Initialize(instance->state);
+
+    /* Absorbing phase, nothing queued yet. */
+    instance->squeezing = 0;
+    instance->byteIOIndex = 0;
+    instance->rate = rate;
+
+    return 0;
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Absorbs `dataByteLen` bytes from `data` into the sponge instance.
+ *
+ * Whole blocks are processed directly (through the fast loop when it is
+ * available and the rate is a whole number of lanes) as long as no partial
+ * block is pending; everything else goes through the byte queue tracked by
+ * instance->byteIOIndex, with a permutation each time a block fills up.
+ *
+ * Returns 1 if the instance is already squeezing, 0 otherwise.
+ *
+ * All length comparisons are done on size_t values *before* narrowing to
+ * unsigned int. The previous form truncated (dataByteLen - i) to 32 bits and
+ * then added byteIOIndex, which could wrap around for inputs of 4 GiB or more
+ * and let an oversized length reach SnP_AddBytes (buffer overflow,
+ * CVE-2022-37454).
+ */
+int SpongeAbsorb(SpongeInstance *instance, const unsigned char *data, size_t dataByteLen)
+{
+    size_t i, j;
+    unsigned int partialBlock;
+    const unsigned char *curData;
+    unsigned int rateInBytes = instance->rate/8;
+
+    if (instance->squeezing)
+        return 1; /* Too late for additional input */
+
+    i = 0;
+    curData = data;
+    while(i < dataByteLen) {
+        /* i < dataByteLen here, so dataByteLen-i cannot underflow (and, unlike i+rateInBytes, cannot overflow) */
+        if ((instance->byteIOIndex == 0) && (dataByteLen-i >= rateInBytes)) {
+#ifdef SnP_FastLoop_Absorb
+            /* processing full blocks first */
+            if ((rateInBytes % (SnP_width/200)) == 0) {
+                /* fast lane: whole lane rate */
+                j = SnP_FastLoop_Absorb(instance->state, rateInBytes/(SnP_width/200), curData, dataByteLen - i);
+                i += j;
+                curData += j;
+            }
+            else {
+#endif
+                for(j=dataByteLen-i; j>=rateInBytes; j-=rateInBytes) {
+                    #ifdef KeccakReference
+                    displayBytes(1, "Block to be absorbed", curData, rateInBytes);
+                    #endif
+                    SnP_AddBytes(instance->state, curData, 0, rateInBytes);
+                    SnP_Permute(instance->state);
+                    curData+=rateInBytes;
+                }
+                i = dataByteLen - j;
+#ifdef SnP_FastLoop_Absorb
+            }
+#endif
+        }
+        else {
+            /* normal lane: using the message queue */
+            /* byteIOIndex < rateInBytes here, so the room left in the block is at least 1 */
+            if (dataByteLen-i > rateInBytes-instance->byteIOIndex)
+                partialBlock = rateInBytes-instance->byteIOIndex;
+            else
+                partialBlock = (unsigned int)(dataByteLen - i); /* fits: bounded by the room left */
+            #ifdef KeccakReference
+            displayBytes(1, "Block to be absorbed (part)", curData, partialBlock);
+            #endif
+            i += partialBlock;
+
+            SnP_AddBytes(instance->state, curData, instance->byteIOIndex, partialBlock);
+            curData += partialBlock;
+            instance->byteIOIndex += partialBlock;
+            if (instance->byteIOIndex == rateInBytes) {
+                SnP_Permute(instance->state);
+                instance->byteIOIndex = 0;
+            }
+        }
+    }
+    return 0;
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Absorbs the final 0..7 message bits carried in `delimitedData` (the bits,
+ * followed by a delimiter bit 1), applies the final padding bit and switches
+ * the instance to the squeezing phase.
+ *
+ * Returns 1 when `delimitedData` is zero or the instance is already
+ * squeezing, 0 otherwise.
+ */
+int SpongeAbsorbLastFewBits(SpongeInstance *instance, unsigned char delimitedData)
+{
+    const unsigned int rateInBytes = instance->rate/8;
+    const unsigned int lastByteOfBlock = rateInBytes-1;
+
+    if ((delimitedData == 0) || instance->squeezing)
+        return 1; /* Missing delimiter bit, or too late for additional input */
+
+    #ifdef KeccakReference
+    {
+        unsigned char lastByte[1];
+        lastByte[0] = delimitedData;
+        displayBytes(1, "Block to be absorbed (last few bits + first bit of padding)", lastByte, 1);
+    }
+    #endif
+    /* The delimiter bit of the trailing bits is also the first padding bit. */
+    SnP_AddByte(instance->state, delimitedData, instance->byteIOIndex);
+    /* When that bit is the very last bit of the block, the second padding bit goes into a fresh block. */
+    if ((instance->byteIOIndex == lastByteOfBlock) && (delimitedData >= 0x80))
+        SnP_Permute(instance->state);
+    /* Second padding bit, at the end of the block. */
+    SnP_AddByte(instance->state, 0x80, lastByteOfBlock);
+    #ifdef KeccakReference
+    {
+        unsigned char block[SnP_width/8];
+        memset(block, 0, SnP_width/8);
+        block[lastByteOfBlock] = 0x80;
+        displayBytes(1, "Second bit of padding", block, rateInBytes);
+    }
+    #endif
+    SnP_Permute(instance->state);
+
+    /* From now on the instance produces output, starting at the beginning of the block. */
+    instance->squeezing = 1;
+    instance->byteIOIndex = 0;
+    #ifdef KeccakReference
+    displayText(1, "--- Switching to squeezing phase ---");
+    #endif
+    return 0;
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Squeezes `dataByteLen` output bytes from the sponge instance into `data`.
+ *
+ * If the instance is still absorbing, it is first switched to the squeezing
+ * phase as if SpongeAbsorbLastFewBits(instance, 0x01) had been called.
+ * Whole blocks are produced directly when the current block is exhausted;
+ * everything else goes through the byte queue tracked by
+ * instance->byteIOIndex.
+ *
+ * Always returns 0.
+ *
+ * All length comparisons are done on size_t values *before* narrowing to
+ * unsigned int. The previous form truncated (dataByteLen - i) to 32 bits and
+ * then added byteIOIndex, which could wrap around for requests of 4 GiB or
+ * more and let an oversized length reach SnP_ExtractBytes (same defect class
+ * as CVE-2022-37454 on the absorb side).
+ */
+int SpongeSqueeze(SpongeInstance *instance, unsigned char *data, size_t dataByteLen)
+{
+    size_t i, j;
+    unsigned int partialBlock;
+    unsigned int rateInBytes = instance->rate/8;
+    unsigned char *curData;
+
+    if (!instance->squeezing)
+        SpongeAbsorbLastFewBits(instance, 0x01);
+
+    i = 0;
+    curData = data;
+    while(i < dataByteLen) {
+        /* i < dataByteLen here, so dataByteLen-i cannot underflow (and, unlike i+rateInBytes, cannot overflow) */
+        if ((instance->byteIOIndex == rateInBytes) && (dataByteLen-i >= rateInBytes)) {
+            for(j=dataByteLen-i; j>=rateInBytes; j-=rateInBytes) {
+                SnP_Permute(instance->state);
+                SnP_ExtractBytes(instance->state, curData, 0, rateInBytes);
+                #ifdef KeccakReference
+                displayBytes(1, "Squeezed block", curData, rateInBytes);
+                #endif
+                curData+=rateInBytes;
+            }
+            i = dataByteLen - j;
+        }
+        else {
+            /* normal lane: using the message queue */
+            if (instance->byteIOIndex == rateInBytes) {
+                SnP_Permute(instance->state);
+                instance->byteIOIndex = 0;
+            }
+            /* byteIOIndex < rateInBytes here, so the bytes left in the block are at least 1 */
+            if (dataByteLen-i > rateInBytes-instance->byteIOIndex)
+                partialBlock = rateInBytes-instance->byteIOIndex;
+            else
+                partialBlock = (unsigned int)(dataByteLen - i); /* fits: bounded by the bytes left */
+            i += partialBlock;
+
+            SnP_ExtractBytes(instance->state, curData, instance->byteIOIndex, partialBlock);
+            #ifdef KeccakReference
+            displayBytes(1, "Squeezed block (part)", curData, partialBlock);
+            #endif
+            curData += partialBlock;
+            instance->byteIOIndex += partialBlock;
+        }
+    }
+    return 0;
+}
+
+/* ---------------------------------------------------------------- */
+
+/*
+ * Drop the name mappings defined at the top of this file so that it can be
+ * included again with a different `prefix`/`SnP` pair. JOIN0 and JOIN are
+ * left defined; `prefix`, `SnP`, `SnP_width`, `SnP_Permute` and
+ * `SnP_FastLoop_Absorb` are not undefined here.
+ */
+#undef Sponge
+#undef SpongeInstance
+#undef SpongeInitialize
+#undef SpongeAbsorb
+#undef SpongeAbsorbLastFewBits
+#undef SpongeSqueeze
+#undef SnP_stateSizeInBytes
+#undef SnP_stateAlignment
+#undef SnP_StaticInitialize
+#undef SnP_Initialize
+#undef SnP_AddByte
+#undef SnP_AddBytes
+#undef SnP_ExtractBytes
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _SnP_Relaned_h_
+#define _SnP_Relaned_h_
+
+/*
+ * SnP_AddBytes: byte-range "add" built from two lane-level primitives.
+ * - offset == 0: the first length/SnP_laneLengthInBytes whole lanes are handed
+ *   to SnP_AddLanes, then SnP_AddBytesInLane is called for the remaining
+ *   length%SnP_laneLengthInBytes bytes (the call is made even when that
+ *   remainder is 0).
+ * - otherwise: walks lane by lane starting at the lane containing `offset`,
+ *   calling SnP_AddBytesInLane with at most the bytes left in the current
+ *   lane until `length` bytes have been processed.
+ * The arguments are expanded several times, so they should be free of side
+ * effects. The expansion is a brace-enclosed block, not an expression.
+ */
+#define SnP_AddBytes(state, data, offset, length, SnP_AddLanes, SnP_AddBytesInLane, SnP_laneLengthInBytes) \
+ { \
+ if ((offset) == 0) { \
+ SnP_AddLanes(state, data, (length)/SnP_laneLengthInBytes); \
+ SnP_AddBytesInLane(state, \
+ (length)/SnP_laneLengthInBytes, \
+ (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
+ 0, \
+ (length)%SnP_laneLengthInBytes); \
+ } \
+ else { \
+ unsigned int _sizeLeft = (length); \
+ unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \
+ unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \
+ const unsigned char *_curData = (data); \
+ while(_sizeLeft > 0) { \
+ unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \
+ if (_bytesInLane > _sizeLeft) \
+ _bytesInLane = _sizeLeft; \
+ SnP_AddBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \
+ _sizeLeft -= _bytesInLane; \
+ _lanePosition++; \
+ _offsetInLane = 0; \
+ _curData += _bytesInLane; \
+ } \
+ } \
+ }
+
+/*
+ * SnP_OverwriteBytes: byte-range "overwrite" built from two lane-level
+ * primitives, with the same control structure as SnP_AddBytes:
+ * - offset == 0: whole lanes go to SnP_OverwriteLanes, the remaining
+ *   length%SnP_laneLengthInBytes bytes to SnP_OverwriteBytesInLane (called
+ *   even when that remainder is 0).
+ * - otherwise: lane-by-lane walk from the lane containing `offset`, calling
+ *   SnP_OverwriteBytesInLane with at most the bytes left in the current lane.
+ * The arguments are expanded several times, so they should be free of side
+ * effects. The expansion is a brace-enclosed block, not an expression.
+ */
+#define SnP_OverwriteBytes(state, data, offset, length, SnP_OverwriteLanes, SnP_OverwriteBytesInLane, SnP_laneLengthInBytes) \
+ { \
+ if ((offset) == 0) { \
+ SnP_OverwriteLanes(state, data, (length)/SnP_laneLengthInBytes); \
+ SnP_OverwriteBytesInLane(state, \
+ (length)/SnP_laneLengthInBytes, \
+ (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
+ 0, \
+ (length)%SnP_laneLengthInBytes); \
+ } \
+ else { \
+ unsigned int _sizeLeft = (length); \
+ unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \
+ unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \
+ const unsigned char *_curData = (data); \
+ while(_sizeLeft > 0) { \
+ unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \
+ if (_bytesInLane > _sizeLeft) \
+ _bytesInLane = _sizeLeft; \
+ SnP_OverwriteBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \
+ _sizeLeft -= _bytesInLane; \
+ _lanePosition++; \
+ _offsetInLane = 0; \
+ _curData += _bytesInLane; \
+ } \
+ } \
+ }
+
+/*
+ * SnP_ExtractBytes: byte-range "extract" built from two lane-level
+ * primitives, with the same control structure as SnP_AddBytes except that
+ * `data` is written to (the cursor is a non-const pointer):
+ * - offset == 0: whole lanes go to SnP_ExtractLanes, the remaining
+ *   length%SnP_laneLengthInBytes bytes to SnP_ExtractBytesInLane (called even
+ *   when that remainder is 0).
+ * - otherwise: lane-by-lane walk from the lane containing `offset`, calling
+ *   SnP_ExtractBytesInLane with at most the bytes left in the current lane.
+ * The arguments are expanded several times, so they should be free of side
+ * effects. The expansion is a brace-enclosed block, not an expression.
+ */
+#define SnP_ExtractBytes(state, data, offset, length, SnP_ExtractLanes, SnP_ExtractBytesInLane, SnP_laneLengthInBytes) \
+ { \
+ if ((offset) == 0) { \
+ SnP_ExtractLanes(state, data, (length)/SnP_laneLengthInBytes); \
+ SnP_ExtractBytesInLane(state, \
+ (length)/SnP_laneLengthInBytes, \
+ (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
+ 0, \
+ (length)%SnP_laneLengthInBytes); \
+ } \
+ else { \
+ unsigned int _sizeLeft = (length); \
+ unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \
+ unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \
+ unsigned char *_curData = (data); \
+ while(_sizeLeft > 0) { \
+ unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \
+ if (_bytesInLane > _sizeLeft) \
+ _bytesInLane = _sizeLeft; \
+ SnP_ExtractBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \
+ _sizeLeft -= _bytesInLane; \
+ _lanePosition++; \
+ _offsetInLane = 0; \
+ _curData += _bytesInLane; \
+ } \
+ } \
+ }
+
+/*
+ * SnP_ExtractAndAddBytes(state, input, output, offset, length, ...)
+ *
+ * Dispatches a byte-range request (`length` bytes, starting at byte `offset`,
+ * reading from `input` and writing to `output`) to the two lane-level
+ * operations supplied by the caller:
+ *   SnP_ExtractAndAddLanes(state, input, output, laneCount)
+ *   SnP_ExtractAndAddBytesInLane(state, lanePosition, input, output,
+ *                                offsetInLane, length)
+ * SnP_laneLengthInBytes is the size of one lane in bytes.
+ *
+ * offset == 0: the whole lanes go through SnP_ExtractAndAddLanes in one call
+ * and the trailing (length % laneLength) bytes through one
+ * SnP_ExtractAndAddBytesInLane call; that second call is issued even when the
+ * remainder is 0.
+ * offset != 0: the range is walked one lane at a time with
+ * SnP_ExtractAndAddBytesInLane; the input and output pointers advance in
+ * lock-step.
+ *
+ * Macro arguments are expanded more than once, so they must not have side
+ * effects. The expansion is a bare { } block.
+ */
+#define SnP_ExtractAndAddBytes(state, input, output, offset, length, SnP_ExtractAndAddLanes, SnP_ExtractAndAddBytesInLane, SnP_laneLengthInBytes) \
+    { \
+        if ((offset) != 0) { \
+            unsigned int _bytesLeft = (length); \
+            unsigned int _laneIndex = (offset)/SnP_laneLengthInBytes; \
+            unsigned int _laneOffset = (offset)%SnP_laneLengthInBytes; \
+            const unsigned char *_readCursor = (input); \
+            unsigned char *_writeCursor = (output); \
+            /* Only the first lane is entered mid-way; later lanes start at 0. */ \
+            for ( ; _bytesLeft > 0; _laneIndex++, _laneOffset = 0) { \
+                unsigned int _room = SnP_laneLengthInBytes - _laneOffset; \
+                unsigned int _chunk = (_room > _bytesLeft) ? _bytesLeft : _room; \
+                SnP_ExtractAndAddBytesInLane(state, _laneIndex, _readCursor, _writeCursor, _laneOffset, _chunk); \
+                _bytesLeft -= _chunk; \
+                _readCursor += _chunk; \
+                _writeCursor += _chunk; \
+            } \
+        } \
+        else { \
+            /* Lane-aligned start: bulk call for whole lanes, then the tail. */ \
+            SnP_ExtractAndAddLanes(state, input, output, (length)/SnP_laneLengthInBytes); \
+            SnP_ExtractAndAddBytesInLane(state, \
+                (length)/SnP_laneLengthInBytes, \
+                (input)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
+                (output)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
+                0, \
+                (length)%SnP_laneLengthInBytes); \
+        } \
+    }
+
+#endif
--- /dev/null
+/*
+Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
+Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
+denoted as "the implementer".
+
+For more information, feedback or questions, please refer to our websites:
+http://keccak.noekeon.org/
+http://keyak.noekeon.org/
+http://ketje.noekeon.org/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _align_h_
+#define _align_h_
+
+/*
+ * ALIGN(x): compiler-specific specifier requesting x-byte alignment for the
+ * declaration it is attached to. It expands to nothing on compilers that are
+ * not recognised below.
+ *
+ * Any earlier definition is dropped first: on Mac OS-X (and possibly other
+ * systems) param.h already defines ALIGN(x), and redefining it without an
+ * #undef fails under -Werror. #undef on an undefined name is a no-op.
+ */
+#undef ALIGN
+
+#ifdef __GNUC__
+#  define ALIGN(x) __attribute__ ((aligned(x)))
+#elif defined(_MSC_VER)
+#  define ALIGN(x) __declspec(align(x))
+#elif defined(__ARMCC_VERSION)
+#  define ALIGN(x) __align(x)
+#else
+#  define ALIGN(x)
+#endif
+
+#endif /* _align_h_ */
--- /dev/null
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The redistribution and use of this software (with or without changes)
+ is allowed without the payment of fees or royalties provided that:
+
+ 1. source code distributions include the above copyright notice, this
+ list of conditions and the following disclaimer;
+
+ 2. binary distributions include the above copyright notice, this list
+ of conditions and the following disclaimer in their documentation;
+
+ 3. the name of the copyright holder is not used to endorse products
+ built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+ Changes for ARM 9/9/2010
+*/
+
+#ifndef _BRG_ENDIAN_H
+#define _BRG_ENDIAN_H
+
+/*
+ * Defines PLATFORM_BYTE_ORDER as IS_BIG_ENDIAN or IS_LITTLE_ENDIAN.
+ *
+ * Detection order:
+ *   1. endian symbols in the four spellings SYMBOL, _SYMBOL, __SYMBOL and
+ *      __SYMBOL__ (each spelling is tested by its own independent #if chain);
+ *   2. if still undefined: the byte order reported by the compiler through
+ *      __BYTE_ORDER__ / __ORDER_*_ENDIAN__;
+ *   3. if still undefined: well-known machine/OS macros;
+ *   4. otherwise: the hard-coded "EDIT HERE" default (little-endian).
+ */
+
+#define IS_BIG_ENDIAN      4321 /* byte 0 is most significant (mc68k) */
+#define IS_LITTLE_ENDIAN   1234 /* byte 0 is least significant (i386) */
+
+/* The system headers below are compiled out, so only macros that are already
+   defined when this header is read take part in the detection. */
+#if 0
+/* Include files where endian defines and byteswap functions may reside */
+#if defined( __sun )
+#  include <sys/isa_defs.h>
+#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
+#  include <sys/endian.h>
+#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
+      defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
+#  include <machine/endian.h>
+#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
+#  if !defined( __MINGW32__ ) && !defined( _AIX )
+#    include <endian.h>
+#    if !defined( __BEOS__ )
+#      include <byteswap.h>
+#    endif
+#  endif
+#endif
+#endif
+
+/* Now attempt to set the define for platform byte order using any  */
+/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which  */
+/* seem to encompass most endian symbol definitions                 */
+
+#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
+#  if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( BIG_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( LITTLE_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
+#  if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( _BIG_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( _LITTLE_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
+#  if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( __BIG_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
+#  if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( __BIG_ENDIAN__ )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN__ )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+/*  if the platform byte order could not be determined, then try to */
+/*  set this define using common machine defines                    */
+#if !defined(PLATFORM_BYTE_ORDER)
+
+/* Ask the compiler first: GCC and Clang predefine __BYTE_ORDER__ together
+   with __ORDER_BIG_ENDIAN__ / __ORDER_LITTLE_ENDIAN__. Without this check a
+   big-endian target matching none of the machine names below would fall
+   through to the little-endian default. */
+#if defined( __BYTE_ORDER__ ) && defined( __ORDER_BIG_ENDIAN__ ) && \
+    __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+
+#elif defined( __BYTE_ORDER__ ) && defined( __ORDER_LITTLE_ENDIAN__ ) && \
+      __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+
+#elif defined( __alpha__ ) || defined( __alpha ) || defined( i386 )     || \
+      defined( __i386__ )  || defined( _M_I86 )  || defined( _M_IX86 )  || \
+      defined( __OS2__ )   || defined( sun386 )  || defined( __TURBOC__ ) || \
+      defined( vax )       || defined( vms )     || defined( VMS )      || \
+      defined( __VMS )     || defined( _M_X64 )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+
+#elif defined( AMIGA )    || defined( applec )    || defined( __AS400__ )  || \
+      defined( _CRAY )    || defined( __hppa )    || defined( __hp9000 )   || \
+      defined( ibm370 )   || defined( mc68000 )   || defined( m68k )       || \
+      defined( __MRC__ )  || defined( __MVS__ )   || defined( __MWERKS__ ) || \
+      defined( sparc )    || defined( __sparc)    || defined( SYMANTEC_C ) || \
+      defined( __VOS__ )  || defined( __TIGCC__ ) || defined( __TANDEM )   || \
+      defined( THINK_C )  || defined( __VMCMS__ ) || defined( _AIX )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+
+#elif defined(__arm__)
+#  ifdef __BIG_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  else
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+/* NOTE: while the next condition is 1, every remaining platform is assumed
+   to be little-endian and the branches after it are never reached. */
+#elif 1  /* **** EDIT HERE IF NECESSARY **** */
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#elif 0  /* **** EDIT HERE IF NECESSARY **** */
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#else
+#  error Please edit the EDIT HERE branches in brg_endian.h to set the platform byte order
+#endif
+
+#endif
+
+#endif