/*
 * Copyright (c) 2016
 * Distributed Systems Software.  All rights reserved.
 * See the file LICENSE for redistribution information.
 *
 * $Id: $
 */

/*
 * Adapted from the Argon2 distribution:
 *   https://github.com/p-h-c/phc-winner-argon2
 * 2-Jun-2016 (Commit d7266c4)
 * License: CC0 (Creative Commons)

 * See:
 * https://www.cryptolux.org/images/e/e1/Draft-irtf-cfrg-argon2.txt
 */

/*
 * Argon2 source code package
 *
 * Written by Daniel Dinu and Dmitry Khovratovich, 2015
 *
 * This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
 *
 * You should have received a copy of the CC0 Public Domain Dedication along
 * with this software. If not, see
 * <http://creativecommons.org/publicdomain/zero/1.0/>.
 */

/* For memory wiping. */
#if defined __STDC_LIB_EXT1__
#define __STDC_WANT_LIB_EXT1__ 1
#endif

#include <time.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <limits.h>
#include <stddef.h>
#include <pthread.h>

#include "argon2.h"

/*
 * CONST_CAST(type) expr expands to (type)(uintptr_t) expr, i.e. the
 * expression is converted to an integer of pointer width before being cast
 * to TYPE.  Presumably used to drop a const qualifier without a compiler
 * warning -- no use is visible in this part of the file.
 */
#define CONST_CAST(x) (x)(uintptr_t)

/*
 * Compile-time sizes used throughout the core: the memory block geometry and
 * the lengths of the pre-hashing buffers.
 */
enum argon2_core_constants {
  /* Memory block size in bytes. */
  ARGON2_BLOCK_SIZE = 1024,
  /* Number of 64-bit words (128) and of 128-bit words (64) in one block. */
  ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8,
  ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16,

  /*
   * Number of pseudo-random values generated by one call to Blake in Argon2i
   * to generate reference block positions.
   */
  ARGON2_ADDRESSES_IN_BLOCK = 128,

  /*
   * Pre-hashing digest length and its extension: the seed is the 64-byte
   * digest followed by 8 extra bytes (two 32-bit fields written by
   * fill_first_blocks()).
   */
  ARGON2_PREHASH_DIGEST_LENGTH = 64,
  ARGON2_PREHASH_SEED_LENGTH   = 72
};

/*
 * A 1 KiB memory block, stored as ARGON2_QWORDS_IN_BLOCK (128) 64-bit words.
 * Blocks are copied with copy_block() and combined with xor_block(); the
 * words are accessed directly through the V array (no bounds checking).
 */
typedef struct block_ {
  uint64_t v[ARGON2_QWORDS_IN_BLOCK];
} Block;

/*
 * Argon2 instance, used to evaluate the number and location of blocks
 * to construct in each thread.
 */
typedef struct Argon2_instance_t {
  Block *memory;			/* Block array: set by initialize(), released by finalize() */
  uint32_t version;
  uint32_t passes;			/* Number of passes made by fill_memory_blocks() */
  uint32_t memory_blocks;	/* Number of blocks in memory */
  uint32_t segment_length;	/* Blocks per segment (one slice of one lane) */
  uint32_t lane_length;		/* Blocks per lane; lane L starts at memory[L * lane_length] */
  uint32_t lanes;			/* Number of lanes */
  uint32_t threads;			/* Max. number of segment threads running at once */
  Argon2_type type;			/* Argon2 variant; hashed into H_0 by initial_hash() */
  int print_internals;		/* Whether to print the memory blocks. */
} Argon2_instance_t;

/*
 * Argon2 position: where we construct the block right now. Used to distribute
 * work between threads.
 */
typedef struct Argon2_position_t {
  uint32_t pass;		/* Pass number, 0 .. passes-1 */
  uint32_t lane;		/* Lane number, 0 .. lanes-1 */
  uint8_t slice;		/* Slice within the pass, 0 .. ARGON2_SYNC_POINTS-1 */
  uint32_t index;		/* Block offset within the current segment */
} Argon2_position_t;

/*
 * Argument passed to fill_segment_thr(): the shared instance and the starting
 * position of the segment that the thread must fill.  fill_memory_blocks()
 * keeps one of these per lane.
 */
typedef struct Argon2_thread_data {
  Argon2_instance_t *instance_ptr;
  Argon2_position_t pos;
} Argon2_thread_data;

/*
 * Limits for values taken from an encoded hash string: maximum lane count and
 * minimum salt and output lengths in bytes.
 * NOTE(review): presumably enforced by decode_string(); the code that uses
 * them is not visible in this part of the file -- confirm.
 */
enum {
  ARGON2_MAX_DECODED_LANES = UINT32_C(255),
  ARGON2_MIN_DECODED_SALT_LEN = UINT32_C(8),
  ARGON2_MIN_DECODED_OUT_LEN = UINT32_C(12)
};

/*
 * We implement an abstraction layer for the simple requirements
 * of the Argon2 code. We only require 3 primitives - thread creation,
 * joining, and termination - so full emulation of the pthreads API
 * is unwarranted. Currently we wrap pthreads.
 *
 * The API defines 2 types: the function pointer type,
 * Argon2_thread_func_t, and the type of the thread handle,
 * Argon2_thread_handle_t.
 */

/* Entry point of a thread: takes and returns an untyped pointer. */
typedef void *(*Argon2_thread_func_t)(void *);
/* Thread handle; a pthread_t in the current (pthreads) implementation. */
typedef pthread_t Argon2_thread_handle_t;

/*
 * Forward declarations of the file-local functions, so that definitions may
 * appear in any order below.
 */

/* Memory management and the core algorithm. */
static int allocate_memory(Block **memory, uint32_t m_cost);
static void secure_wipe_memory(void *v, size_t n);
static void clear_memory(Argon2_instance_t *instance, int clear);
static void free_memory(Block *memory);
static uint32_t index_alpha(const Argon2_instance_t *instance,
							const Argon2_position_t *position,
							uint32_t pseudo_rand, int same_lane);
static int validate_inputs(const Argon2_context *context);
static int initial_hash(uint8_t *blockhash, Argon2_context *context,
						Argon2_type type);
static void fill_first_blocks(uint8_t *blockhash,
							  const Argon2_instance_t *instance);
static int initialize(Argon2_instance_t *instance, Argon2_context *context);
static void finalize(const Argon2_context *context, Argon2_instance_t *instance);
static void fill_segment(const Argon2_instance_t *instance,
						 Argon2_position_t position);
static int fill_memory_blocks(Argon2_instance_t *instance);

/* Whole-block helpers. */
static void init_block_value(Block *b, uint8_t in);
static void copy_block(Block *dst, const Block *src);
static void xor_block(Block *dst, const Block *src);

/* Thread abstraction layer. */
static int argon2_thread_create(Argon2_thread_handle_t *handle,
								Argon2_thread_func_t func, void *args);
static int argon2_thread_join(Argon2_thread_handle_t handle);
static void argon2_thread_exit(void);

/* Encoded hash string support. */
static int encode_string(char *dst, size_t dst_len, Argon2_context *ctx,
						 Argon2_type type);
static int decode_string(Argon2_context *ctx, const char *str, Argon2_type type);

/* Compression function and Argon2i address generation. */
static void fill_block_with_xor(const Block *prev_block, const Block *ref_block,
								Block *next_block);
static void fill_block(const Block *prev_block, const Block *ref_block,
					   Block *next_block);
static void generate_addresses(const Argon2_instance_t *instance,
							   const Argon2_position_t *position,
							   uint64_t *pseudo_rands);

/*
 * Return the number of characters needed to hold the Base64 encoding of a
 * LEN-byte value when every started group of three input bytes takes four
 * output characters (i.e. the padded length).
 */
static size_t
b64len(uint32_t len)
{
  const size_t groups = ((size_t) len + 2) / 3;

  return(groups * 4);
}

/*
 * Return the number of decimal digits needed to print NUM (at least one,
 * so zero counts as a single digit).
 */
static size_t
numlen(uint32_t num)
{
  size_t digits = 0;

  do {
	++digits;
	num /= 10;
  } while (num != 0);

  return(digits);
}

/*
 * Optional capture buffer for the pre-hashing seed.  When non-NULL,
 * initialize() copies its whole ARGON2_PREHASH_SEED_LENGTH-byte blockhash
 * buffer here, so the buffer must have room for that many bytes.
 */
static uint8_t *prehash_digest_buf = NULL;

/*
 * Print the message ERROR on stderr, prefixed with "Error: " and followed by
 * a newline.  ERROR must be a valid string.
 */
static void
err(const char *error)
{

  fprintf(stderr, "Error: %s\n", error);
}

/*
 * Write the BYTES_LEN bytes at BYTES to FP as two-digit lowercase hex values,
 * eight per line, separated by single spaces.  Every line, including the
 * last, ends with a newline; nothing at all is written when BYTES_LEN is 0
 * (in which case BYTES is not dereferenced).
 */
static void
print_hex(FILE *fp, const uint8_t *bytes, size_t bytes_len)
{
  size_t i;

  /* A size_t index avoids the signed/unsigned comparison with BYTES_LEN. */
  for (i = 0; i < bytes_len; i++) {
	if (i % 8 != 0)
	  fprintf(fp, " ");
	else if (i != 0)
	  fprintf(fp, "\n");
	fprintf(fp, "%02x", bytes[i]);
  }

  if (bytes_len != 0)
	fprintf(fp, "\n");
}

/*
 * Designed by the Lyra PHC team.
 *
 * Return x + y + 2 * lo(x) * lo(y), where lo() is the low 32 bits of its
 * argument; all arithmetic is modulo 2^64.
 */
static BLAKE2_INLINE uint64_t
fBlaMka(uint64_t x, uint64_t y)
{
  const uint64_t lo_x = x & UINT64_C(0xFFFFFFFF);
  const uint64_t lo_y = y & UINT64_C(0xFFFFFFFF);

  return(x + y + ((lo_x * lo_y) << 1));
}

/*
 * Mixing step of the round function: updates the four 64-bit lvalues A, B,
 * C and D in place, alternating fBlaMka() additions with XOR-and-rotate
 * steps (right rotations by 32, 24, 16 and 63 bits).
 * Each argument is evaluated several times, so pass plain lvalues only.
 */
#define G(a, b, c, d)													\
  do {																	\
	a = fBlaMka(a, b);													\
	d = rotr64(d ^ a, 32);												\
	c = fBlaMka(c, d);													\
	b = rotr64(b ^ c, 24);												\
	a = fBlaMka(a, b);													\
	d = rotr64(d ^ a, 16);												\
	c = fBlaMka(c, d);													\
	b = rotr64(b ^ c, 63);												\
  } while ((void) 0, 0)

/*
 * One round over sixteen 64-bit lvalues V0..V15 without message input.
 * Viewing them as a 4x4 matrix in row-major order, G is applied first to the
 * four columns (v0,v4,v8,v12 ...) and then to the four diagonals
 * (v0,v5,v10,v15 ...).  Arguments are evaluated more than once.
 */
#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, \
						   v11, v12, v13, v14, v15)						\
  do {																	\
	G(v0, v4, v8, v12);													\
	G(v1, v5, v9, v13);													\
	G(v2, v6, v10, v14);												\
	G(v3, v7, v11, v15);												\
	G(v0, v5, v10, v15);												\
	G(v1, v6, v11, v12);												\
	G(v2, v7, v8, v13);													\
	G(v3, v4, v9, v14);													\
  } while ((void) 0, 0)

/*
 * NOT_OPTIMIZED is a function attribute that asks the compiler not to
 * optimize the function it is attached to: optnone on clang (when the
 * attribute is available), optimize("O0") on GCC 4.4 or later, and nothing
 * on any other compiler.  It is applied to secure_wipe_memory().
 */
#if defined(__clang__)
#if __has_attribute(optnone)
#define NOT_OPTIMIZED __attribute__((optnone))
#endif
#elif defined(__GNUC__)
#define GCC_VERSION													\
  (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#if GCC_VERSION >= 40400
#define NOT_OPTIMIZED __attribute__((optimize("O0")))
#endif
#endif
/* Fallback: no suitable attribute is known for this compiler. */
#ifndef NOT_OPTIMIZED
#define NOT_OPTIMIZED
#endif

/*
 * Set every byte of block B to the value IN.
 */
static void
init_block_value(Block *b, uint8_t in)
{
  /* Replicate IN into all eight bytes of a word, then store word by word. */
  const uint64_t word = UINT64_C(0x0101010101010101) * in;
  size_t i;

  for (i = 0; i < sizeof(b->v) / sizeof(b->v[0]); ++i)
	b->v[i] = word;
}

/*
 * Make block DST an exact copy of block SRC.
 */
static void
copy_block(Block *dst, const Block *src)
{

  *dst = *src;
}

/*
 * Replace DST with DST XOR SRC, one 64-bit word at a time.
 */
static void
xor_block(Block *dst, const Block *src)
{
  uint64_t *out = dst->v;
  const uint64_t *in = src->v;
  const uint64_t *const end = out + ARGON2_QWORDS_IN_BLOCK;

  while (out != end)
	*out++ ^= *in++;
}

/*
 * Fill block DST from the ARGON2_BLOCK_SIZE bytes at INPUT, converting each
 * group of eight bytes to a 64-bit word with load64().
 */
static void
load_block(Block *dst, const void *input)
{
  const uint8_t *in = (const uint8_t *) input;
  uint64_t *word = dst->v;
  unsigned int remaining;

  for (remaining = ARGON2_QWORDS_IN_BLOCK; remaining != 0; --remaining) {
	*word++ = load64(in);
	in += sizeof(dst->v[0]);
  }
}

/*
 * Serialize block SRC into the ARGON2_BLOCK_SIZE bytes at OUTPUT, writing
 * each 64-bit word with store64().
 */
static void
store_block(void *output, const Block *src)
{
  uint8_t *out = (uint8_t *) output;
  const uint64_t *word = src->v;
  unsigned int remaining;

  for (remaining = ARGON2_QWORDS_IN_BLOCK; remaining != 0; --remaining) {
	store64(out, *word++);
	out += sizeof(src->v[0]);
  }
}


/*
 * Allocate room for M_COST blocks with malloc() and store the result in
 * *MEMORY.
 * MEMORY: where to store the pointer to the new block array
 * M_COST: number of blocks to allocate
 * Return ARGON2_OK on success.  Return ARGON2_MEMORY_ALLOCATION_ERROR if
 * MEMORY is NULL, if the size in bytes does not fit in a size_t (*MEMORY is
 * left untouched in both cases), or if malloc() returns NULL.
 */
static int
allocate_memory(Block **memory, uint32_t m_cost)
{
  size_t nbytes;

  if (memory == NULL)
	return(ARGON2_MEMORY_ALLOCATION_ERROR);

  /* Detect overflow by checking that the product divides back. */
  nbytes = sizeof(Block) * m_cost;
  if (m_cost != 0 && nbytes / m_cost != sizeof(Block))
	return(ARGON2_MEMORY_ALLOCATION_ERROR);

  *memory = (Block *) malloc(nbytes);

  return((*memory == NULL) ? ARGON2_MEMORY_ALLOCATION_ERROR : ARGON2_OK);
}

/*
 * Securely clean memory: overwrite N bytes at V with zeros in a way the
 * compiler should not remove, even if the memory is never read again.
 * V: Pointer to the memory
 * N: Memory size in bytes
 *
 * The function is additionally marked NOT_OPTIMIZED where the compiler
 * supports such an attribute.
 */
static void NOT_OPTIMIZED
secure_wipe_memory(void *v, size_t n)
{
  /*
   * NOTE(review): "defined memset_s" is true only if memset_s is a
   * preprocessor macro; a library that provides it solely as a function
   * falls through to the branches below.
   */
#if defined memset_s
  memset_s(v, n, 0, n);
#elif defined(__OpenBSD__)
  explicit_bzero(v, n);
#else
  /*
   * Call memset() through a volatile function pointer so that the compiler
   * cannot prove which function is called and drop the store as dead.
   */
  static void *(*const volatile memset_sec)(void *, int, size_t) = &memset;
  memset_sec(v, 0, n);
#endif
}

/*
 * Wipe the whole block array of INSTANCE when requested.
 * INSTANCE: pointer to the current instance
 * CLEAR: non-zero to overwrite the memory with zeros, zero to do nothing.
 * Nothing is done either when the instance has no memory attached.
 */
static void
clear_memory(Argon2_instance_t *instance, int clear)
{

  if (!clear || instance->memory == NULL)
	return;

  secure_wipe_memory(instance->memory,
					 sizeof(Block) * instance->memory_blocks);
}

/*
 * Deallocates memory obtained from allocate_memory() by passing it to
 * free(); the contents are not wiped here (see clear_memory()).
 * MEMORY: pointer to the blocks; may be NULL, in which case nothing happens.
 */
static void
free_memory(Block *memory)
{

  free(memory);
}

/*
 * Produce the tag and release the block memory: XOR together the last block
 * of every lane, hash the result into CONTEXT->out, optionally wipe the block
 * memory, then deallocate it.
 * CONTEXT: current Argon2 context; out, outlen, flags and free_cbk are used
 * INSTANCE: current Argon2 instance
 * Nothing is done if either pointer is NULL.
 * INSTANCE->memory must point to the filled block array and CONTEXT->out to
 * CONTEXT->outlen bytes.  If CONTEXT->free_cbk is not NULL it is called to
 * deallocate the memory, otherwise free_memory() is used.
 */
static void
finalize(const Argon2_context *context, Argon2_instance_t *instance)
{
  Block tag_block;
  uint8_t tag_bytes[ARGON2_BLOCK_SIZE];
  uint32_t lane;

  if (context == NULL || instance == NULL)
	return;

  /* Start from the last block of lane 0, then fold in the other lanes. */
  copy_block(&tag_block, instance->memory + instance->lane_length - 1);
  for (lane = 1; lane < instance->lanes; ++lane) {
	uint32_t last_in_lane
	  = lane * instance->lane_length + (instance->lane_length - 1);

	xor_block(&tag_block, instance->memory + last_in_lane);
  }

  /* Hash the combined block into the output, then wipe the temporaries. */
  store_block(tag_bytes, &tag_block);
  blake2b_long(context->out, context->outlen, tag_bytes, ARGON2_BLOCK_SIZE);
  secure_wipe_memory(tag_block.v, ARGON2_BLOCK_SIZE);
  secure_wipe_memory(tag_bytes, ARGON2_BLOCK_SIZE);

  /*
   * NOTE(review): the wipe of the block memory is controlled by the
   * CLEAR_PASSWORD flag; confirm whether a dedicated "clear memory" flag
   * was intended.
   */
  clear_memory(instance, context->flags & ARGON2_FLAG_CLEAR_PASSWORD);

  /* Deallocate with whichever deallocator the context provides. */
  if (context->free_cbk != NULL) {
	context->free_cbk((uint8_t *)instance->memory,
					  instance->memory_blocks * sizeof(Block));
	return;
  }

  free_memory(instance->memory);
}

/*
 * Compute absolute position of reference block in the lane following a skewed
 * distribution and using a pseudo-random value as input.
 * INSTANCE: Pointer to the current instance
 * POSITION: Pointer to the current position
 * PSEUDO_RAND: 32-bit pseudo-random value used to determine the position
 * SAME_LANE: Indicates if the block will be taken from the current lane.
 * If so we can reference the current segment.
 * All pointers must be valid.
 * Return the index of the reference block within its lane, in the range
 * 0 .. INSTANCE->lane_length - 1.
 */
static uint32_t
index_alpha(const Argon2_instance_t *instance,
			const Argon2_position_t *position, uint32_t pseudo_rand,
			int same_lane)
{
  uint32_t reference_area_size;
  uint64_t relative_position;
  uint32_t start_position, absolute_position;

  /*
   * Pass 0:
   *      This lane : all already finished segments plus already constructed
   * blocks in this segment
   *      Other lanes : all already finished segments
   * Pass 1+:
   *      This lane : (SYNC_POINTS - 1) last segments plus already constructed
   * blocks in this segment
   *      Other lanes : (SYNC_POINTS - 1) last segments
   *
   * The arithmetic below is unsigned: the "(-1)" terms subtract one through
   * modulo-2^32 wrap-around.
   * NOTE(review): in the first slice of pass 0 the size is index - 1, which
   * would wrap for index == 0; presumably the caller never asks for that
   * position -- confirm against fill_segment().
   */
  if (0 == position->pass) {
	/* First pass */
	if (0 == position->slice) {
	  /* First slice */
	  reference_area_size = position->index - 1; /* all but the previous */
	}
	else {
	  if (same_lane) {
		/* The same lane => add current segment */
		reference_area_size =
		  position->slice * instance->segment_length
		  + position->index - 1;
	  }
	  else {
		/* Another lane: finished segments only, one less at index 0. */
		reference_area_size =
		  position->slice * instance->segment_length
		  + ((position->index == 0) ? (-1) : 0);
	  }
	}
  }
  else {
	/* Second and later passes */
	if (same_lane) {
	  reference_area_size = instance->lane_length
		- instance->segment_length + position->index - 1;
	}
	else {
	  reference_area_size = instance->lane_length
		- instance->segment_length +
		((position->index == 0) ? (-1) : 0);
	}
  }

  /*
   * 1.2.4. Mapping pseudo_rand to 0..<reference_area_size-1> and produce
   * relative position.
   * With x = pseudo_rand / 2^32 in [0, 1), this computes approximately
   * reference_area_size * (1 - x^2) - 1 using 64-bit integer arithmetic,
   * which favours positions near the end of the reference area.
  */
  relative_position = pseudo_rand;
  relative_position = relative_position * relative_position >> 32;
  relative_position = reference_area_size - 1
	- (reference_area_size * relative_position >> 32);
  
  /*
   * 1.2.5 Computing starting position.
   * In pass 0 the reference area starts at the beginning of the lane; in
   * later passes it starts at the slice after the current one (wrapping to
   * 0 after the last slice).
   */
  start_position = 0;

  if (0 != position->pass) {
	start_position = (position->slice == ARGON2_SYNC_POINTS - 1)
	  ? 0 : (position->slice + 1) * instance->segment_length;
  }

  /* 1.2.6. Computing absolute position */
  absolute_position = (start_position + relative_position)
	% instance->lane_length; /* absolute position */
  return(absolute_position);
}

/*
 * Thread entry point used by fill_memory_blocks(): THREAD_DATA points to an
 * Argon2_thread_data describing the segment to fill.  The thread fills that
 * segment and then terminates through argon2_thread_exit().
 */
static void *
fill_segment_thr(void *thread_data)
{
  const Argon2_thread_data *job = (const Argon2_thread_data *) thread_data;

  fill_segment(job->instance_ptr, job->pos);
  argon2_thread_exit();

  return(NULL);
}

/*
 * Fill the entire memory INSTANCE->passes times, starting from the first two
 * blocks of each lane.  Each pass is split into ARGON2_SYNC_POINTS slices;
 * within a slice every lane's segment is filled by its own thread, with at
 * most INSTANCE->threads threads (capped at the number of lanes) running at
 * the same time.  All threads of a slice are joined before the next slice
 * starts.
 * INSTANCE: Pointer to the current instance
 * Return ARGON2_OK if successful, ARGON2_THREAD_FAIL if INSTANCE is NULL, has
 * no lanes or no threads, or if a thread cannot be created or joined, and
 * ARGON2_MEMORY_ALLOCATION_ERROR if the bookkeeping arrays cannot be
 * allocated.
 */
static int
fill_memory_blocks(Argon2_instance_t *instance)
{
  uint32_t r, s, l;
  uint32_t max_threads;
  uint32_t started = 0;		/* Threads created in the current slice */
  uint32_t joined = 0;		/* Threads of the current slice already joined */
  int result = ARGON2_OK;
  Argon2_thread_handle_t *thread = NULL;
  Argon2_thread_data *thr_data = NULL;

  /* With zero threads, a thread would be joined before any was created. */
  if (instance == NULL || instance->lanes == 0 || instance->threads == 0)
	return(ARGON2_THREAD_FAIL);

  /*
   * Never run more threads than lanes; this also keeps the unsigned
   * arithmetic on thread indices from wrapping around.
   */
  max_threads = instance->threads;
  if (max_threads > instance->lanes)
	max_threads = instance->lanes;

  /* 1. Allocating space for threads. */
  thread = calloc(instance->lanes, sizeof(*thread));
  if (thread == NULL)
	return(ARGON2_MEMORY_ALLOCATION_ERROR);

  thr_data = calloc(instance->lanes, sizeof(*thr_data));
  if (thr_data == NULL) {
	free(thread);
	return(ARGON2_MEMORY_ALLOCATION_ERROR);
  }

  for (r = 0; r < instance->passes; ++r) {
	for (s = 0; s < ARGON2_SYNC_POINTS; ++s) {
	  started = 0;
	  joined = 0;

	  /* 2. Calling threads. */
	  for (l = 0; l < instance->lanes; ++l) {
		/* 2.1 Join the oldest thread if the limit is reached. */
		if (l >= max_threads) {
		  if (argon2_thread_join(thread[joined++]) != 0) {
			result = ARGON2_THREAD_FAIL;
			goto cleanup;
		  }
		}

		/* 2.2 Create thread. */
		thr_data[l].instance_ptr = instance;
		thr_data[l].pos.pass = r;
		thr_data[l].pos.lane = l;
		thr_data[l].pos.slice = (uint8_t) s;
		thr_data[l].pos.index = 0;
		if (argon2_thread_create(&thread[l], &fill_segment_thr,
								 (void *) &thr_data[l]) != 0) {
		  result = ARGON2_THREAD_FAIL;
		  goto cleanup;
		}
		started = l + 1;
	  }

	  /* 3. Joining remaining threads. */
	  while (joined < started) {
		if (argon2_thread_join(thread[joined++]) != 0) {
		  result = ARGON2_THREAD_FAIL;
		  goto cleanup;
		}
	  }
	}
  }

 cleanup:
  /*
   * On failure some threads may still be running and reading thr_data, so
   * wait for them (best effort) before releasing it.  On success this loop
   * does nothing.
   */
  while (joined < started)
	(void) argon2_thread_join(thread[joined++]);

  free(thr_data);
  free(thread);

  return(result);
}

/*
 * Check every field of CONTEXT against the limits defined in <argon2.h>.
 * CONTEXT: Pointer to current Argon2 context
 * Return ARGON2_OK if all checks pass; otherwise return the error code of
 * the first check that fails.  The checks run in this order: context, output,
 * password, salt, secret, associated data, memory cost, time cost, lanes,
 * threads, allocation callbacks.
 */
static int
validate_inputs(const Argon2_context *context)
{

  if (context == NULL)
	return(ARGON2_INCORRECT_PARAMETER);

  /* Output buffer and its length. */
  if (context->out == NULL)
	return(ARGON2_OUTPUT_PTR_NULL);
  if (context->outlen < ARGON2_MIN_OUTLEN)
	return(ARGON2_OUTPUT_TOO_SHORT);
  if (context->outlen > ARGON2_MAX_OUTLEN)
	return(ARGON2_OUTPUT_TOO_LONG);

  /* Password: a NULL pointer is acceptable only with a zero length. */
  if (context->pwd == NULL) {
	if (context->pwdlen != 0)
	  return(ARGON2_PWD_PTR_MISMATCH);
  }
  else if (context->pwdlen < ARGON2_MIN_PWD_LENGTH)
	return(ARGON2_PWD_TOO_SHORT);
  else if (context->pwdlen > ARGON2_MAX_PWD_LENGTH)
	return(ARGON2_PWD_TOO_LONG);

  /* Salt. */
  if (context->salt == NULL) {
	if (context->saltlen != 0)
	  return(ARGON2_SALT_PTR_MISMATCH);
  }
  else if (context->saltlen < ARGON2_MIN_SALT_LENGTH)
	return(ARGON2_SALT_TOO_SHORT);
  else if (context->saltlen > ARGON2_MAX_SALT_LENGTH)
	return(ARGON2_SALT_TOO_LONG);

  /* Secret. */
  if (context->secret == NULL) {
	if (context->secretlen != 0)
	  return(ARGON2_SECRET_PTR_MISMATCH);
  }
  else if (context->secretlen < ARGON2_MIN_SECRET)
	return(ARGON2_SECRET_TOO_SHORT);
  else if (context->secretlen > ARGON2_MAX_SECRET)
	return(ARGON2_SECRET_TOO_LONG);

  /* Associated data. */
  if (context->ad == NULL) {
	if (context->adlen != 0)
	  return(ARGON2_AD_PTR_MISMATCH);
  }
  else if (context->adlen < ARGON2_MIN_AD_LENGTH)
	return(ARGON2_AD_TOO_SHORT);
  else if (context->adlen > ARGON2_MAX_AD_LENGTH)
	return(ARGON2_AD_TOO_LONG);

  /* Memory cost: within limits and at least 8 blocks per lane. */
  if (context->m_cost < ARGON2_MIN_MEMORY)
	return(ARGON2_MEMORY_TOO_LITTLE);
  if (context->m_cost > ARGON2_MAX_MEMORY)
	return(ARGON2_MEMORY_TOO_MUCH);
  if (context->m_cost < 8 * context->lanes)
	return(ARGON2_MEMORY_TOO_LITTLE);

  /* Time cost. */
  if (context->t_cost < ARGON2_MIN_TIME)
	return(ARGON2_TIME_TOO_SMALL);
  if (context->t_cost > ARGON2_MAX_TIME)
	return(ARGON2_TIME_TOO_LARGE);

  /* Lanes. */
  if (context->lanes < ARGON2_MIN_LANES)
	return(ARGON2_LANES_TOO_FEW);
  if (context->lanes > ARGON2_MAX_LANES)
	return(ARGON2_LANES_TOO_MANY);

  /* Threads. */
  if (context->threads < ARGON2_MIN_THREADS)
	return(ARGON2_THREADS_TOO_FEW);
  if (context->threads > ARGON2_MAX_THREADS)
	return(ARGON2_THREADS_TOO_MANY);

  /* The allocation callbacks must be given together or not at all. */
  if (context->allocate_cbk != NULL && context->free_cbk == NULL)
	return(ARGON2_FREE_MEMORY_CBK_NULL);
  if (context->allocate_cbk == NULL && context->free_cbk != NULL)
	return(ARGON2_ALLOCATE_MEMORY_CBK_NULL);

  return(ARGON2_OK);
}

/*
 * Create the first two blocks of every lane.
 * BLOCKHASH: the pre-hashing seed; must have room for
 * ARGON2_PREHASH_SEED_LENGTH bytes, of which the first
 * ARGON2_PREHASH_DIGEST_LENGTH hold the digest H0.  The trailing bytes are
 * overwritten here.
 * INSTANCE: Pointer to the current instance
 * For lane L, block 0 is the expansion of H0 || 0 || L and block 1 the
 * expansion of H0 || 1 || L, both fields being 32-bit values written with
 * store32().
 */
static void
fill_first_blocks(uint8_t *blockhash, const Argon2_instance_t *instance)
{
  uint8_t expanded[ARGON2_BLOCK_SIZE];
  uint8_t *const block_field = blockhash + ARGON2_PREHASH_DIGEST_LENGTH;
  uint8_t *const lane_field = blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4;
  uint32_t lane, n;

  for (lane = 0; lane < instance->lanes; ++lane) {
	store32(lane_field, lane);

	for (n = 0; n < 2; ++n) {
	  store32(block_field, n);
	  blake2b_long(expanded, ARGON2_BLOCK_SIZE, blockhash,
				   ARGON2_PREHASH_SEED_LENGTH);
	  load_block(&instance->memory[lane * instance->lane_length + n],
				 expanded);
	}
  }

  /* The expansion is derived from secret input: do not leave it behind. */
  secure_wipe_memory(expanded, ARGON2_BLOCK_SIZE);
}

/*
 * Debugging switch for initial_hash(): when non-zero, every input that is
 * hashed into H_0 (including the password and the secret) is printed.
 */
static int initial_hash_debug = 0;

/*
 * Hash all the inputs into @a blockhash[PREHASH_DIGEST_LENGTH], clear
 * password and secret if needed.
 * CONTEXT: Pointer to the Argon2 internal structure containing memory
 * pointer, and parameters for time and space requirements.
 * BLOCKHASH: Buffer for pre-hashing digest
 * TYPE: Argon2 type
 * BLOCKHASH must have at least PREHASH_DIGEST_LENGTH bytes allocated.
 */
static int
initial_hash(uint8_t *blockhash, Argon2_context *context, Argon2_type type)
{
  Blake2b_state BlakeHash;
  uint8_t value[sizeof(uint32_t)];

  if (NULL == context || NULL == blockhash)
	return(-1);

  if (blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH) == -1)
	return(-1);

  /*
   * Computation of H_0, as per draft-irtf-cfrg-argon2-00
   */

  /* p */
  if (initial_hash_debug)
	fprintf(stderr, "p=%d\n", context->lanes);
  store32(&value, context->lanes);
  if (blake2b_update(&BlakeHash, (const uint8_t *) &value, sizeof(value)) == -1)
	return(-1);

  /* T */
  if (initial_hash_debug)
	fprintf(stderr, "T=%d\n", context->outlen);
  store32(&value, context->outlen);
  if (blake2b_update(&BlakeHash, (const uint8_t *) &value, sizeof(value)) == -1)
	return(-1);

  /* m */
  if (initial_hash_debug)
	fprintf(stderr, "m=%d\n", context->m_cost);
  store32(&value, context->m_cost);
  if (blake2b_update(&BlakeHash, (const uint8_t *) &value, sizeof(value)) == -1)
	return(-1);

  /* t */
  if (initial_hash_debug)
	fprintf(stderr, "t=%d\n", context->t_cost);
  store32(&value, context->t_cost);
  if (blake2b_update(&BlakeHash, (const uint8_t *) &value, sizeof(value)) == -1)
	return(-1);

  /* v */
  if (initial_hash_debug)
	fprintf(stderr, "v=%d\n", context->version);
  store32(&value, context->version);
  if (blake2b_update(&BlakeHash, (const uint8_t *) &value, sizeof(value)) == -1)
	return(-1);

  /* y */
  if (initial_hash_debug)
	fprintf(stderr, "y=%d\n", type);
  store32(&value, (uint32_t) type);
  if (blake2b_update(&BlakeHash, (const uint8_t *) &value, sizeof(value)) == -1)
	return(-1);

  /* length(P) */
  if (initial_hash_debug)
	fprintf(stderr, "length(P)=%d\n", context->pwdlen);
  store32(&value, context->pwdlen);
  if (blake2b_update(&BlakeHash, (const uint8_t *) &value, sizeof(value)) == -1)
	return(-1);

  /* P */
  if (initial_hash_debug) {
	fprintf(stderr, "P:\n");
	print_hex(stderr, context->pwd, context->pwdlen);
  }
  if (context->pwd != NULL) {
	if (blake2b_update(&BlakeHash, (const uint8_t *) context->pwd,
					   context->pwdlen) == -1)
	return(-1);

	if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) {
	  secure_wipe_memory(context->pwd, context->pwdlen);
	  context->pwdlen = 0;
	}
  }

  /* length(S) */
  if (initial_hash_debug)
	fprintf(stderr, "length(S)=%d\n", context->saltlen);
  store32(&value, context->saltlen);
  if (blake2b_update(&BlakeHash, (const uint8_t *) &value, sizeof(value)) == -1)
	return(-1);

  if (context->salt != NULL) {
	/* S */
	if (initial_hash_debug) {
	  printf("S:\n"); print_hex(stderr, context->salt, context->saltlen);
	}
	if (blake2b_update(&BlakeHash, (const uint8_t *) context->salt,
					   context->saltlen) == -1)
	  return(-1);
  }

  /* length(K) */
  if (initial_hash_debug)
	fprintf(stderr, "length(K)=%d\n", context->secretlen);
  store32(&value, context->secretlen);
  if (blake2b_update(&BlakeHash, (const uint8_t *) &value, sizeof(value)) == -1)
	return(-1);

  if (context->secret != NULL) {
	/* K */
	if (initial_hash_debug) {
	  printf("K:\n"); print_hex(stderr, context->secret, context->secretlen);
	}
	if (blake2b_update(&BlakeHash, (const uint8_t *) context->secret,
					   context->secretlen) == -1)
	return(-1);

	if (context->flags & ARGON2_FLAG_CLEAR_SECRET) {
	  secure_wipe_memory(context->secret, context->secretlen);
	  context->secretlen = 0;
	}
  }

  /* length(X) */
  if (initial_hash_debug)
	fprintf(stderr, "length(X)=%d\n", context->adlen);
  store32(&value, context->adlen);
  if (blake2b_update(&BlakeHash, (const uint8_t *) &value, sizeof(value)) == -1)
	return(-1);

  if (context->ad != NULL) {
	/* X */
	if (initial_hash_debug) {
	  printf("X:\n"); print_hex(stderr, context->ad, context->adlen);
	}
	if (blake2b_update(&BlakeHash, (const uint8_t *) context->ad,
					   context->adlen) == -1)
	  return(-1);
  }

  if (blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH) == -1)
	return(-1);

  return(0);
}

/*
 * Allocate the block memory, hash the inputs with Blake, and create the first
 * two blocks of every lane.
 * INSTANCE: Current Argon2 instance; memory_blocks, lanes, lane_length and
 * type must already be set.  On success INSTANCE->memory points to the new
 * block array.
 * CONTEXT:  Pointer to the Argon2 context holding the inputs, the flags and
 * the optional allocation callbacks.  If CONTEXT->allocate_cbk is not NULL it
 * is used to obtain the memory, otherwise allocate_memory() is used.
 * Return ARGON2_OK if successful, ARGON2_INCORRECT_PARAMETER if a pointer is
 * NULL or the inputs cannot be hashed, and otherwise the error code of the
 * failed allocation.  On failure no memory remains allocated and, once the
 * allocation step has been reached, INSTANCE->memory is NULL.
 *
 * If prehash_digest_buf is not NULL, the pre-hashing seed (the digest
 * followed by eight zero bytes) is copied into it.
 */
static int
initialize(Argon2_instance_t *instance, Argon2_context *context)
{
  uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
  int result = ARGON2_OK;

  if (instance == NULL || context == NULL)
	return(ARGON2_INCORRECT_PARAMETER);

  /* 1. Memory allocation. */

  if (NULL != context->allocate_cbk) {
	uint8_t *p = NULL;

	/* Compute the size in size_t so that it cannot wrap at 32 bits. */
	result = context->allocate_cbk(&p,
								   (size_t) instance->memory_blocks
								   * ARGON2_BLOCK_SIZE);
	if (ARGON2_OK != result)
	  return(result);
	if (p == NULL)
	  return(ARGON2_MEMORY_ALLOCATION_ERROR);

	/*
	 * The callback returns the new region through P: keep that pointer
	 * itself, not the bytes it points to.
	 */
	instance->memory = (Block *) (void *) p;
  }
  else {
	instance->memory = NULL;
	result = allocate_memory(&(instance->memory), instance->memory_blocks);
	if (ARGON2_OK != result) {
	  instance->memory = NULL;
	  return(result);
	}
  }

  /*
   * 2. Initial hashing
   * H_0 + 8 extra bytes to produce the first blocks
   * uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
   * Hashing all inputs.
   */
  if (initial_hash(blockhash, context, instance->type) != 0) {
	/*
	 * Without H_0 the first blocks cannot be built: release the memory
	 * with the same deallocator that finalize() would use.
	 */
	secure_wipe_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH);
	if (NULL != context->free_cbk) {
	  context->free_cbk((uint8_t *) instance->memory,
						instance->memory_blocks * sizeof(Block));
	}
	else
	  free_memory(instance->memory);
	instance->memory = NULL;
	return(ARGON2_INCORRECT_PARAMETER);
  }

  /*
   * Zeroing eight extra bytes.  This is done before the optional copy below
   * so that no uninitialized bytes are copied.
   */
  secure_wipe_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH,
					 ARGON2_PREHASH_SEED_LENGTH - ARGON2_PREHASH_DIGEST_LENGTH);

  if (prehash_digest_buf != NULL)
	memcpy(prehash_digest_buf, blockhash, sizeof(blockhash));

  /*
   * 3. Creating first blocks, we always have at least two blocks in a slice
   */
  fill_first_blocks(blockhash, instance);

  /* Clearing the hash */
  secure_wipe_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH);
  
  return(ARGON2_OK);
}

/*
 * Example code for a decoder and encoder of "hash strings", with Argon2
 * parameters.
 *
 * This code comprises three sections:
 *
 *   -- The first section contains generic Base64 encoding and decoding
 *   functions. It is conceptually applicable to any hash function
 *   implementation that uses Base64 to encode and decode parameters,
 *   salts and outputs. It could be made into a library, provided that
 *   the relevant functions are made public (non-static) and be given
 *   reasonable names to avoid collisions with other functions.
 *
 *   -- The second section is specific to Argon2. It encodes and decodes
 *   the parameters, salts and outputs. It does not compute the hash
 *   itself.
 *
 *   -- The third section is test code, with a main() function. With
 *   this section, the whole file compiles as a stand-alone program
 *   that exercises the encoding and decoding functions with some
 *   test vectors.
 *
 * The code was originally written by Thomas Pornin <pornin@bolet.org>,
 * to whom comments and remarks may be sent. It is released under what
 * should amount to Public Domain or its closest equivalent; the
 * following mantra is supposed to incarnate that fact with all the
 * proper legal rituals:
 *
 * ---------------------------------------------------------------------
 * This file is provided under the terms of Creative Commons CC0 1.0
 * Public Domain Dedication. To the extent possible under law, the
 * author (Thomas Pornin) has waived all copyright and related or
 * neighboring rights to this file. This work is published from: Canada.
 * ---------------------------------------------------------------------
 *
 * Copyright (c) 2015 Thomas Pornin
 */

/* ==================================================================== */
/*
 * Common code; could be shared between different hash functions.
 *
 * Note: the Base64 functions below assume that uppercase letters (resp.
 * lowercase letters) have consecutive numerical codes, that fit on 8
 * bits. All modern systems use ASCII-compatible charsets, where these
 * properties are true. If you are stuck with a dinosaur of a system
 * that still defaults to EBCDIC then you already have much bigger
 * interoperability issues to deal with.
 */

/*
 * Some macros for constant-time comparisons. These work over values in
 * the 0..255 range. Returned value is 0x00 on "false", 0xFF on "true".
 * Outside that range the results are not meaningful (EQ, for instance,
 * reports "true" for many unequal pairs), so callers must reduce their
 * operands to 0..255 first.
 */
#define EQ(x, y) ((((0U - ((unsigned)(x) ^ (unsigned)(y))) >> 8) & 0xFF) ^ 0xFF)
#define GT(x, y) ((((unsigned)(y) - (unsigned)(x)) >> 8) & 0xFF)
#define GE(x, y) (GT(y, x) ^ 0xFF)
#define LT(x, y) GT(y, x)
#define LE(x, y) GE(y, x)

/*
 * Convert value x (0..63) to corresponding Base64 character.
 * The result is computed without data-dependent branches: each alphabet
 * range contributes its character through a 0x00/0xFF mask.
 */
static int
b64_byte_to_char(unsigned x)
{
  return(LT(x, 26) & (x + 'A'))
	| (GE(x, 26) & LT(x, 52) & (x + ('a' - 26)))
	| (GE(x, 52) & LT(x, 62) & (x + ('0' - 52))) | (EQ(x, 62) & '+')
	| (EQ(x, 63) & '/');
}

/*
 * Convert character c to the corresponding 6-bit value. If character c
 * is not a Base64 character, then 0xFF (255) is returned.
 *
 * C is interpreted as an unsigned char value.  This matters when the
 * character comes from a plain (possibly signed) char: a byte with the high
 * bit set arrives here as a negative number, which the comparison macros
 * would otherwise mistake for a valid character.
 */
static unsigned
b64_char_to_byte(int c)
{
  const unsigned uc = (unsigned char) c;
  unsigned x;

  x = (GE(uc, 'A') & LE(uc, 'Z') & (uc - 'A'))
	| (GE(uc, 'a') & LE(uc, 'z') & (uc - ('a' - 26)))
	| (GE(uc, '0') & LE(uc, '9') & (uc - ('0' - 52))) | (EQ(uc, '+') & 62)
	| (EQ(uc, '/') & 63);

  /* x == 0 means either 'A' (a valid zero) or no match at all (0xFF). */
  return(x | (EQ(x, 0) & (EQ(uc, 'A') ^ 0xFF)));
}

/*
 * Encode the SRC_LEN bytes at SRC as unpadded Base64 into DST, followed by a
 * terminating zero.  DST_LEN is the size of DST in characters.
 * Return the length of the encoded string, not counting the terminating
 * zero, or (size_t)-1 -- without writing anything -- if DST cannot hold the
 * string and its terminator.
 */
static size_t
to_base64(char *dst, size_t dst_len, const void *src, size_t src_len)
{
  const unsigned char *in = (const unsigned char *) src;
  const size_t tail = src_len % 3;
  size_t olen, i;
  unsigned int bits = 0;		/* Bit accumulator */
  unsigned int nbits = 0;		/* Number of pending bits in BITS */

  /* Four characters per full group; a 1- or 2-byte tail takes 2 or 3. */
  olen = (src_len / 3) * 4;
  if (tail != 0)
	olen += tail + 1;

  if (olen >= dst_len)
	return((size_t) -1);

  for (i = 0; i < src_len; ++i) {
	bits = (bits << 8) + in[i];
	nbits += 8;
	/* At least 8 bits are pending here, so one character is always due. */
	do {
	  nbits -= 6;
	  *dst++ = (char) b64_byte_to_char((bits >> nbits) & 0x3F);
	} while (nbits >= 6);
  }

  /* Flush the remaining 2 or 4 bits, padded with zero bits on the right. */
  if (nbits > 0)
	*dst++ = (char) b64_byte_to_char((bits << (6 - nbits)) & 0x3F);

  *dst = 0;
  return(olen);
}

/*
 * Decode the Base64 characters at SRC into DST.  On entry *DST_LEN is the
 * capacity of DST in bytes; on success it is replaced by the number of bytes
 * actually decoded.
 *
 * Decoding stops at the first character that is not a Base64 character
 * (possibly the terminating zero); a pointer to that character is returned.
 * NULL is returned, and *DST_LEN left unchanged, if DST is too small or if
 * the input ends with leftover bits that are too many (input length equal to
 * 1 modulo 4) or not all zero.
 */
static const char *
from_base64(void *dst, size_t *dst_len, const char *src)
{
  unsigned char *out = (unsigned char *) dst;
  size_t produced = 0;
  unsigned bits = 0;			/* Bit accumulator */
  unsigned nbits = 0;			/* Number of pending bits in BITS */
  unsigned sextet;

  while ((sextet = b64_char_to_byte(*src)) != 0xFF) {
	++src;
	bits = (bits << 6) + sextet;
	nbits += 6;
	if (nbits < 8)
	  continue;

	/* A whole byte is available: emit it if there is still room. */
	nbits -= 8;
	if (produced >= *dst_len)
	  return(NULL);
	++produced;
	*out++ = (bits >> nbits) & 0xFF;
  }

  /*
   * Only 0, 2 or 4 bits may be left over (6 means the input length was
   * 1 modulo 4, which is invalid), and they must all be zero.
   */
  if (nbits > 4)
	return(NULL);
  if ((bits & ((1U << nbits) - 1)) != 0)
	return(NULL);

  *dst_len = produced;
  return(src);
}

/*
 * Parse an unsigned decimal integer at the start of STR and store it in *V.
 * Return a pointer to the first character after the digits.  Return NULL,
 * leaving *V untouched, if STR does not start with a digit, if the number
 * has superfluous leading zeros, or if it does not fit in an
 * 'unsigned long'.
 */
static const char *
decode_decimal(const char *str, unsigned long *v)
{
  const char *p = str;
  unsigned long total = 0;

  while (*p >= '0' && *p <= '9') {
	const unsigned long digit = (unsigned long) (*p - '0');

	/* Reject the input before either step could overflow. */
	if (total > ULONG_MAX / 10)
	  return(NULL);
	total *= 10;
	if (digit > ULONG_MAX - total)
	  return(NULL);
	total += digit;
	++p;
  }

  /* No digit at all. */
  if (p == str)
	return(NULL);

  /* "0" is fine; "0" followed by further digits is not minimal. */
  if (*str == '0' && p != str + 1)
	return(NULL);

  *v = total;
  return(p);
}

/* ==================================================================== */
/*
 * Code specific to Argon2.
 *
 * The code below applies the following format:
 *
 *  $argon2<T>[$v=<num>]$m=<num>,t=<num>,p=<num>[,keyid=<bin>]
 *    [,data=<bin>][$<bin>[$<bin>]]
 *
 * where:
 *   <T> is either 'd' or 'i',
 *   <num> is a decimal integer (positive, fits in an 'unsigned long'),
 *   <bin> is Base64-encoded data
 *   (no '=' padding characters, no newline or whitespace).
 *   "keyid" is a binary identifier for a key (up to 8 bytes);
 *   "data" is associated data (up to 32 bytes). When the 'keyid'
 *   (resp. the 'data') is empty, then it is omitted from the output.
 *
 * The last two binary chunks (encoded in Base64) are the salt and the output,
 * in that order.
 * Both are optional, but if the output is present, the salt must appear.
 * The binary salt length is between 8 and 48 bytes.
 * The output length is always exactly 32 bytes.
 */

/*
 * Decode the Argon2 hash string STR into the structure CTX.
 *
 * On entry ctx->adlen, ctx->saltlen and ctx->outlen give the capacities
 * of the ctx->ad, ctx->salt and ctx->out buffers; on return they hold the
 * number of bytes decoded into each (0 for a part that is absent).
 * ctx->pwdlen is reset to 0 and ctx->threads is set equal to ctx->lanes.
 * If the string has no "$v=" field, ctx->version is ARGON2_VERSION_10.
 *
 * TYPE selects the expected prefix ("$argon2i" or "$argon2d"); any other
 * value yields ARGON2_INCORRECT_TYPE.
 *
 * Every decimal field must fit in 32 bits; a larger value is rejected
 * with ARGON2_DECODING_FAIL rather than being truncated on assignment.
 *
 * A string that ends after the parameters, or after the salt, is accepted
 * with ARGON2_OK without calling validate_inputs(); validation only runs
 * once an output field has been decoded.
 *
 * Return ARGON2_OK on success, other ARGON2_ codes on error.
 */
static int
decode_string(Argon2_context *ctx, const char *str, Argon2_type type)
{

  /* Require the literal PREFIX at the cursor and step over it. */
#define CC(prefix)														\
  do {																	\
	size_t cc_len = strlen(prefix);										\
	if (strncmp(str, prefix, cc_len) != 0) {							\
	  return(ARGON2_DECODING_FAIL);										\
	}																	\
	str += cc_len;														\
  } while ((void)0, 0)

  /* If PREFIX is at the cursor, step over it and run CODE. */
#define CC_opt(prefix, code)											\
  do {																	\
	size_t cc_len = strlen(prefix);										\
	if (strncmp(str, prefix, cc_len) == 0) {							\
	  str += cc_len;													\
	  { code; }															\
	}																	\
  } while ((void)0, 0)

  /* Decode a decimal number that must fit in 32 bits into X. */
#define DECIMAL(x)														\
  do {																	\
	unsigned long dec_x;												\
	str = decode_decimal(str, &dec_x);									\
	if (str == NULL || dec_x > UINT32_MAX) {							\
	  return(ARGON2_DECODING_FAIL);										\
	}																	\
	(x) = (uint32_t)dec_x;												\
  } while ((void)0, 0)

  /* Decode Base64 data into BUF (capacity MAX_LEN); store its size in LEN. */
#define BIN(buf, max_len, len)											\
  do {																	\
	size_t bin_len = (max_len);											\
	str = from_base64(buf, &bin_len, str);								\
	if (str == NULL || bin_len > UINT32_MAX) {							\
	  return(ARGON2_DECODING_FAIL);										\
	}																	\
	(len) = (uint32_t)bin_len;											\
  } while ((void)0, 0)

  size_t maxadlen = ctx->adlen;
  size_t maxsaltlen = ctx->saltlen;
  size_t maxoutlen = ctx->outlen;
  int validation_result;

  ctx->adlen = 0;
  ctx->saltlen = 0;
  ctx->outlen = 0;
  ctx->pwdlen = 0;

  if (type == Argon2_i) {
	CC("$argon2i");
  }
  else if (type == Argon2_d) {
	CC("$argon2d");
  }
  else {
	return(ARGON2_INCORRECT_TYPE);
  }

  ctx->version = ARGON2_VERSION_10;

  /* Reading the version number if the default is suppressed. */
  CC_opt("$v=", DECIMAL(ctx->version));
  CC("$m=");
  DECIMAL(ctx->m_cost);
  CC(",t=");
  DECIMAL(ctx->t_cost);
  CC(",p=");
  DECIMAL(ctx->lanes);
  ctx->threads = ctx->lanes;

  CC_opt(",data=", BIN(ctx->ad, maxadlen, ctx->adlen));
  if (*str == 0) {
	/* Parameter-only string. */
	return(ARGON2_OK);
  }

  CC("$");
  BIN(ctx->salt, maxsaltlen, ctx->saltlen);
  if (*str == 0) {
	/* Salt string without an output. */
	return(ARGON2_OK);
  }

  CC("$");
  BIN(ctx->out, maxoutlen, ctx->outlen);
  validation_result = validate_inputs(ctx);
  if (validation_result != ARGON2_OK) {
	return(validation_result);
  }

  /* Nothing may follow the output field. */
  if (*str == 0) {
	return(ARGON2_OK);
  }

  return(ARGON2_DECODING_FAIL);

#undef CC
#undef CC_opt
#undef DECIMAL
#undef BIN
}

/*
 * Encode an Argon2 hash string into the buffer DST. DST_LEN is the size,
 * in bytes, of DST; if it is less than the number of required characters
 * (including the terminating 0), ARGON2_ENCODING_FAIL is returned.
 *
 * If CTX->OUTLEN is 0, the hash string will be a salt string (no output).
 * If CTX->SALTLEN is 0, the string will be a parameter-only string
 * (no salt and no output).
 *
 * TYPE must be Argon2_i or Argon2_d, otherwise ARGON2_ENCODING_FAIL is
 * returned. CTX is checked once with validate_inputs() after the type
 * prefix has been written; if that check fails its error code is
 * returned.
 *
 * On success, ARGON2_OK is returned.
 */
static int
encode_string(char *dst, size_t dst_len, Argon2_context *ctx, Argon2_type type)
{
  int validation_result;

  /* Append the string STR (and a terminating 0) to the output. */
#define SS(str)															\
  do {																	\
	size_t pp_len = strlen(str);										\
	if (pp_len >= dst_len) {											\
	  return(ARGON2_ENCODING_FAIL);										\
	}																	\
	memcpy(dst, str, pp_len + 1);										\
	dst += pp_len;														\
	dst_len -= pp_len;													\
  } while ((void) 0, 0)

  /* Append X as a decimal number; the conversion is bounded by tmp. */
#define SX(x)															\
  do {																	\
	char tmp[30];														\
	snprintf(tmp, sizeof(tmp), "%lu", (unsigned long) (x));				\
	SS(tmp);															\
  } while ((void) 0, 0)

  /* Append LEN bytes of BUF, Base64-encoded. */
#define SB(buf, len)													\
  do {																	\
	size_t sb_len = to_base64(dst, dst_len, buf, len);					\
	if (sb_len == (size_t) -1) {										\
	  return(ARGON2_ENCODING_FAIL);										\
	}																	\
	dst += sb_len;														\
	dst_len -= sb_len;													\
  } while ((void) 0, 0)

  if (type == Argon2_i) {
	SS("$argon2i$v=");
  }
  else if (type == Argon2_d) {
	SS("$argon2d$v=");
  }
  else {
	return(ARGON2_ENCODING_FAIL);
  }

  /* Validate once and reuse the result. */
  validation_result = validate_inputs(ctx);
  if (validation_result != ARGON2_OK) {
	return(validation_result);
  }

  SX(ctx->version);
  SS("$m=");
  SX(ctx->m_cost);
  SS(",t=");
  SX(ctx->t_cost);
  SS(",p=");
  SX(ctx->lanes);

  if (ctx->adlen > 0) {
	SS(",data=");
	SB(ctx->ad, ctx->adlen);
  }

  if (ctx->saltlen == 0) {
	return(ARGON2_OK);
  }

  SS("$");
  SB(ctx->salt, ctx->saltlen);

  if (ctx->outlen == 0) {
	return(ARGON2_OK);
  }

  SS("$");
  SB(ctx->out, ctx->outlen);

  return(ARGON2_OK);

#undef SS
#undef SX
#undef SB
}

/*
 * LEGACY CODE: version 1.2.1 and earlier.
 * Build a new memory block, overwriting NEXT_BLOCK.
 * PREV_BLOCK: the previous block
 * REF_BLOCK: the reference block
 * NEXT_BLOCK: the block to be constructed; its old contents are not read
 * All block pointers must be valid.
 */
static void
fill_block(const Block *prev_block, const Block *ref_block,
		   Block *next_block)
{
  Block mixed, saved;
  unsigned int k;

  /* mixed = saved = ref_block XOR prev_block */
  copy_block(&mixed, ref_block);
  xor_block(&mixed, prev_block);
  copy_block(&saved, &mixed);

  /*
   * Apply the round to each run of 16 consecutive 64-bit words:
   * (0,1,...,15), then (16,17,...,31), ... finally (112,113,...,127).
   */
  for (k = 0; k < 8; k++) {
	uint64_t *w = &mixed.v[16 * k];

	BLAKE2_ROUND_NOMSG(w[0], w[1], w[2], w[3],
					   w[4], w[5], w[6], w[7],
					   w[8], w[9], w[10], w[11],
					   w[12], w[13], w[14], w[15]);
  }

  /*
   * Apply the round to words taken in pairs at a stride of 16:
   * (0,1,16,17,...,112,113), then (2,3,18,19,...,114,115), ...
   * finally (14,15,30,31,...,126,127).
   */
  for (k = 0; k < 8; k++) {
	uint64_t *w = &mixed.v[2 * k];

	BLAKE2_ROUND_NOMSG(w[0], w[1], w[16], w[17],
					   w[32], w[33], w[48], w[49],
					   w[64], w[65], w[80], w[81],
					   w[96], w[97], w[112], w[113]);
  }

  /* next_block = saved XOR mixed */
  copy_block(next_block, &saved);
  xor_block(next_block, &mixed);
}

/*
 * Build a new memory block by XORing the result over NEXT_BLOCK.
 * PREV_BLOCK: the previous block
 * REF_BLOCK: the reference block
 * NEXT_BLOCK: the block to be constructed; it is read before being
 * written, so it must already be initialized
 * All block pointers must be valid.
 */
static void
fill_block_with_xor(const Block *prev_block, const Block *ref_block,
					Block *next_block)
{
  Block mixed, saved;
  unsigned int k;

  /* mixed = ref_block XOR prev_block */
  copy_block(&mixed, ref_block);
  xor_block(&mixed, prev_block);

  /* saved = mixed XOR next_block, keeping the old contents for later. */
  copy_block(&saved, &mixed);
  xor_block(&saved, next_block);

  /*
   * Apply the round to each run of 16 consecutive 64-bit words:
   * (0,1,...,15), then (16,17,...,31), ... finally (112,113,...,127).
   */
  for (k = 0; k < 8; k++) {
	uint64_t *w = &mixed.v[16 * k];

	BLAKE2_ROUND_NOMSG(w[0], w[1], w[2], w[3],
					   w[4], w[5], w[6], w[7],
					   w[8], w[9], w[10], w[11],
					   w[12], w[13], w[14], w[15]);
  }

  /*
   * Apply the round to words taken in pairs at a stride of 16:
   * (0,1,16,17,...,112,113), then (2,3,18,19,...,114,115), ...
   * finally (14,15,30,31,...,126,127).
   */
  for (k = 0; k < 8; k++) {
	uint64_t *w = &mixed.v[2 * k];

	BLAKE2_ROUND_NOMSG(w[0], w[1], w[16], w[17],
					   w[32], w[33], w[48], w[49],
					   w[64], w[65], w[80], w[81],
					   w[96], w[97], w[112], w[113]);
  }

  /* next_block = saved XOR mixed */
  copy_block(next_block, &saved);
  xor_block(next_block, &mixed);
}

/*
 * Produce the pseudo-random values used to pick reference blocks within
 * a segment and store them in PSEUDO_RANDS.
 * INSTANCE: the current instance
 * POSITION: the current position
 * PSEUDO_RANDS: output array; must have room for
 * INSTANCE->segment_length 64-bit values
 * Nothing is written if INSTANCE or POSITION is NULL.
 */
static void
generate_addresses(const Argon2_instance_t *instance,
				   const Argon2_position_t *position,
				   uint64_t *pseudo_rands)
{
  Block zeros, counter_input, addresses, scratch;
  uint32_t n;

  init_block_value(&zeros, 0);
  init_block_value(&counter_input, 0);

  if (instance == NULL || position == NULL)
	return;

  counter_input.v[0] = position->pass;
  counter_input.v[1] = position->lane;
  counter_input.v[2] = position->slice;
  counter_input.v[3] = instance->memory_blocks;
  counter_input.v[4] = instance->passes;
  counter_input.v[5] = instance->type;

  for (n = 0; n < instance->segment_length; n++) {
	const uint32_t slot = n % ARGON2_ADDRESSES_IN_BLOCK;

	if (slot == 0) {
	  /*
	   * The current address block is used up (or this is the first
	   * value): bump the counter word and derive a fresh address block
	   * by applying the block function twice.
	   */
	  counter_input.v[6]++;
	  init_block_value(&scratch, 0);
	  init_block_value(&addresses, 0);
	  fill_block_with_xor(&zeros, &counter_input, &scratch);
	  fill_block_with_xor(&zeros, &scratch, &addresses);
	}

	pseudo_rands[n] = addresses.v[slot];
  }
}

/*
 * Fill one segment, using previously filled blocks (possibly from other
 * lanes) as references.
 * INSTANCE: the current instance
 * POSITION: the current position (pass, lane, slice); passed by value,
 * its 'index' field is updated locally for each block
 * All block pointers reachable through INSTANCE must be valid.
 *
 * The table of pre-generated addresses is only allocated when INSTANCE
 * uses data-independent addressing (Argon2i); otherwise it is never read.
 *
 * NOTE(review): the function returns void, so a failed allocation of the
 * address table leaves the segment unfilled without reporting an error
 * to the caller.
 */
static void
fill_segment(const Argon2_instance_t *instance, Argon2_position_t position)
{
  Block *ref_block = NULL, *curr_block = NULL;
  uint64_t pseudo_rand, ref_index, ref_lane;
  uint32_t prev_offset, curr_offset;
  uint32_t starting_index;
  uint32_t i;
  int data_independent_addressing;
  /* Pseudo-random values that determine the reference block position */
  uint64_t *pseudo_rands = NULL;

  if (instance == NULL)
	return;

  data_independent_addressing = (instance->type == Argon2_i);

  if (data_independent_addressing) {
	pseudo_rands = malloc(sizeof(*pseudo_rands) * (instance->segment_length));
	if (pseudo_rands == NULL)
	  return;

	generate_addresses(instance, &position, pseudo_rands);
  }

  starting_index = 0;

  if ((0 == position.pass) && (0 == position.slice))
	starting_index = 2; /* we have already generated the first two blocks */

  /* Offset of the current block */
  curr_offset = position.lane * instance->lane_length
	+ position.slice * instance->segment_length + starting_index;

  if (0 == curr_offset % instance->lane_length) {
	/* Last block in this lane */
	prev_offset = curr_offset + instance->lane_length - 1;
  }
  else {
	/* Previous block */
	prev_offset = curr_offset - 1;
  }

  for (i = starting_index; i < instance->segment_length;
	   ++i, ++curr_offset, ++prev_offset) {
	/* 1.1 Rotating prev_offset if needed */
	if (curr_offset % instance->lane_length == 1) {
	  prev_offset = curr_offset - 1;
	}

	/* 1.2 Computing the index of the reference block */
	/* 1.2.1 Taking pseudo-random value from the previous block */
	if (data_independent_addressing) {
	  pseudo_rand = pseudo_rands[i];
	}
	else {
	  pseudo_rand = instance->memory[prev_offset].v[0];
	}

	/* 1.2.2 Computing the lane of the reference block */
	ref_lane = ((pseudo_rand >> 32)) % instance->lanes;

	if ((position.pass == 0) && (position.slice == 0)) {
	  /* Can not reference other lanes yet */
	  ref_lane = position.lane;
	}

	/*
	 * 1.2.3 Computing the number of possible reference blocks within the
	 * lane.
	 */
	position.index = i;
	ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
							ref_lane == position.lane);

	/* 2 Creating a new block. */
	ref_block
	  = instance->memory + instance->lane_length * ref_lane + ref_index;
	curr_block = instance->memory + curr_offset;
	if (ARGON2_VERSION_10 == instance->version) {
	  /* version 1.2.1 and earlier: overwrite, not XOR */
	  fill_block(instance->memory + prev_offset, ref_block, curr_block);
	}
	else {
	  if (0 == position.pass)
		fill_block(instance->memory + prev_offset, ref_block, curr_block);
	  else
		fill_block_with_xor(instance->memory + prev_offset, ref_block,
							curr_block);
	}
  }

  /* free(NULL) is a no-op, so this is safe when no table was allocated. */
  free(pseudo_rands);
}

/*
 * Start a new thread.
 * HANDLE: where the handle of the new thread is stored. Must not be NULL.
 * FUNC: entry point of the thread. Must not be NULL.
 * ARGS: argument handed to FUNC. May be NULL.
 * Return -1 if HANDLE or FUNC is NULL; otherwise return the result of
 * pthread_create(), which is 0 when the thread was successfully created.
 */
static int
argon2_thread_create(Argon2_thread_handle_t *handle,
					 Argon2_thread_func_t func, void *args)
{
  int rc = -1;

  if (handle != NULL && func != NULL)
	rc = pthread_create(handle, NULL, func, args);

  return(rc);
}

/*
 * Block until the thread HANDLE, created with argon2_thread_create(),
 * has terminated.
 * Return the result of pthread_join(), which is 0 when joining
 * completed successfully. The thread's exit value is discarded.
 */
static int
argon2_thread_join(Argon2_thread_handle_t handle)
{
  int rc;

  rc = pthread_join(handle, NULL);

  return(rc);
}

/*
 * End the calling thread with a NULL exit value. Intended to be called
 * from within a thread started by argon2_thread_create().
 */
static void
argon2_thread_exit(void)
{

  pthread_exit(NULL);
}

/*
 * Run Argon2 of the given TYPE on the inputs described by CONTEXT.
 * Return ARGON2_OK on success; otherwise return the error reported by
 * validate_inputs(), initialize() or fill_memory_blocks(), or
 * ARGON2_INCORRECT_TYPE if TYPE is neither Argon2_d nor Argon2_i.
 */
int
argon2_ctx(Argon2_context *context, Argon2_type type)
{
  Argon2_instance_t instance;
  uint32_t blocks, seg_len;
  int rc;

  /* 1. Validate all inputs. */
  rc = validate_inputs(context);
  if (rc != ARGON2_OK)
	return(rc);

  if (type != Argon2_d && type != Argon2_i)
	return(ARGON2_INCORRECT_TYPE);

  /*
   * 2. Align memory size.
   * At least 2 * ARGON2_SYNC_POINTS blocks are used per lane.
   */
  blocks = context->m_cost;
  if (blocks < 2 * ARGON2_SYNC_POINTS * context->lanes)
	blocks = 2 * ARGON2_SYNC_POINTS * context->lanes;

  /* Round down so that every segment has the same length. */
  seg_len = blocks / (context->lanes * ARGON2_SYNC_POINTS);
  blocks = seg_len * (context->lanes * ARGON2_SYNC_POINTS);

  instance.memory = NULL;
  instance.version = context->version;
  instance.type = type;
  instance.passes = context->t_cost;
  instance.lanes = context->lanes;
  instance.threads = context->threads;
  instance.memory_blocks = blocks;
  instance.segment_length = seg_len;
  instance.lane_length = seg_len * ARGON2_SYNC_POINTS;

  /*
   * 3. Initialization: Hashing inputs, allocating memory, filling first
   * blocks.
   */
  rc = initialize(&instance, context);
  if (rc != ARGON2_OK)
	return(rc);

  /*
   * 4. Filling memory.
   * NOTE(review): on failure this returns without calling finalize();
   * whether the memory set up by initialize() is released on this path
   * cannot be told from here -- confirm.
   */
  rc = fill_memory_blocks(&instance);
  if (rc != ARGON2_OK)
	return(rc);

  /* 5. Finalization */
  finalize(context, &instance);

  return(ARGON2_OK);
}

/*
 * Hash PWD with Argon2 of the given TYPE and VERSION.
 *
 * T_COST, M_COST and PARALLELISM are the time cost, memory cost and
 * number of lanes (also used as the number of threads).
 * SALT, SECRET and AD are the salt, the optional secret and the optional
 * associated data, with their lengths in bytes.
 * If HASH is non-NULL, it is a buffer of at least HASHLEN bytes that
 * receives the raw result.
 * If ENCODED is non-NULL and ENCODEDLEN is non-zero, the encoded
 * parameter string is written to ENCODED (ENCODEDLEN bytes available).
 *
 * The context stores lengths as 32-bit values, so PWDLEN, SALTLEN,
 * SECRETLEN and ADLEN above UINT32_MAX are rejected with the matching
 * ARGON2_*_TOO_LONG code instead of being truncated.
 *
 * Return ARGON2_OK on success. On failure an ARGON2_ error code is
 * returned; if encoding failed, ENCODED is wiped and
 * ARGON2_ENCODING_FAIL is returned (HASH, if given, has already been
 * written at that point). The internal copy of the hash is always wiped
 * before it is released.
 */
int
argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
			const uint32_t parallelism,
			const void *pwd, const size_t pwdlen,
			const void *salt, const size_t saltlen,
			const void *secret, const size_t secretlen,
			const void *ad, const size_t adlen,
			void *hash, const size_t hashlen, char *encoded,
			const size_t encodedlen, Argon2_type type, 
			const uint32_t version)
{
  Argon2_context context;
  int result;
  uint8_t *out;

  if (hashlen > ARGON2_MAX_OUTLEN)
	return(ARGON2_OUTPUT_TOO_LONG);

  if (hashlen < ARGON2_MIN_OUTLEN)
	return(ARGON2_OUTPUT_TOO_SHORT);

  /* Refuse lengths that would not survive the conversion to 32 bits. */
  if (pwdlen > UINT32_MAX)
	return(ARGON2_PWD_TOO_LONG);

  if (saltlen > UINT32_MAX)
	return(ARGON2_SALT_TOO_LONG);

  if (secretlen > UINT32_MAX)
	return(ARGON2_SECRET_TOO_LONG);

  if (adlen > UINT32_MAX)
	return(ARGON2_AD_TOO_LONG);

  if ((out = malloc(hashlen)) == NULL)
	return(ARGON2_MEMORY_ALLOCATION_ERROR);

  context.out = (uint8_t *) out;
  context.outlen = (uint32_t) hashlen;
  context.pwd = CONST_CAST(uint8_t *) pwd;
  context.pwdlen = (uint32_t) pwdlen;
  context.salt = CONST_CAST(uint8_t *) salt;
  context.saltlen = (uint32_t) saltlen;
  context.secret = CONST_CAST(uint8_t *) secret;
  context.secretlen = (uint32_t) secretlen;
  context.ad = CONST_CAST(uint8_t *) ad;
  context.adlen = (uint32_t) adlen;
  context.t_cost = t_cost;
  context.m_cost = m_cost;
  context.lanes = parallelism;
  context.threads = parallelism;
  context.allocate_cbk = NULL;
  context.free_cbk = NULL;
  context.flags = ARGON2_DEFAULT_FLAGS;
  context.version = version;

  result = argon2_ctx(&context, type);
  if (result != ARGON2_OK)
	goto done;

  /* If raw hash requested, write it. */
  if (hash)
	memcpy(hash, out, hashlen);

  /* If encoding requested, write it. */
  if (encoded && encodedlen) {
	if (encode_string(encoded, encodedlen, &context, type) != ARGON2_OK) {
	  /* Do not leave a partial encoding behind. */
	  secure_wipe_memory(encoded, encodedlen);
	  result = ARGON2_ENCODING_FAIL;
	}
  }

 done:
  /* The temporary copy of the hash is wiped on every path. */
  secure_wipe_memory(out, hashlen);
  free(out);

  return(result);
}

/*
 * Hash PWD with Argon2i (ARGON2_VERSION_NUMBER), without secret or
 * associated data, and write the encoded hash string to ENCODED.
 * Return the result of argon2_hash().
 */
int
argon2i_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
					 const uint32_t parallelism, const void *pwd,
					 const size_t pwdlen, const void *salt,
					 const size_t saltlen, const size_t hashlen,
					 char *encoded, const size_t encodedlen)
{
  int rc;

  rc = argon2_hash(t_cost, m_cost, parallelism,
				   pwd, pwdlen, salt, saltlen,
				   NULL, 0,				/* no secret */
				   NULL, 0,				/* no associated data */
				   NULL, hashlen,		/* raw hash not returned */
				   encoded, encodedlen,
				   Argon2_i, ARGON2_VERSION_NUMBER);

  return(rc);
}

/*
 * Hash PWD with Argon2i (ARGON2_VERSION_NUMBER), without secret or
 * associated data, and write the raw HASHLEN-byte result to HASH.
 * Return the result of argon2_hash().
 */
int
argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
				 const uint32_t parallelism, const void *pwd,
				 const size_t pwdlen, const void *salt,
				 const size_t saltlen, void *hash, const size_t hashlen)
{
  int rc;

  rc = argon2_hash(t_cost, m_cost, parallelism,
				   pwd, pwdlen, salt, saltlen,
				   NULL, 0,				/* no secret */
				   NULL, 0,				/* no associated data */
				   hash, hashlen,
				   NULL, 0,				/* no encoded string */
				   Argon2_i, ARGON2_VERSION_NUMBER);

  return(rc);
}

/*
 * Hash PWD with Argon2d (ARGON2_VERSION_NUMBER), without secret or
 * associated data, and write the encoded hash string to ENCODED.
 * Return the result of argon2_hash().
 */
int
argon2d_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
					 const uint32_t parallelism, const void *pwd,
					 const size_t pwdlen, const void *salt,
					 const size_t saltlen, const size_t hashlen,
					 char *encoded, const size_t encodedlen)
{
  int rc;

  rc = argon2_hash(t_cost, m_cost, parallelism,
				   pwd, pwdlen, salt, saltlen,
				   NULL, 0,				/* no secret */
				   NULL, 0,				/* no associated data */
				   NULL, hashlen,		/* raw hash not returned */
				   encoded, encodedlen,
				   Argon2_d, ARGON2_VERSION_NUMBER);

  return(rc);
}

/*
 * Hash PWD with Argon2d (ARGON2_VERSION_NUMBER), without secret or
 * associated data, and write the raw HASHLEN-byte result to HASH.
 * Return the result of argon2_hash().
 */
int
argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
				 const uint32_t parallelism, const void *pwd,
				 const size_t pwdlen, const void *salt,
				 const size_t saltlen, void *hash, const size_t hashlen)
{
  int rc;

  rc = argon2_hash(t_cost, m_cost, parallelism,
				   pwd, pwdlen, salt, saltlen,
				   NULL, 0,				/* no secret */
				   NULL, 0,				/* no associated data */
				   hash, hashlen,
				   NULL, 0,				/* no encoded string */
				   Argon2_d, ARGON2_VERSION_NUMBER);

  return(rc);
}

/*
 * Compare the LEN-byte buffers B1 and B2 without returning early at the
 * first difference: every byte pair is always examined.
 * Return 0 if the buffers are equal (or LEN is 0), -1 otherwise.
 */
static int
argon2_compare(const uint8_t *b1, const uint8_t *b2, size_t len)
{
  size_t i;
  uint8_t d = 0U;

  /* Accumulate the differences of all byte pairs. */
  for (i = 0U; i < len; i++)
	d |= b1[i] ^ b2[i];

  /*
   * Map d == 0 to 0 and d != 0 to -1 without branching on d.
   * The subtraction is done in unsigned arithmetic: for d == 0 it wraps
   * to UINT_MAX, whose bit 8 is set; for d in 1..255 the result is below
   * 256, so bit 8 is clear. This avoids right-shifting a negative int,
   * whose result is implementation-defined.
   */
  return((int) (1U & (((unsigned) d - 1U) >> 8)) - 1);
}

/*
 * Check the password PWD (PWDLEN bytes) against the encoded hash string
 * ENCODED, using the optional SECRET (SECRETLEN bytes).
 * TYPE is the Argon2 flavour that ENCODED is expected to carry.
 *
 * The parameters, associated data, salt and reference hash are decoded
 * from ENCODED, the hash is recomputed and the two are compared with
 * argon2_compare().
 *
 * Return ARGON2_OK if the password matches, ARGON2_VERIFY_MISMATCH if it
 * does not, ARGON2_DECODING_FAIL if ENCODED is NULL or longer than
 * UINT32_MAX characters, ARGON2_MEMORY_ALLOCATION_ERROR if a work buffer
 * cannot be allocated, or the error reported by decode_string() or
 * argon2_hash().
 */
int
argon2_verify(const char *encoded, const void *pwd, size_t pwdlen,
			  const void *secret, size_t secretlen,
			  Argon2_type type)
{
  Argon2_context ctx;
  uint8_t *out = NULL;
  size_t encoded_len;
  int ret;

  if (encoded == NULL)
	return(ARGON2_DECODING_FAIL);

  /* The buffer capacities below are stored as 32-bit values. */
  encoded_len = strlen(encoded);
  if (encoded_len > UINT32_MAX)
	return(ARGON2_DECODING_FAIL);

  /*
   * Max values, to be updated in decode_string. No decoded field can be
   * longer than the encoded string itself.
   */
  ctx.adlen = (uint32_t) encoded_len;
  ctx.saltlen = (uint32_t) encoded_len;
  ctx.outlen = (uint32_t) encoded_len;
  ctx.allocate_cbk = NULL;
  ctx.free_cbk = NULL;
  ctx.secret = CONST_CAST(uint8_t *) secret;
  ctx.secretlen = secretlen;
  ctx.pwdlen = 0;
  ctx.pwd = NULL;
  ctx.ad = malloc(encoded_len);
  ctx.salt = malloc(encoded_len);
  ctx.out = malloc(encoded_len);
  out = malloc(encoded_len);

  if (!ctx.ad || !ctx.salt || !ctx.out || !out) {
	ret = ARGON2_MEMORY_ALLOCATION_ERROR;
	goto cleanup;
  }

  ret = decode_string(&ctx, encoded, type);
  if (ret != ARGON2_OK)
	goto cleanup;

  ret = argon2_hash(ctx.t_cost, ctx.m_cost, ctx.threads, pwd, pwdlen,
					ctx.salt, ctx.saltlen,
					secret, secretlen,
					ctx.ad, ctx.adlen,
					out, ctx.outlen, NULL, 0, type,
					ctx.version);

  if (ret == ARGON2_OK && argon2_compare(out, ctx.out, ctx.outlen))
	ret = ARGON2_VERIFY_MISMATCH;

 cleanup:
  /* The recomputed hash is sensitive; wipe it before releasing it. */
  if (out != NULL)
	secure_wipe_memory(out, encoded_len);

  free(out);
  free(ctx.out);
  free(ctx.salt);
  free(ctx.ad);

  return(ret);
}

/*
 * Verify PWD against the Argon2i hash string ENCODED.
 * Return the result of argon2_verify().
 */
int
argon2i_verify(const char *encoded, const void *pwd, size_t pwdlen,
			   const void *secret, size_t secretlen)
{
  int rc;

  rc = argon2_verify(encoded, pwd, pwdlen, secret, secretlen, Argon2_i);

  return(rc);
}

/*
 * Verify PWD against the Argon2d hash string ENCODED.
 * Return the result of argon2_verify().
 */
int
argon2d_verify(const char *encoded, const void *pwd, size_t pwdlen,
			   const void *secret, size_t secretlen)
{
  int rc;

  rc = argon2_verify(encoded, pwd, pwdlen, secret, secretlen, Argon2_d);

  return(rc);
}

/*
 * Run Argon2d on CONTEXT. Return the result of argon2_ctx().
 */
int
argon2d_ctx(Argon2_context *context)
{
  int rc;

  rc = argon2_ctx(context, Argon2_d);

  return(rc);
}

/*
 * Run Argon2i on CONTEXT. Return the result of argon2_ctx().
 */
int
argon2i_ctx(Argon2_context *context)
{
  int rc;

  rc = argon2_ctx(context, Argon2_i);

  return(rc);
}

/*
 * Run Argon2 of the given TYPE on CONTEXT and compare the result with
 * the CONTEXT->outlen bytes at HASH. The comparison uses
 * argon2_compare(), which examines every byte regardless of where the
 * first difference is.
 *
 * Return 1 if the computed output equals HASH and 0 if it differs.
 * Return ARGON2_INCORRECT_PARAMETER if CONTEXT is NULL,
 * ARGON2_OUT_PTR_MISMATCH if CONTEXT->outlen is 0 or HASH is NULL, or the
 * error reported by argon2_ctx().
 *
 * NOTE(review): a mismatch is reported as 0, which elsewhere in this file
 * is treated as success ("Return 0 if ok"); callers have to test for 1
 * explicitly. The convention is kept as is for compatibility.
 */
int
argon2_verify_ctx(Argon2_context *context, const char *hash, Argon2_type type)
{
  int result;

  /* Check the pointer before reading context->outlen. */
  if (NULL == context)
	return(ARGON2_INCORRECT_PARAMETER);

  if (0 == context->outlen || NULL == hash)
	return(ARGON2_OUT_PTR_MISMATCH);

  result = argon2_ctx(context, type);

  if (ARGON2_OK != result)
	return(result);

  return(0 == argon2_compare((const uint8_t *) hash, context->out,
							 context->outlen));
}

/*
 * Run Argon2d on CONTEXT and compare the output with HASH.
 * Return the result of argon2_verify_ctx().
 */
int
argon2d_verify_ctx(Argon2_context *context, const char *hash)
{
  int rc;

  rc = argon2_verify_ctx(context, hash, Argon2_d);

  return(rc);
}

/*
 * Run Argon2i on CONTEXT and compare the output with HASH.
 * Return the result of argon2_verify_ctx().
 */
int
argon2i_verify_ctx(Argon2_context *context, const char *hash)
{
  int rc;

  rc = argon2_verify_ctx(context, hash, Argon2_i);

  return(rc);
}

const char *
argon2_error_message(int error_code)
{

  switch (error_code) {
  case ARGON2_OK:
	return("OK");
  case ARGON2_OUTPUT_PTR_NULL:
	return("Output pointer is NULL");
  case ARGON2_OUTPUT_TOO_SHORT:
	return("Output is too short");
  case ARGON2_OUTPUT_TOO_LONG:
	return("Output is too long");
  case ARGON2_PWD_TOO_SHORT:
	return("Password is too short");
  case ARGON2_PWD_TOO_LONG:
	return("Password is too long");
  case ARGON2_SALT_TOO_SHORT:
	return("Salt is too short");
  case ARGON2_SALT_TOO_LONG:
	return("Salt is too long");
  case ARGON2_AD_TOO_SHORT:
	return("Associated data is too short");
  case ARGON2_AD_TOO_LONG:
	return("Associated data is too long");
  case ARGON2_SECRET_TOO_SHORT:
	return("Secret is too short");
  case ARGON2_SECRET_TOO_LONG:
	return("Secret is too long");
  case ARGON2_TIME_TOO_SMALL:
	return("Time cost is too small");
  case ARGON2_TIME_TOO_LARGE:
	return("Time cost is too large");
  case ARGON2_MEMORY_TOO_LITTLE:
	return("Memory cost is too small");
  case ARGON2_MEMORY_TOO_MUCH:
	return("Memory cost is too large");
  case ARGON2_LANES_TOO_FEW:
	return("Too few lanes");
  case ARGON2_LANES_TOO_MANY:
	return("Too many lanes");
  case ARGON2_PWD_PTR_MISMATCH:
	return("Password pointer is NULL, but password length is not 0");
  case ARGON2_SALT_PTR_MISMATCH:
	return("Salt pointer is NULL, but salt length is not 0");
  case ARGON2_SECRET_PTR_MISMATCH:
	return("Secret pointer is NULL, but secret length is not 0");
  case ARGON2_AD_PTR_MISMATCH:
	return("Associated data pointer is NULL, but ad length is not 0");
  case ARGON2_MEMORY_ALLOCATION_ERROR:
	return("Memory allocation error");
  case ARGON2_FREE_MEMORY_CBK_NULL:
	return("The free memory callback is NULL");
  case ARGON2_ALLOCATE_MEMORY_CBK_NULL:
	return("The allocate memory callback is NULL");
  case ARGON2_INCORRECT_PARAMETER:
	return("Argon2_Context context is NULL");
  case ARGON2_INCORRECT_TYPE:
	return("There is no such version of Argon2");
  case ARGON2_OUT_PTR_MISMATCH:
	return("Output pointer mismatch");
  case ARGON2_THREADS_TOO_FEW:
	return("Not enough threads");
  case ARGON2_THREADS_TOO_MANY:
	return("Too many threads");
  case ARGON2_MISSING_ARGS:
	return("Missing arguments");
  case ARGON2_ENCODING_FAIL:
	return("Encoding failed");
  case ARGON2_DECODING_FAIL:
	return("Decoding failed");
  case ARGON2_THREAD_FAIL:
	return("Threading failure");
  case ARGON2_DECODING_LENGTH_FAIL:
	return("Some of encoded parameters are too long or too short");
  case ARGON2_VERIFY_MISMATCH:
	return("The password does not match the supplied hash");
  default:
	return("Unknown error code");
  }
}

/*
 * Compute a buffer length for the encoded hash string built from the
 * given costs, parallelism and salt, hash and associated data lengths.
 * The result is the length of the fixed punctuation plus numlen() of
 * each number (with ARGON2_VERSION_NUMBER as the version) plus b64len()
 * of each binary part.
 */
static size_t
argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, uint32_t parallelism,
				  uint32_t saltlen, uint32_t hashlen, uint32_t adlen)
{
  /* Fixed characters of the format. */
  size_t total = strlen("$argon2x$v=$m=,t=,p=$$,data=$$");

  /* Decimal fields. */
  total += numlen(ARGON2_VERSION_NUMBER);
  total += numlen(m_cost);
  total += numlen(t_cost);
  total += numlen(parallelism);

  /* Base64 fields. */
  total += b64len(saltlen);
  total += b64len(hashlen);
  total += b64len(adlen);

  return(total);
}

/*
 * NOTE(review): this definition comes after every #include at the top of
 * the file, so it presumably cannot influence those headers -- confirm
 * whether it is still needed.
 */
#define _GNU_SOURCE 1

/*
 * Default parameter values. Within this part of the file only OUTLEN_DEF
 * is referenced (size of the expected digest in do_argon2_tests()).
 */
#define T_COST_DEF 3		/* Default time cost (iterations). */
#define LOG_M_COST_DEF 12 /* 2^12 = 4 MiB */
#define LANES_DEF 1			/* Default number of lanes. */
#define THREADS_DEF 1		/* Default number of threads. */
#define OUTLEN_DEF 32		/* Default output length, in bytes. */
#define MAX_PASS_LEN 128	/* Presumably the longest accepted password. */

/*
 * Wrapper for Argon2 with given inputs and parameters.
 * OUT: buffer to put the digest value (may be NULL if only the encoded
 * string is wanted)
 * OUTLEN: size of the output buffer, in bytes
 * PWD: the password vector, of length PWDLEN bytes
 * SALT: the salt vector, of length SALTLEN bytes
 * SECRET: the (optional) secret vector, of length SECRETLEN bytes
 * AD: the (optional) associated data vector, of length ADLEN bytes
 * T_COST: number of iterations
 * M_COST: amount of requested memory, in KB
 * LANES: amount of requested parallelism
 * THREADS: actual parallelism
 * TYPE: the flavour of Argon2 to be used
 * ENCODEDP: if non-NULL, receives a malloc()ed string holding the encoded
 * (parameterized) hash; the caller must free() it. It is only set on
 * success.
 *
 * PWD is wiped if the salt is missing or if the buffer for the encoded
 * string cannot be allocated; it is left untouched otherwise.
 *
 * NOTE(review): LANES is only used to size the encoded string; THREADS is
 * the value passed to argon2_hash() as the parallelism -- confirm this is
 * intended.
 *
 * Return 0 if ok, otherwise a negative error code (Argon2_ErrorCodes).
 */
int
argon2(uint8_t *out, uint32_t outlen, unsigned char *pwd, size_t pwdlen,
	   unsigned char *salt, size_t saltlen,
	   unsigned char *secret, size_t secretlen,
	   unsigned char *ad, size_t adlen,
	   uint32_t t_cost, uint32_t m_cost, uint32_t lanes, uint32_t threads,
	   Argon2_type type, char **encodedp)
{
  int result;
  char *encoded;
  size_t encodedlen;

  if (pwd == NULL || pwdlen == 0) {
	err("password missing");
	return(ARGON2_PWD_TOO_SHORT);
  }

  if (salt == NULL || saltlen == 0) {
	secure_wipe_memory(pwd, pwdlen);
	err("salt missing");
	return(ARGON2_SALT_TOO_SHORT);
  }

  encodedlen = argon2_encodedlen(t_cost, m_cost, lanes, saltlen, outlen, adlen);
  encoded = malloc(encodedlen + 1);
  if (encoded == NULL) {
	secure_wipe_memory(pwd, pwdlen);
	err("could not allocate memory for hash");
	return(ARGON2_MEMORY_ALLOCATION_ERROR);
  }

  result = argon2_hash(t_cost, m_cost, threads, pwd, pwdlen, salt, saltlen,
					   secret, secretlen, ad, adlen,
					   out, outlen, encoded, encodedlen, type,
					   ARGON2_VERSION_NUMBER);
  if (result != ARGON2_OK) {
	/* The encoded buffer is not handed out on failure; release it. */
	free(encoded);
	err(argon2_error_message(result));
	return(result);
  }

  if (encodedp != NULL) {
	/* Ownership of the string passes to the caller. */
	*encodedp = encoded;
  }
  else {
	/* Nobody wants the string: wipe the encoded hash and release it. */
	secure_wipe_memory(encoded, encodedlen + 1);
	free(encoded);
  }

  return(ARGON2_OK);
}

/*
 * Self-test: run the Argon2i test vector announced in the banner below
 * and check the prehash digest, the final digest and the verification of
 * the encoded hash string. Progress and results are written to stderr.
 *
 * Return 0 if every check passes, -1 otherwise.
 *
 * NOTE(review): prehash_digest_buf is defined elsewhere in this file; the
 * buffer allocated for it here is deliberately not released because its
 * ownership cannot be told from this function -- confirm.
 */
int
do_argon2_tests(void)
{
  int rc, status;
  char *encoded;
  uint32_t lanes, outlen, m_cost, t_cost, threads;
  uint8_t *out;
  size_t pwdlen, secretlen;
  Argon2_type type;
  unsigned char pwd[32], salt[16];
  static uint8_t secret[8] = {
	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03
  };
  static uint8_t ad[12] = {
	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04
  };
  static uint8_t expected_prehash[ARGON2_PREHASH_DIGEST_LENGTH] = {
	0xc4, 0x60, 0x65, 0x81, 0x52, 0x76, 0xa0, 0xb3,
	0xe7, 0x31, 0x73, 0x1c, 0x90, 0x2f, 0x1f, 0xd8,
	0x0c, 0xf7, 0x76, 0x90, 0x7f, 0xbb, 0x7b, 0x6a,
	0x5c, 0xa7, 0x2e, 0x7b, 0x56, 0x01, 0x1f, 0xee,
	0xca, 0x44, 0x6c, 0x86, 0xdd, 0x75, 0xb9, 0x46,
	0x9a, 0x5e, 0x68, 0x79, 0xde, 0xc4, 0xb7, 0x2d,
	0x08, 0x63, 0xfb, 0x93, 0x9b, 0x98, 0x2e, 0x5f,
	0x39, 0x7c, 0xc7, 0xd1, 0x64, 0xfd, 0xda, 0xa9
  };
  static uint8_t expected_digest[OUTLEN_DEF] = {
	0xc8, 0x14, 0xd9, 0xd1, 0xdc, 0x7f, 0x37, 0xaa,
	0x13, 0xf0, 0xd7, 0x7f, 0x24, 0x94, 0xbd, 0xa1,
	0xc8, 0xde, 0x6b, 0x01, 0x6d, 0xd3, 0x88, 0xd2,
	0x99, 0x52, 0xa4, 0xc4, 0x67, 0x2b, 0x6c, 0xe8
  };

  status = -1;
  encoded = NULL;
  out = NULL;

  fprintf(stderr, "draft-irtf-cfrg-argon2-00, S6.2. Argon2i Test Vector\n");

  pwdlen = sizeof(pwd);
  memset(pwd, 1, pwdlen);
  memset(salt, 2, sizeof(salt));
  secretlen = sizeof(secret);
  threads = 4;
  type = Argon2_i;

  t_cost = 3; 	/* Iterations */
  m_cost = 32;	/* Memory requirement is this value times 1024 */
  lanes = 4;
  outlen = 32;

  prehash_digest_buf = (uint8_t *) malloc(ARGON2_PREHASH_SEED_LENGTH);
  if (prehash_digest_buf == NULL) {
	/* It is read below, so a failed allocation must stop the test. */
	secure_wipe_memory(pwd, sizeof(pwd));
	err("could not allocate memory for prehash digest");
	return(-1);
  }

  if ((out = malloc(outlen + 1)) == NULL) {
	secure_wipe_memory(pwd, sizeof(pwd));
	err("could not allocate memory for output");
	return(-1);
  }

  /* Generate the encoding descriptor string. */
  rc = argon2(NULL, outlen, pwd, pwdlen, salt, sizeof(salt),
			  secret, secretlen, ad, sizeof(ad),
			  t_cost, m_cost, lanes, threads, type, &encoded);
  if (rc != ARGON2_OK) {
	/* ENCODED is only set on success; do not print it. */
	goto done;
  }

  fprintf(stderr, "Encoded: %s\n", encoded);

  /* Generate the hash. */
  rc = argon2(out, outlen, pwd, pwdlen, salt, sizeof(salt),
			  secret, secretlen, ad, sizeof(ad),
			  t_cost, m_cost, lanes, threads, type, NULL);
  if (rc != ARGON2_OK)
	goto done;

  fprintf(stderr, "Prehash digest:\n");
  print_hex(stderr, prehash_digest_buf, ARGON2_PREHASH_DIGEST_LENGTH);

  fprintf(stderr, "Hash:\n");
  print_hex(stderr, out, outlen);

  if (memcmp(prehash_digest_buf, expected_prehash, sizeof(expected_prehash))) {
	err("Invalid prehash digest value");
	goto done;
  }

  if (memcmp(out, expected_digest, sizeof(expected_digest))) {
	err("Invalid digest value");
	goto done;
  }

  /*
   * Recompute the hash from the descriptor string and user-level parameters
   * and compare to the original value.
   */
  fprintf(stderr, "Verifying: ");
  rc = argon2_verify(encoded, pwd, pwdlen, secret, secretlen, type);
  if (rc != ARGON2_OK) {
	err(argon2_error_message(rc));
	goto done;
  }
  fprintf(stderr, "ok\n");

  status = 0;

 done:
  /* Release what this function owns on every path. */
  free(encoded);
  free(out);

  return(status);
}

#ifdef PROG
/*
 * Stand-alone test driver (built only when PROG is defined): run the
 * Argon2 self-test and report the outcome on stderr.
 * Return 0 if the self-test passed, -1 otherwise.
 */
int
main(int argc, char **argv)
{
  int failed;

  failed = (do_argon2_tests() != 0);

  if (failed) {
	fprintf(stderr, "Argon2 failed\n");
	return(-1);
  }

  fprintf(stderr, "Argon2 ok\n");
  return(0);
}
#endif
