#include <sys/time.h>

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static size_t kjs_strlen(const char *);
static size_t cp_strlen(const char *);
static size_t kjs_strlen_utf8(const char *);
static size_t gp_strlen_utf8(const char *s);
static size_t cp_strlen_utf8(const char *s);
static size_t testfunc(size_t(const char *), const char *,
    double *, double *);

static size_t
kjs_strlen(const char * s)
{
	size_t i = 0;

	while (*s++)
		i++;
	return (i);
}

#define ONEMASK ((size_t)(-1) / 0xFF)

static size_t
cp_strlen(const char * _s)
{
	const char * s;
	size_t u;

	/* Handle any initial misaligned bytes. */
	for (s = _s; (uintptr_t)(s) & (sizeof(size_t) - 1); s++) {
		/* Exit if we hit a zero byte. */
		if (*s == '\0')
			goto done;
	}

	/* Handle complete blocks. */
	for (; ; s += sizeof(size_t)) {
		/* Prefetch 256 bytes ahead. */
		__builtin_prefetch(&s[256], 0, 0);

		/* Grab 4 or 8 bytes of UTF-8 data. */
		u = *(size_t *)(s);

		/* Exit the loop if there are any zero bytes. */
		if ((u - ONEMASK) & (~u) & (ONEMASK * 0x80))
			break;
	}

	/* Take care of any left-over bytes. */
	for (; ; s++) {
		/* Exit if we hit a zero byte. */
		if (*s == '\0')
			break;
	}

done:
	return (s - _s);

}

static size_t
kjs_strlen_utf8(const char * s)
{
	size_t i = 0, j = 0;

	while (s[i]) {
		if ((s[i] & 0xc0) != 0x80)
			j++;
		i++;
	}
	return (j);
}

static size_t
gp_strlen_utf8(const char *_s)
{
	const signed char * s = _s;
	size_t i = 0;
	size_t iBefore = 0;
	size_t count = 0;

	while (s[i] > 0) {
ascii:
		i++;
	}

	count += i - iBefore;

	while (s[i]) {
		if (s[i] > 0) {
			iBefore = i;
			goto ascii;
		} else {
			switch (0xF0 & s[i]) {
			case 0xE0:
				i += 3;
				break;
			case 0xF0:
				i += 4;
				break;
			default:
				i += 2;
				break;
			}
		}

		count++;
	}

	return (count);
}

static size_t
cp_strlen_utf8(const char * _s)
{
	const char * s;
	size_t count = 0;
	size_t u;
	unsigned char b;

	/* Handle any initial misaligned bytes. */
	for (s = _s; (uintptr_t)(s) & (sizeof(size_t) - 1); s++) {
		b = *s;

		/* Exit if we hit a zero byte. */
		if (b == '\0')
			goto done;

		/* Is this byte NOT the first byte of a character? */
		count += (b >> 7) & ((~b) >> 6);
	}

	/* Handle complete blocks. */
	for (; ; s += sizeof(size_t)) {
		/* Prefetch 256 bytes ahead. */
		__builtin_prefetch(&s[256], 0, 0);

		/* Grab 4 or 8 bytes of UTF-8 data. */
		u = *(size_t *)(s);

		/* Exit the loop if there are any zero bytes. */
		if ((u - ONEMASK) & (~u) & (ONEMASK * 0x80))
			break;

		/* Count bytes which are NOT the first byte of a character. */
		u = ((u & (ONEMASK * 0x80)) >> 7) & ((~u) >> 6);
		count += (u * ONEMASK) >> ((sizeof(size_t) - 1) * 8);
	}

	/* Take care of any left-over bytes. */
	for (; ; s++) {
		b = *s;

		/* Exit if we hit a zero byte. */
		if (b == '\0')
			break;

		/* Is this byte NOT the first byte of a character? */
		count += (b >> 7) & ((~b) >> 6);
	}

done:
	return ((s - _s) - count);
}

static size_t
testfunc(size_t func(const char *), const char * s, double * mu, double * sd)
{
	struct timeval stime, etime;
	int i;
	size_t res;
	double t, sum, sum2;

	sum = sum2 = 0;
	for (i = 0; i < 10; i++) {
		/* Do the test. */
		if (gettimeofday(&stime, NULL)) {
			fprintf(stderr, "gettimeofday failed");
			exit(1);
		}
		res = func(s);
		if (gettimeofday(&etime, NULL)) {
			fprintf(stderr, "gettimeofday failed");
			exit(1);
		}

		/* Compute the time taken and add it to the sums. */
		t = (etime.tv_sec - stime.tv_sec) +
		    (etime.tv_usec - stime.tv_usec) / 1000000.0;
		sum += t;
		sum2 += t * t;
	}

	/* Compute statistics and return result. */
	*mu = sum / 10;
	*sd = sqrt(sum2 - sum * sum / 10) / 3;
	return (res);
}

/* List of implementations. */
struct {
	const char * name;
	size_t (* func)(const char *);
} testees[] = {
	{ "gcc_strlen", strlen },
	{ "kjs_strlen", kjs_strlen },
	{ "cp_strlen", cp_strlen },
	{ "kjs_strlen_utf8", kjs_strlen_utf8 },
	{ "gp_strlen_utf8", gp_strlen_utf8 },
	{ "cp_strlen_utf8", cp_strlen_utf8 },
};

/* Test strings. */
const char * strings[] = {
	"",
	"hello, world",
	"na\xc3\xafve",
	/* ko n ni chi wa = 5 */
	"\xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf",
	NULL
};

/* Length of buffer to use for performance tests: 32MB. */
#define BIGSTRINGLEN 33554432

int
main(int argc, char * argv[])
{
	const char ** s;
	size_t i;
	char * bigstring;
	size_t res;
	double mu, sd;

	(void)argc; /* UNUSED */
	(void)argv; /* UNUSED */

	/*
	 * Print output from each function applied to each string, so that
	 * the user can see that the correct values are being printed.
	 */
	for (s = strings; *s != NULL; s++) {
		printf("\"%s\":", *s);
		for (i = 0; i < sizeof(testees) / sizeof(testees[0]); i++)
			printf(" %zu", testees[i].func(*s));
		printf("\n");
	}

	/* Allocate space for long-string tests. */
	if ((bigstring = malloc(BIGSTRINGLEN)) == NULL) {
		fprintf(stderr, "Out of memory");
		exit(1);
	}

	/*
	 * For each of the non-empty test strings, fill bigstring with copies
	 * of the test string; then time each of the test functions.
	 */
	for (s = strings; *s != NULL; s++) {
		/* Skip empty strings. */
		if ((*s)[0] == '\0')
			continue;

		/*
		 * Fill bigstring buffer with as many complete copies of the
		 * test string as we can fit, and NUL terminate.
		 */
		for (i = 0; i < BIGSTRINGLEN - strlen(*s); i += strlen(*s)) {
			memcpy(&bigstring[i], *s, strlen(*s));
		}
		bigstring[i] = '\0';

		/* Print header. */
		printf("testing %zu bytes of repeated \"%s\":\n", i, *s);

		/* Test each function. */
		for (i = 0; i < sizeof(testees) / sizeof(testees[0]); i++) {
			/* Do 10 tests and report the mean and sd. */
			res = testfunc(testees[i].func, bigstring, &mu, &sd);

			/* Print the results. */
			printf("%32s = %10zu: %lf +/- %lf\n",
			    testees[i].name, res, mu, sd);
		}
	}

	/*
	 * Free allocated memory (pointless since we're about to exit, but
	 * good practice anyway).
	 */
	free(bigstring);

	return (0);
}
