/*
 * libwebsockets - trie
 *
 * Copyright (C) 2018 Andy Green <andy@warmcat.com>
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation:
 *  version 2.1 of the License.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 *  MA  02110-1301  USA
 *
 * The functions allow
 *
 *  - collecting a concordance of strings from one or more files (eg, a
 *    directory of files) into a single in-memory, lac-backed trie;
 *
 *  - to optimize and serialize the in-memory trie to an fd;
 *
 *  - to very quickly report any instances of a string in any of the files
 *    indexed by the trie, by seeking around a serialized trie fd, without
 *    having to load it all in memory
 */

#include "core/private.h"
#include "misc/fts/private.h"

#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/types.h>

struct lws_fts_entry;

/*
 * One of these exists per indexed input file.  Notice they are allocated from
 * t->lwsac_head by lws_fts_file_index(), so unlike the per-input instance and
 * line structs below they are not freed when an input file completes.
 */

struct lws_fts_filepath {
	/* next-older filepath: new entries are pushed at t->filepath_list */
	struct lws_fts_filepath *next;
	/* reverse link, only filled in by lws_fts_serialize() */
	struct lws_fts_filepath *prev;
	/* NUL-terminated copy of the path, truncated to fit if necessary */
	char filepath[256];
	/* output-file offset of this filepath record, set at serialize time */
	jg2_file_offset ofs;
	/* output-file length (t->c) at the time the file was registered */
	jg2_file_offset line_table_ofs;
	/* path length as passed in by the caller (not recomputed) */
	int filepath_len;
	/* sequential index handed out from t->next_file_index */
	int file_index;
	/* incremented for each '\n' seen by lws_fts_fill() */
	int total_lines;
	/* caller-supplied value, stored as-is */
	int priority;
};

/*
 * Overflow storage for VLI-coded line numbers belonging to one
 * lws_fts_instance_file.  Notice these are stored in t->lwsac_input_head,
 * which has input file scope: it is freed by finalize_per_input().
 */

struct lws_fts_lines {
	/* next overflow struct in the chain, or NULL at the tail */
	struct lws_fts_lines *lines_next;
	/*
	 * amount of line numbers needs to meet average count for best
	 * efficiency.
	 *
	 * Line numbers are stored in VLI format since if we don't, around half
	 * the total lac allocation consists of struct lws_fts_lines...
	 * size chosen to maintain 8-byte struct alignment
	 */
	uint8_t vli[119];
	/* number of bytes of vli[] currently in use */
	char count;
};

/*
 * this represents the instances of a symbol inside a given filepath
 *
 * These are allocated from t->lwsac_input_head by lws_fts_fill() and written
 * out and discarded by finalize_per_input().
 */

struct lws_fts_instance_file {
	/* linked-list of tifs generated for current file */
	struct lws_fts_instance_file *inst_file_next;
	/* the trie entry these instances belong to */
	struct lws_fts_entry *owner;
	/* overflow line-number chain, used once the inline vli[] is full */
	struct lws_fts_lines *lines_list, *lines_tail;
	/* index of the file the instances were found in */
	uint32_t file_index;
	/* how many instances have been recorded here */
	uint32_t total;

	/*
	 * optimization for the common case there's only 1 - ~3 matches, so we
	 * don't have to allocate any lws_fts_lines struct
	 *
	 * Using 8 bytes total for this maintains 8-byte struct alignment...
	 */

	uint8_t vli[7];
	/* number of bytes of the inline vli[] currently in use */
	char count;
};

/*
 * this is the main trie in-memory allocation object
 *
 * Each entry matches either the single character .c, or, when .suffix is
 * non-NULL, the .suffix_len bytes at .suffix (whose first byte is the same
 * as .c).  The suffix is not NUL-terminated.
 */

struct lws_fts_entry {
	struct lws_fts_entry *parent;

	/* first child; lws_fts_entry_child_add() keeps it in ascending .c order */
	struct lws_fts_entry *child_list;
	/* next entry sharing our parent */
	struct lws_fts_entry *sibling;

	/*
	 * care... this points to content in t->lwsac_input_head, it goes
	 * out of scope when the input file being indexed completes
	 */
	struct lws_fts_instance_file *inst_file_list;

	/* output-file offset of the newest instance record written for us, or 0 */
	jg2_file_offset ofs_last_inst_file;

	char *suffix; /* suffix string or NULL if one char (in .c) */
	/* output-file offset of this entry, set by lws_fts_serialize() */
	jg2_file_offset ofs;
	uint32_t child_count; /* number of entries on child_list */
	uint32_t instance_count; /* symbols that ended exactly on this entry */
	/* totals over this entry and everything below it (serialize time) */
	uint32_t agg_inst_count;
	uint32_t agg_child_count;
	uint32_t suffix_len; /* only meaningful when suffix is non-NULL */
	unsigned char c;
};

/*
 * there's only one of these per trie file
 *
 * It is itself allocated from lwsac_head, see lws_fts_create().
 */

struct lws_fts {
	/* long-lived allocations: this struct, trie entries, suffixes, filepaths */
	struct lwsac *lwsac_head;
	/* per-input-file allocations, freed by finalize_per_input() */
	struct lwsac *lwsac_input_head;
	struct lws_fts_entry *root;
	/* newest-first list of indexed files */
	struct lws_fts_filepath *filepath_list;
	/* most recently registered file */
	struct lws_fts_filepath *fp;

	/* trie entry the parser has matched up to; root between symbols */
	struct lws_fts_entry *parser;
	/* direct lookup of root's children by first character */
	struct lws_fts_entry *root_lookup[256];

	/*
	 * head of linked-list of tifs generated for current file
	 * care... this points to content in t->lwsac_input_head
	 */
	struct lws_fts_instance_file *tif_list;

	jg2_file_offset c; /* length of output file so far */

	/* statistics reported at the end of lws_fts_serialize() */
	uint64_t agg_trie_creation_us;
	uint64_t agg_raw_input;
	uint64_t worst_lwsac_input_size;
	/* file_index of the previous lws_fts_fill() call, -1 before the first */
	int last_file_index;
	/* characters seen so far on the current input line */
	int chars_in_line;
	jg2_file_offset last_block_len_ofs;
	/* 1-based line number within the current input file */
	int line_number;
	/* lines added to the line table chunk currently being built */
	int lines_in_unsealed_linetable;
	/* index that will be given to the next registered file */
	int next_file_index;
	/* total trie entries created */
	int count_entries;

	/* output file descriptor */
	int fd;
	/* bytes of agg[] in use */
	unsigned int agg_pos;
	/* nonzero while matching inside parser->suffix: index of next byte */
	unsigned int str_match_pos;

	/* nonzero while collecting the rest of a new symbol into agg[] */
	unsigned char aggregate;
	unsigned char agg[128];
};

/*
 * Chunk size passed to every lwsac_use() call in this file.
 *
 * since the kernel case allocates >300MB, no point keeping this too low
 */

#define TRIE_LWSAC_BLOCK_SIZE (1024 * 1024)

/*
 * spill(margin, force): flush the staging buffer to the output file if needed.
 *
 * Relies on the following names in the calling function: `t` (struct lws_fts
 * *), `buf` (a true array, since sizeof(buf) is used) and `bp` (int count of
 * bytes staged in buf).
 *
 * If anything is staged and either fewer than `margin` bytes are left in buf
 * or `force` is nonzero, buf[0..bp) is written to t->fd, t->c is advanced by
 * bp and bp is reset to 0.  On a failed or short write it logs and makes the
 * CALLING function return 1.
 *
 * Wrapped in do { } while (0) so it behaves as a single statement, eg, as the
 * body of an unbraced if / else.
 */
#define spill(margin, force) \
	do { \
		if (bp && ((uint32_t)bp >= (sizeof(buf) - (margin)) || \
			   (force))) { \
			if (write(t->fd, buf, bp) != bp) { \
				lwsl_err("%s: write %d failed (%d)\n", \
					 __func__, bp, errno); \
				return 1; \
			} \
			t->c += bp; \
			bp = 0; \
		} \
	} while (0)

/*
 * Store the 32-bit value d at b[0..3], most significant byte first.
 * Returns the number of bytes written, which is always 4.
 */
static int
g32(unsigned char *b, uint32_t d)
{
	int shift, n = 0;

	for (shift = 24; shift >= 0; shift -= 8)
		b[n++] = (unsigned char)((d >> shift) & 0xff);

	return n;
}

/*
 * Store the low 16 bits of d at b[0..1], most significant byte first.
 * Returns the number of bytes written, which is always 2.
 */
static int
g16(unsigned char *b, int d)
{
	b[0] = (unsigned char)((d >> 8) & 0xff);
	b[1] = (unsigned char)(d & 0xff);

	return 2;
}

/*
 * Write d at b as a VLI: 7 bits per byte, most significant group first, with
 * bit 7 set on every byte except the last.  Leading all-zero groups are not
 * emitted, so between 1 and 5 bytes are produced.
 *
 * Returns the number of bytes written.
 */
static int
wq32(unsigned char *b, uint32_t d)
{
	uint32_t rest = d >> 7;
	int groups = 1, n = 0;

	/* count how many 7-bit groups are needed to hold d */
	while (rest) {
		groups++;
		rest >>= 7;
	}

	/* every group but the last carries the continuation bit */
	while (groups > 1) {
		groups--;
		b[n++] = (unsigned char)(((d >> (7 * groups)) | 0x80) & 0xff);
	}

	b[n++] = (unsigned char)(d & 0x7f);

	return n;
}


/*
 * Read a VLI from b into *d and return the number of bytes used.
 *
 * Each byte supplies 7 bits, most significant group first; bit 7 set means
 * another byte follows.  At most 5 bytes are consumed: the continuation bit
 * of a fifth byte is ignored.
 */

int
rq32(unsigned char *b, uint32_t *d)
{
	uint32_t acc = 0;
	int used = 0;

	for (;;) {
		unsigned char v = b[used++];

		acc = (acc << 7) | (uint32_t)(v & 0x7f);
		if (!(v & 0x80) || used == 5)
			break;
	}

	*d = acc;

	return used;
}

/*
 * Create an empty in-memory trie that will be serialized to fd.
 *
 * fd: descriptor the trie file is written to; a placeholder file header of
 *     TRIE_FILE_HDR_SIZE bytes is written to it immediately.
 *
 * The struct lws_fts and its root entry are allocated from a new lac whose
 * head is kept in t->lwsac_head.
 *
 * Returns the new trie, or NULL on allocation or header write failure (in
 * which case anything allocated here has been freed).
 */
struct lws_fts *
lws_fts_create(int fd)
{
	struct lws_fts *t;
	struct lwsac *lwsac_head = NULL;
	unsigned char buf[TRIE_FILE_HDR_SIZE];

	t = lwsac_use(&lwsac_head, sizeof(*t), TRIE_LWSAC_BLOCK_SIZE);
	if (!t)
		return NULL;

	memset(t, 0, sizeof(*t));

	t->fd = fd;
	t->lwsac_head = lwsac_head;
	t->root = lwsac_use(&lwsac_head, sizeof(*t->root),
			    TRIE_LWSAC_BLOCK_SIZE);
	if (!t->root)
		goto unwind;

	memset(t->root, 0, sizeof(*t->root));
	t->parser = t->root;
	t->last_file_index = -1;
	t->line_number = 1;
	t->filepath_list = NULL;

	/* clear the whole lookup table, not just its first element */
	memset(t->root_lookup, 0, sizeof(t->root_lookup));

	/* write the header */

	/*
	 * Every byte of buf is written out below, so zero it all first rather
	 * than leak stack content into any part the fields do not cover.
	 */
	memset(buf, 0, sizeof(buf));

	buf[0] = 0xca;
	buf[1] = 0x7a;
	buf[2] = 0x5f;
	buf[3] = 0x75;

	/* (these are filled in with correct data at the end) */

	/* file offset to root trie entry */
	g32(&buf[4], 0);
	/* file length when it was created */
	g32(&buf[8], 0);
	/* fileoffset to the filepath table */
	g32(&buf[0xc], 0);
	/* count of filepaths */
	g32(&buf[0x10], 0);

	if (write(t->fd, buf, TRIE_FILE_HDR_SIZE) != TRIE_FILE_HDR_SIZE) {
		lwsl_err("%s: trie header write failed\n", __func__);
		goto unwind;
	}

	t->c = TRIE_FILE_HDR_SIZE;

	return t;

unwind:
	/* t itself lives in this lac, so this releases everything */
	lwsac_free(&lwsac_head);

	return NULL;
}

/*
 * Free everything belonging to the trie and set *trie to NULL.
 *
 * trie: address of the pointer returned by lws_fts_create().  Passing NULL,
 *       or the address of a NULL pointer, is a no-op.
 *
 * The fd given to lws_fts_create() is not closed here.
 */
void
lws_fts_destroy(struct lws_fts **trie)
{
	struct lwsac *lwsac_head;

	if (!trie || !*trie)
		return;

	/*
	 * lws_fts_create() allocates the struct lws_fts inside lwsac_head,
	 * so take a copy of the head pointer before freeing that lac.
	 */
	lwsac_head = (*trie)->lwsac_head;
	lwsac_free(&(*trie)->lwsac_input_head);
	lwsac_free(&lwsac_head);
	*trie = NULL;
}

/*
 * Register a new input file with the trie and make it the current one.
 *
 * t:            the trie
 * filepath:     path to record; it is copied, truncating to fit fp->filepath
 * filepath_len: stored as given
 * priority:     stored as given
 *
 * A new index is always allocated; the lookup for an already-registered path
 * is compiled out.
 *
 * Returns the index assigned to the file, or -1 if allocation failed.
 */
int
lws_fts_file_index(struct lws_fts *t, const char *filepath, int filepath_len,
		    int priority)
{
	struct lws_fts_filepath *fp = t->filepath_list;
#if 0
	for (; fp; fp = fp->next)
		if (fp->filepath_len == filepath_len &&
		    !strcmp(fp->filepath, filepath))
			return fp->file_index;
#endif
	fp = lwsac_use(&t->lwsac_head, sizeof(*fp), TRIE_LWSAC_BLOCK_SIZE);
	if (!fp)
		return -1;

	/* describe the file... */
	strncpy(fp->filepath, filepath, sizeof(fp->filepath) - 1);
	fp->filepath[sizeof(fp->filepath) - 1] = '\0';
	fp->filepath_len = filepath_len;
	fp->priority = priority;
	fp->total_lines = 0;
	fp->file_index = t->next_file_index++;
	/* ...its line table starts at the current end of the output */
	fp->line_table_ofs = t->c;

	/* ...and push it at the head of the list of known files */
	fp->next = t->filepath_list;
	t->filepath_list = fp;
	t->fp = fp;

	return fp->file_index;
}

/*
 * Allocate a zeroed trie entry matching the single character c and link it
 * in as a child of parent.
 *
 * parent->child_count and t->count_entries are both incremented.
 *
 * Returns the new entry, or NULL if the allocation failed (nothing is
 * modified in that case).
 */
static struct lws_fts_entry *
lws_fts_entry_child_add(struct lws_fts *t, unsigned char c,
			struct lws_fts_entry *parent)
{
	struct lws_fts_entry **link = &parent->child_list;
	struct lws_fts_entry *node;

	node = lwsac_use(&t->lwsac_head, sizeof(*node), TRIE_LWSAC_BLOCK_SIZE);
	if (!node)
		return NULL;

	memset(node, 0, sizeof(*node));
	node->c = c;
	node->parent = parent;

	parent->child_count++;
	t->count_entries++;

	/*
	 * The parent's child list is kept in ascending order of c: walk to
	 * the first child that sorts after the new one, or to the end.
	 */
	for (; *link; link = &(*link)->sibling) {
		assert((*link)->parent == parent);
		if ((*link)->c > c)
			break;
	}

	/* splice in ahead of whatever we stopped at (NULL at the end) */
	node->sibling = *link;
	*link = node;

	return node;
}

/*
 * Write out everything collected for the input file that just completed, and
 * free the per-input lac.
 *
 * First 8 zero bytes are written (the same layout as the line table chunk
 * header lws_fts_fill() emits, with every field 0).  Then each instance-file
 * record on t->tif_list is written, followed by its VLI-coded line numbers.
 *
 * Returns 0 on success or 1 if a write failed.
 */
static int
finalize_per_input(struct lws_fts *t)
{
	struct lws_fts_instance_file *inst;
	struct lws_fts_lines *ln;
	unsigned char buf[8192];
	jg2_file_offset prev_ofs;
	uint64_t input_lac_size;
	int bp;

	bp = g16(&buf[0], 0);
	bp += g16(&buf[bp], 0);
	bp += g32(&buf[bp], 0);
	if (write(t->fd, buf, bp) != bp)
		return 1;
	t->c += bp;
	bp = 0;

	/*
	 * Write the generated file index + instances (if any)
	 *
	 * Each record carries the fileoffset of the previous record written
	 * for the same trie entry (0 for the first), so the per-entry chain
	 * runs backwards and never needs a seek to patch up: by the time a
	 * record is written, the one it points at is already in the file.
	 *
	 * Once every record is out, the entry's .ofs_last_inst_file holds
	 * the fileoffset of the head of its chain.
	 */

	for (inst = t->tif_list; inst; inst = inst->inst_file_next) {

		spill((3 * MAX_VLI) + inst->count, 0);

		prev_ofs = inst->owner->ofs_last_inst_file;
		if (inst->total)
			inst->owner->ofs_last_inst_file = t->c + bp;

		assert(!prev_ofs || (prev_ofs > TRIE_FILE_HDR_SIZE &&
				     prev_ofs < t->c));

		/* fileoffset of prev instance file for this entry, or 0 */
		bp += wq32(&buf[bp], prev_ofs);
		bp += wq32(&buf[bp], inst->file_index);
		bp += wq32(&buf[bp], inst->total);

		/* the entry must not keep pointing into the disposable lac */
		inst->owner->inst_file_list = NULL;

		/* line numbers held inline in the instance struct... */
		memcpy(&buf[bp], inst->vli, inst->count);
		bp += inst->count;

		/* ...followed by any that overflowed into chained structs */
		for (ln = inst->lines_list; ln; ln = ln->lines_next) {
			spill(ln->count, 0);
			memcpy(&buf[bp], ln->vli, ln->count);
			bp += ln->count;
		}
	}

	spill(0, 1);

	assert(lseek(t->fd, 0, SEEK_END) == (off_t)t->c);

	/* track the largest per-input lac we needed, for the final stats */
	if (t->lwsac_input_head) {
		input_lac_size = lwsac_total_alloc(t->lwsac_input_head);
		if (input_lac_size > t->worst_lwsac_input_size)
			t->worst_lwsac_input_size = input_lac_size;
	}

	/*
	 * the per-file allocations all live on their own lac, so they can be
	 * dropped in one go
	 */
	lwsac_free(&t->lwsac_input_head);

	/* and lose the pointer into the deallocated lac */
	t->tif_list = NULL;

	return 0;
}

/*
 * Character classes, indexed by byte value:
 *
 * 0 = punctuation, whitespace, brackets etc
 * 1 = character inside symbol set
 * 2 = upper-case character inside symbol set
 *
 * Digits, lower-case letters, '_' and every byte >= 0x80 are class 1.
 */

static const char classify[] = {
	/* 0x00 - 0x2f: control characters, space and punctuation */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x30 - 0x3f: all ten digits '0' - '9', then : ; < = > ? */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
	/* 0x40 - 0x4f: @, then 'A' - 'O' */
	0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0x50 - 0x5f: 'P' - 'Z', then [ \ ] ^ and '_' */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1,
	/* 0x60 - 0x6f: `, then 'a' - 'o' */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 - 0x7f: 'p' - 'z', then { | } ~ DEL */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x80 - 0xff: all treated as symbol characters */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};

#if 0
/*
 * Debug helper (compiled out): rebuild the symbol text that trie entry e1
 * represents by walking up the .parent chain and filling s[] backwards from
 * its end.
 *
 * s, len: caller's scratch buffer and its size
 *
 * Returns a pointer into s at the start of the NUL-terminated symbol.
 *
 * NOTE(review): the single-character branch does not check n against 0, and
 * a suffix that does not fit is skipped rather than truncated - confirm that
 * is acceptable before re-enabling this.
 */
static const char *
name_entry(struct lws_fts_entry *e1, char *s, int len)
{
	struct lws_fts_entry *e2;
	int n = len;

	s[--n] = '\0';

	e2 = e1;
	while (e2) {
		if (e2->suffix) {
			if ((int)e2->suffix_len < n) {
				n -= e2->suffix_len;
				memcpy(&s[n], e2->suffix, e2->suffix_len);
			}
		} else {
			n--;
			s[n] = e2->c;
		}

		e2 = e2->parent;
	}

	/*
	 * the last entry visited is the parentless one; presumably that is
	 * the root, whose c is 0 (lws_fts_create() zeroes it), so the
	 * text proper starts one byte further on
	 */
	return &s[n + 1];
}
#endif

/*
 * Index a piece of one input file into the in-memory trie.
 *
 * t:          the trie
 * file_index: index of the file buf belongs to.  When it differs from the
 *             one used on the previous call, the previous file's collected
 *             instances are first written out via finalize_per_input() and
 *             the line counters restart.
 * buf, len:   the input bytes to index
 *
 * as we parse the input, we create a line length table for the file index.
 * Only the file header has been written before we start doing this.
 *
 * The line table is emitted in chunks: each starts with an 8-byte header
 * (16-bit chunk length, 16-bit line count, 32-bit character count) which is
 * written as zeros first and patched by seeking back once the chunk is
 * sealed, followed by one VLI line length per '\n' seen.
 *
 * Symbol characters (see classify[]) are lower-cased and walked down the
 * trie; when a non-symbol character ends the symbol, the current line number
 * is recorded against the trie entry that was reached.
 *
 * Returns 0 on success, or 1 on allocation, write or seek failure.
 */

int
lws_fts_fill(struct lws_fts *t, uint32_t file_index, const char *buf,
	     size_t len)
{
	unsigned long long tf = lws_time_in_microseconds();
	unsigned char c, linetable[256], vlibuf[8];
	struct lws_fts_entry *e, *e1, *dcl;
	struct lws_fts_instance_file *tif;
	int bp = 0, sline, chars, m;
	char *osuff, skipline = 0;
	struct lws_fts_lines *tl;
	unsigned int olen, n;
	off_t lbh;

	if ((int)file_index != t->last_file_index) {
		/*
		 * Moving on to a different file: flush what we collected for
		 * the previous one.  A failure there is a failed write, so
		 * report it like any other write failure in here.
		 */
		if (t->last_file_index >= 0 && finalize_per_input(t)) {
			lwsl_err("%s: finalize_per_input failed\n", __func__);
			return 1;
		}
		t->last_file_index = file_index;
		t->line_number = 1;
		t->chars_in_line = 0;
		t->lines_in_unsealed_linetable = 0;
	}

	t->agg_raw_input += len;

resume:

	/* start a new line table chunk with a placeholder header */

	chars = 0;
	lbh = t->c;
	sline = t->line_number;
	bp += g16(&linetable[bp], 0);
	bp += g16(&linetable[bp], 0);
	bp += g32(&linetable[bp], 0);

	while (len) {
		char go_around = 0;

		/* chunk is full, seal it and come back via resume */
		if (t->lines_in_unsealed_linetable >= LWS_FTS_LINES_PER_CHUNK)
			break;

		len--;

		c = (unsigned char)*buf++;
		t->chars_in_line++;
		if (c == '\n') {
			skipline = 0;
			t->filepath_list->total_lines++;
			t->lines_in_unsealed_linetable++;
			t->line_number++;

			bp += wq32(&linetable[bp], t->chars_in_line);
			if ((unsigned int)bp > sizeof(linetable) - 6) {
				if (write(t->fd, linetable, bp) != bp) {
					lwsl_err("%s: linetable write failed\n",
							__func__);
					return 1;
				}
				t->c += bp;
				bp = 0;
				// assert(lseek(t->fd, 0, SEEK_END) == t->c);
			}

			chars += t->chars_in_line;
			t->chars_in_line = 0;

			/*
			 * Detect overlength lines and skip them (eg, BASE64
			 * in css etc)
			 */

			if (len > 200) {
				n = 0;
				m = 0;
				while (n < 200 && m < 80 && buf[n] != '\n') {
				       if (buf[n] == ' ' || buf[n] == '\t')
					       m = 0;
					n++;
					m++;
				}

				/* 80 chars no whitespace, or >=200-char line */

				if (m == 80 || n == 200)
					skipline = 1;
			}

			goto seal;
		}
		if (skipline)
			continue;

		m = classify[(int)c];
		if (!m)
			goto seal;
		if (m == 2)
			c += 'a' - 'A';

		if (t->aggregate) {

			/*
			 * We created a trie entry for an earlier char in this
			 * symbol already.  So we know at the moment, any
			 * further chars in the symbol are the only children.
			 *
			 * Aggregate them and add them as a string suffix to
			 * the trie symbol at the end (when we know how much to
			 * allocate).
			 */

			if (t->agg_pos < sizeof(t->agg) - 1)
				/* symbol is not too long to stash */
				t->agg[t->agg_pos++] = c;

			continue;
		}

		if (t->str_match_pos) {
			/* let the suffix matcher at seal: handle this char */
			go_around = 1;
			goto seal;
		}

		/* zeroth-iteration child matching */

		if (t->parser == t->root) {
			e = t->root_lookup[(int)c];
			if (e) {
				t->parser = e;
				continue;
			}
		} else {

			/* look for the char amongst the children */

			e = t->parser->child_list;
			while (e) {

				/* since they're alpha ordered... */
				if (e->c > c) {
					e = NULL;
					break;
				}
				if (e->c == c) {
					t->parser = e;

					if (e->suffix)
						t->str_match_pos = 1;

					break;
				}

				e = e->sibling;
			}

			if (e)
				continue;
		}

		/*
		 * we are blazing a new trail, add a new child representing
		 * the whole suffix that couldn't be matched until now.
		 */

		e = lws_fts_entry_child_add(t, c, t->parser);
		if (!e) {
			lwsl_err("%s: lws_fts_entry_child_add failed\n",
					__func__);
			return 1;
		}

		/* if it's the root node, keep the root_lookup table in sync */

		if (t->parser == t->root)
			t->root_lookup[(int)c] = e;

		/* follow the new path */
		t->parser = e;

		/* sanity check: any children must name e as their parent */
		{
			struct lws_fts_entry **pe = &e->child_list;
			while (*pe) {
				assert((*pe)->parent == e);

				pe = &(*pe)->sibling;
			}
		}

		/*
		 * If there are any more symbol characters coming, just
		 * create a suffix string on t->parser instead of what must
		 * currently be single-child nodes, since we just created e
		 * as a child with a single character due to no existing match
		 * on that single character... so if no match on 'h' with this
		 * guy's parent, we created e that matches on the single char
		 * 'h'.  If the symbol continues ... 'a' 'p' 'p' 'y', then
		 * instead of creating singleton child nodes under e,
		 * modify e to match on the whole string suffix "happy".
		 *
		 * If later "hoppy" appears, we will remove the suffix on e,
		 * so it reverts to a char match for 'h', add singleton children
		 * for 'a' and 'o', and attach a "ppy" suffix child to each of
		 * those.
		 *
		 * We want to do this so we don't have to allocate trie entries
		 * for every char in the string to save memory and consequently
		 * time.
		 *
		 * Don't try this optimization if the parent is the root node...
		 * it's not compatible with its root_lookup table and it's
		 * highly likely children off the root entry are going to have
		 * to be fragmented.
		 */

		if (e->parent != t->root) {
			t->aggregate = 1;
			t->agg_pos = 0;
		}

		continue;

seal:
		if (t->str_match_pos) {

			/*
			 * We're partway through matching an elaborated string
			 * on a child, not just a character.  String matches
			 * only exist when we met a child entry that only had
			 * one path until now... so we had an 'h', and the
			 * only child had a string "hello".
			 *
			 * We are following the right path and will not need
			 * to back up, but we may find as we go we have the
			 * first instance of a second child path, eg, "help".
			 *
			 * When we get to the 'p', we have to split what was
			 * the only string option "hello" into "hel" and then
			 * two child entries, for "lo" and 'p'.
			 */

			if (c == t->parser->suffix[t->str_match_pos++]) {
				if (t->str_match_pos < t->parser->suffix_len)
					continue;

				/*
				 * We simply matched everything, continue
				 * parsing normally from this trie entry.
				 */

				t->str_match_pos = 0;
				continue;
			}

			/*
			 * So... we hit a mismatch somewhere... it means we
			 * have to split this string entry.
			 *
			 * We know the first char actually matched in order to
			 * start down this road.  So for the current trie entry,
			 * we need to truncate his suffix at the char before
			 * this mismatched one, where we diverged (if the
			 * second char, simply remove the suffix string from the
			 * current trie entry to turn it back to a 1-char match)
			 *
			 * The original entry, which becomes the lhs post-split,
			 * is t->parser.
			 */

			olen = t->parser->suffix_len;
			osuff = t->parser->suffix;

			if (t->str_match_pos == 2)
				t->parser->suffix = NULL;
			else
				t->parser->suffix_len = t->str_match_pos - 1;

			/*
			 * Then we need to create a new child trie entry that
			 * represents the remainder of the original string
			 * path that we didn't match.  For the "hello" /
			 * "help" case, this guy will have "lo".
			 *
			 * Any instances or children (not siblings...) that were
			 * attached to the original trie entry must be detached
			 * first and then migrate to this new guy that completes
			 * the original string.
			 */

			dcl = t->parser->child_list;
			m = t->parser->child_count;

			t->parser->child_list = NULL;
			t->parser->child_count = 0;

			e = lws_fts_entry_child_add(t,
					osuff[t->str_match_pos - 1], t->parser);
			if (!e) {
				lwsl_err("%s: lws_fts_entry_child_add fail1\n",
						__func__);
				return 1;
			}

			e->child_list = dcl;
			e->child_count = m;
			/*
			 * any children we took over must point to us as the
			 * parent now they appear on our child list
			 */
			e1 = e->child_list;
			while (e1) {
				e1->parent = e;
				e1 = e1->sibling;
			}

			/*
			 * We detached any children, gave them to the new guy
			 * and replaced them with just our new guy
			 */
			t->parser->child_count = 1;
			t->parser->child_list = e;

			/*
			 * any instances that belonged to the original entry we
			 * are splitting now must be reassigned to the end
			 * part
			 */

			e->inst_file_list = t->parser->inst_file_list;
			if (e->inst_file_list)
				e->inst_file_list->owner = e;
			t->parser->inst_file_list = NULL;
			e->instance_count = t->parser->instance_count;
			t->parser->instance_count = 0;

			e->ofs_last_inst_file = t->parser->ofs_last_inst_file;
			t->parser->ofs_last_inst_file = 0;

			if (t->str_match_pos != olen) {
				/* we diverged partway */
				e->suffix = &osuff[t->str_match_pos - 1];
				e->suffix_len = olen - (t->str_match_pos - 1);
			}

			/*
			 * if the current char is a terminal, skip creating a
			 * new way forward.
			 *
			 * NOTE(review): in that case e is still the remainder
			 * child created above, so the parser - and the
			 * instance recorded below - lands on the entry for
			 * the full original string rather than on the
			 * truncated prefix.  Confirm that is intended.
			 */

			if (classify[(int)c]) {

				/*
				 * Lastly we need to create a new child trie
				 * entry that represents the new way forward
				 * from the point that we diverged.  For the
				 * "hello" / "help" case, this guy will start
				 * as a child of "hel" with the single
				 * character match 'p'.
				 *
				 * Since he becomes the current parser context,
				 * more symbol characters may be coming to make
				 * him into, eg, "helping", in which case he
				 * will acquire a suffix eventually of "ping"
				 * via the aggregation stuff
				 */

				e = lws_fts_entry_child_add(t, c, t->parser);
				if (!e) {
					lwsl_err("%s: child_add fail2\n",
						 __func__);
					return 1;
				}
			}

			/* go on following this path */
			t->parser = e;

			t->aggregate = 1;
			t->agg_pos = 0;

			t->str_match_pos = 0;

			if (go_around)
				continue;

			/* this is intended to be a seal */
		}


		/* end of token */

		if (t->aggregate && t->agg_pos) {

			/* if nothing in agg[]: leave as single char match */

			/* otherwise copy out the symbol aggregation */
			t->parser->suffix = lwsac_use(&t->lwsac_head,
						    t->agg_pos + 1,
						    TRIE_LWSAC_BLOCK_SIZE);
			if (!t->parser->suffix) {
				lwsl_err("%s: lac for suffix failed\n",
						__func__);
				return 1;
			}

			/* add the first char at the beginning */
			*t->parser->suffix = t->parser->c;
			/* and then add the agg buffer stuff */
			memcpy(t->parser->suffix + 1, t->agg, t->agg_pos);
			t->parser->suffix_len = t->agg_pos + 1;
		}
		t->aggregate = 0;

		if (t->parser == t->root) /* multiple terminal chars */
			continue;

		/* first hit on this entry for this file?  Give it a tif */

		if (!t->parser->inst_file_list ||
		    t->parser->inst_file_list->file_index != file_index) {
			tif = lwsac_use(&t->lwsac_input_head, sizeof(*tif),
				      TRIE_LWSAC_BLOCK_SIZE);
			if (!tif) {
				lwsl_err("%s: lac for tif failed\n",
						__func__);
				return 1;
			}

			tif->file_index = file_index;
			tif->owner = t->parser;
			tif->lines_list = NULL;
			tif->lines_tail = NULL;
			tif->total = 0;
			tif->count = 0;
			tif->inst_file_next = t->tif_list;
			t->tif_list = tif;

			t->parser->inst_file_list = tif;
		}

		/*
		 * A naive allocation strategy for this leads to 50% of the
		 * total inmem lac allocation being for line numbers...
		 *
		 * It's mainly solved by only holding the instance and line
		 * number tables for the duration of a file being input, as soon
		 * as one input file is finished it is written to disk.
		 *
		 * For the common case of 1 - ~3 matches the line number are
		 * stored in a small VLI array inside the filepath inst.  If the
		 * next one won't fit, it allocates a line number struct with
		 * more vli space and continues chaining those if needed.
		 */

		/* n = how many bytes the line number needs as a VLI */
		n = wq32(vlibuf, t->line_number);
		tif = t->parser->inst_file_list;

		if (!tif->lines_list) {
			/* we are still trying to use the file inst vli */
			if (LWS_ARRAY_SIZE(tif->vli) - tif->count >= n) {
				tif->count += wq32(tif->vli + tif->count,
						   t->line_number);
				goto after;
			}
			/* we are going to have to allocate */
		}

		/* can we add to an existing line numbers struct? */
		if (tif->lines_tail &&
		    LWS_ARRAY_SIZE(tif->lines_tail->vli) -
		    	    tif->lines_tail->count >= n) {
			tif->lines_tail->count += wq32(tif->lines_tail->vli +
						       tif->lines_tail->count,
						       t->line_number);
			goto after;
		}

		/* either no existing line numbers struct at tail, or full */

		/* have to create a(nother) line numbers struct */
		tl = lwsac_use(&t->lwsac_input_head, sizeof(*tl),
			     TRIE_LWSAC_BLOCK_SIZE);
		if (!tl) {
			lwsl_err("%s: lac for tl failed\n", __func__);
			return 1;
		}
		tl->lines_next = NULL;
		if (tif->lines_tail)
			tif->lines_tail->lines_next = tl;

		tif->lines_tail = tl;
		if (!tif->lines_list)
			tif->lines_list = tl;

		tl->count = wq32(tl->vli, t->line_number);
after:
		tif->total++;
#if 0
		{
			char s[128];
			const char *ne = name_entry(t->parser, s, sizeof(s));

			if (!strcmp(ne, "describ")) {
				lwsl_err("     %s %d\n", ne, t->str_match_pos);
				write(1, buf - 10, 20);
			}
		}
#endif
		/* symbol recorded: the next one starts again from the root */
		t->parser->instance_count++;
		t->parser = t->root;
		t->str_match_pos = 0;
	}

	/* seal off the line length table block */

	if (bp) {
		if (write(t->fd, linetable, bp) != bp)
			return 1;
		t->c += bp;
		bp = 0;
	}

	/* go back and fill in the chunk header we left as zeros */

	if (lseek(t->fd, lbh, SEEK_SET) < 0) {
		lwsl_err("%s: seek to 0x%llx failed\n", __func__,
				(unsigned long long)lbh);
		return 1;
	}

	g16(linetable, t->c - lbh);
	g16(linetable + 2, t->line_number - sline);
	g32(linetable + 4, chars);
	if (write(t->fd, linetable, 8) != 8) {
		lwsl_err("%s: write linetable header failed\n", __func__);
		return 1;
	}

	assert(lseek(t->fd, 0, SEEK_END) == (off_t)t->c);

	/* explicitly return to the end, the assert may be compiled out */

	if (lseek(t->fd, t->c, SEEK_SET) < 0) {
		lwsl_err("%s: end seek failed\n", __func__);
		return 1;
	}

	bp = 0;

	if (len) {
		/* we only stopped because the chunk filled: start another */
		t->lines_in_unsealed_linetable = 0;
		goto resume;
	}

	/*
	 * the collected per-input instance and line data is dumped and freed
	 * later, when the next file starts or at serialize time
	 */

	t->agg_trie_creation_us += lws_time_in_microseconds() - tf;

	return 0;
}

/* refer to ./README.md */

int
lws_fts_serialize(struct lws_fts *t)
{
	struct lws_fts_filepath *fp = t->filepath_list, *ofp;
	unsigned long long tf = lws_time_in_microseconds();
	struct lws_fts_entry *e, *e1, *s[256];
	unsigned char buf[8192], stasis;
	int n, bp, sp = 0, do_parent;

	(void)tf;
	finalize_per_input(t);

	/*
	 * Compute aggregated instance counts (parents should know the total
	 * number of instances below each child path)
	 *
	 *
	 * If we have
	 *
	 * (root) -> (c1) -> (c2)
	 *        -> (c3)
	 *
	 * we need to visit the nodes in the order
	 *
	 * c2, c1, c3, root
	 */

	sp = 0;
	s[0] = t->root;
	do_parent = 0;
	while (sp >= 0) {
		int n;

		/* aggregate in every antecedent */

		for (n = 0; n <= sp; n++) {
			s[n]->agg_inst_count += s[sp]->instance_count;
			s[n]->agg_child_count += s[sp]->child_count;
		}

		/* handle any children before the parent */

		if (s[sp]->child_list) {
			if (sp + 1 == LWS_ARRAY_SIZE(s)) {
				lwsl_err("Stack too deep\n");

				goto bail;
			}

			s[sp + 1] = s[sp]->child_list;
			sp++;
			continue;
		}

		do {
			if (s[sp]->sibling) {
				s[sp] = s[sp]->sibling;
				break;
			} else
				sp--;
		} while (sp >= 0);
	}

	/* dump the filepaths and set prev */

	fp = t->filepath_list;
	ofp = NULL;
	bp = 0;
	while (fp) {

		fp->ofs = t->c + bp;
		n = (int)strlen(fp->filepath);
		spill(15 + n, 0);

		bp += wq32(&buf[bp], fp->line_table_ofs);
		bp += wq32(&buf[bp], fp->total_lines);
		bp += wq32(&buf[bp], n);
		memcpy(&buf[bp], fp->filepath, n);
		bp += n;

		fp->prev = ofp;
		ofp = fp;
		fp = fp->next;
	}

	spill(0, 1);

	/* record the fileoffset of the filepath map and filepath count */

	if (lseek(t->fd, 0xc, SEEK_SET) < 0)
		goto bail_seek;

	g32(buf, t->c + bp);
	g32(buf + 4, t->next_file_index);
	if (write(t->fd, buf, 8) != 8)
		goto bail;

	if (lseek(t->fd, t->c + bp, SEEK_SET) < 0)
		goto bail_seek;

	/* dump the filepath map, starting from index 0, which is at the tail */

	fp = ofp;
	bp = 0;
	while (fp) {
		spill(5, 0);
		g32(buf + bp, fp->ofs);
		bp += 4;
		fp = fp->prev;
	}
	spill(0, 1);

	/*
	 * The trie entries in reverse order... because of the reversal, we have
	 * always written children first, and marked them with their file offset
	 * before we come to refer to them.
	 */

	bp = 0;
	sp = 0;
	s[0] = t->root;
	do_parent = 0;
	while (s[sp]) {

		/* handle any children before the parent */

		if (!do_parent && s[sp]->child_list) {

			if (sp + 1 == LWS_ARRAY_SIZE(s)) {
				lwsl_err("Stack too deep\n");

				goto bail;
			}

			s[sp + 1] = s[sp]->child_list;
			sp++;
			continue;
		}

		/* leaf nodes with no children */

		e = s[sp];
		e->ofs = t->c + bp;

		/* write the trie entry header */

		spill((3 * MAX_VLI), 0);

		bp += wq32(&buf[bp], e->ofs_last_inst_file);
		bp += wq32(&buf[bp], e->child_count);
		bp += wq32(&buf[bp], e->instance_count);
		bp += wq32(&buf[bp], e->agg_inst_count);

		/* sort the children in order of highest aggregate hits first */

		do {
			struct lws_fts_entry **pe, *te1, *te2;

			stasis = 1;

			/* bubble sort keeps going until nothing changed */

			pe = &e->child_list;
			while (*pe) {

				te1 = *pe;
				te2 = te1->sibling;

				if (te2 && te1->agg_inst_count <
					   te2->agg_inst_count) {
					stasis = 0;

					*pe = te2;
					te1->sibling = te2->sibling;
					te2->sibling = te1;
				}

				pe = &(*pe)->sibling;
			}

		} while (!stasis);

		/* write the children */

		e1 = e->child_list;
		while (e1) {
			spill((5 * MAX_VLI) + e1->suffix_len + 1, 0);

			bp += wq32(&buf[bp], e1->ofs);
			bp += wq32(&buf[bp], e1->instance_count);
			bp += wq32(&buf[bp], e1->agg_inst_count);
			bp += wq32(&buf[bp], e1->agg_child_count);

			if (e1->suffix) { /* string  */
				bp += wq32(&buf[bp], e1->suffix_len);
				memmove(&buf[bp], e1->suffix, e1->suffix_len);
				bp += e1->suffix_len;
			} else { /* char */
				bp += wq32(&buf[bp], 1);
				buf[bp++] = e1->c;
			}
#if 0
			if (e1->suffix && e1->suffix_len == 3 &&
			    !memcmp(e1->suffix, "cri", 3)) {
				struct lws_fts_entry *e2;

				e2 = e1;
				while (e2){
					if (e2->suffix)
						lwsl_notice("%s\n", e2->suffix);
					else
						lwsl_notice("%c\n", e2->c);

					e2 = e2->parent;
				}

				lwsl_err("*** %c CRI inst %d ch %d\n", e1->parent->c,
						e1->instance_count, e1->child_count);
			}
#endif
			e1 = e1->sibling;
		}

		/* if there are siblings, do those next */

		if (do_parent) {
			do_parent = 0;
			sp--;
		}

		if (s[sp]->sibling)
			s[sp] = s[sp]->sibling;
		else {
			/* if there are no siblings, do the parent */
			do_parent = 1;
			s[sp] = s[sp]->parent;
		}
	}

	spill(0, 1);

	assert(lseek(t->fd, 0, SEEK_END) == (off_t)t->c);

	/* drop the correct root trie offset + file length into the header */

	if (lseek(t->fd, 4, SEEK_SET) < 0) {
		lwsl_err("%s: unable to seek\n", __func__);

		goto bail;
	}

	g32(buf, t->root->ofs);
	g32(buf + 4, t->c);
	if (write(t->fd, buf, 0x8) != 0x8)
		goto bail;

	lwsl_notice("%s: index %d files (%uMiB) cpu time %dms, "
		    "alloc: %dKiB + %dKiB, "
		    "serialize: %dms, file: %dKiB\n", __func__,
		    t->next_file_index,
		    (int)(t->agg_raw_input / (1024 * 1024)),
		    (int)(t->agg_trie_creation_us / 1000),
		    (int)(lwsac_total_alloc(t->lwsac_head) / 1024),
		    (int)(t->worst_lwsac_input_size / 1024),
		    (int)((lws_time_in_microseconds() - tf) / 1000),
		    (int)(t->c / 1024));

	return 0;

bail_seek:
	lwsl_err("%s: problem seekings\n", __func__);

bail:
	return 1;
}