From 20e52f326cdf1b6c2ca9b2c0b5be07637d9196d2 Mon Sep 17 00:00:00 2001
From: flu0r1ne <flu0r1ne@flu0r1ne.net>
Date: Sun, 30 Oct 2022 19:30:29 -0500
Subject: Initial commit

---
 README.md           |   22 +
 main.c              |  928 ++++++++++++++++++++++++++++++++++++
 makefile            |    3 +
 util/err.c          |   80 ++++
 util/hash.c         |   46 ++
 util/hash.h         |   27 ++
 util/prime_search.c |  344 ++++++++++++++
 util/prime_search.h |    8 +
 util/sds/sds.c      | 1300 +++++++++++++++++++++++++++++++++++++++++++++++++++
 util/sds/sds.h      |  274 +++++++++++
 util/sds/sdsalloc.h |   42 ++
 util/util.h         |   78 ++++
 12 files changed, 3152 insertions(+)
 create mode 100644 README.md
 create mode 100644 main.c
 create mode 100644 makefile
 create mode 100644 util/err.c
 create mode 100644 util/hash.c
 create mode 100644 util/hash.h
 create mode 100644 util/prime_search.c
 create mode 100644 util/prime_search.h
 create mode 100644 util/sds/sds.c
 create mode 100644 util/sds/sds.h
 create mode 100644 util/sds/sdsalloc.h
 create mode 100644 util/util.h

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..ff14ab3
--- /dev/null
+++ b/README.md
@@ -0,0 +1,22 @@
+Bam Query Index (qidx)
+=====================
+
+> Warning: this is a work in progress
+
+`qidx` is tool for indexing BAM alignments by query name. While the
+samtools have the ability to sort data by query name (also called
+the read name), there htslib does not provide built-in utilities
+to retrieve alignments by query name. This can be advantageous
+for examining multi-mapped alignments.
+
+While a utility [bri](https://github.com/jts/bri) predated `qidx`
+providing the same utilities, it reads all alignments into memory
+which is impractical for most human genome data.
+
+Notes:
+
+- `qidx` creates a disk-backed using a sparse memory-mapped file. The underlying
+operating system must support `mmap` and file holes
+- `qidx` doesn't currently support compression. it is currently recommended to
+use block-level compression (such as `zfs` `zstd` compression)
+- the bamfile must be sorted by query name before the index is built `samtools sort -n`
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..aa2f3d6
--- /dev/null
+++ b/main.c
@@ -0,0 +1,928 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <htslib/sam.h>
+#include <htslib/hts.h>
+#include <htslib/bgzf.h>
+#include <stdbool.h>
+
+#include "util/sds/sds.h"
+#include "util/prime_search.h"
+#include "util/hash.h"
+#include "util/util.h"
+
+#include <endian.h>
+
+// @abstract Structure representing an unique alignment and virtual file pointer
+// @field tid   chromosome ID
+// @field pos   0-based leftmost coordinate
+// @field vptr  virtual pointer
+typedef struct {
+	uint32_t tid;
+	uint32_t pos;
+	uint64_t vptr;
+} aln_spec_t;
+
+
+#pragma pack(push, 1)
+
+typedef struct {
+	uint32_t tid;
+	uint32_t pos;
+	uint64_t vptr;
+} aln_spec_packed_t;
+
+#pragma pack(pop)
+
+// constrains (qname < 2 ** 32)
+
+// @abstract Structure representing all alignments by queryname
+// @field qname    queryname (name for the given read)
+// @field n_alns   number of alignments
+// @field alns     array with alignments structures
+typedef struct {
+	char * qname;
+	uint16_t n_alns;
+	aln_spec_t * alns;
+} qidx_record_t;
+
+// @abstract Get the marshaled size of a record
+static size_t qidx_record_msize(qidx_record_t const * q) {
+	return (
+		strlen(q->qname) + 1 +
+		sizeof(uint16_t) +
+		sizeof(aln_spec_packed_t) * q->n_alns
+	);
+}
+
+// @abstract Martial a record to a fixed buffer
+//
+// @param q      query index record
+// @param dst    destination buffer
+// @param dstlen length of the destination buffer
+//
+// @return       returns the number of bytes written to the buffer
+//               or < 0 if the destination buffer is exceeded
+ssize_t qidx_record_marital(qidx_record_t const * q, void * dst, size_t dstlen) {
+	size_t rem = dstlen;
+
+	size_t n = strlen(q->qname) + 1;
+	if(rem < n)
+		return -1;
+	rem -= n;
+
+	{
+		char * _dst = dst;
+		strcpy(_dst, q->qname);
+		dst = (_dst + n);
+	}
+
+	size_t sz = 0;
+	sz += sizeof(uint16_t);
+	sz += sizeof(aln_spec_packed_t) * q->n_alns;
+
+	if(rem < sz)
+		return -1;
+	rem -= sz;
+
+	{
+		uint16_t * _dst = dst;
+		*_dst = htole16(q->n_alns);
+		dst = _dst + 1;
+	}
+
+	aln_spec_packed_t * buf_aln = dst;
+	for(size_t i = 0; i < q->n_alns; i++) {
+		aln_spec_t * aln = &q->alns[i];
+
+		buf_aln->tid = htole32(aln->tid);
+		buf_aln->pos = htole32(aln->pos);
+		buf_aln->vptr = htole64(aln->vptr);
+
+		buf_aln++;
+	}
+
+	return dstlen - rem;
+}
+
+// @abstract Helper to parse the query name and the number of alignments
+// @returns  NULL if a parsing error occured else the location after the buffer
+static void const * _parse_record_header(void const * buf, size_t buflen,
+		char const ** qname, uint16_t * n_alns) {
+
+	size_t n = strnlen(buf, buflen) + 1;
+	*qname = buf;
+
+	if(n + sizeof(uint16_t) > buflen)
+		return NULL;
+
+	uint16_t const * _n_alns = (uint16_t const *) (*qname + n);
+	*n_alns = le64toh(*_n_alns);
+	buf = _n_alns + 1;
+
+	if(sizeof(aln_spec_packed_t) * *n_alns > buflen)
+		return NULL;
+
+	return buf;
+}
+
+// @abstract Unmartial a query record. The resulting memory needs to be freed.
+//           All strings are sds strings.
+//
+// @param q      query index record
+// @param buf    source buffer
+// @param buflen source buffer length
+//
+// @return       a pointer within buf after the record
+//               or NULL if a parsing error occurred
+void const * qidx_record_unmartial(qidx_record_t * q, void const * buf, size_t buflen) {
+	char const * qname;
+
+	if(!(buf = _parse_record_header(buf, buflen, &qname, &q->n_alns))) {
+		return NULL;
+	}
+
+	q->qname = sdsnew(qname);
+	q->alns = calloc(q->n_alns, sizeof(aln_spec_t));
+
+	aln_spec_packed_t const * buf_aln = buf;
+	for(size_t i = 0; i < q->n_alns; i++) {
+		aln_spec_t * aln = &q->alns[i];
+
+		aln->pos = le32toh(buf_aln->pos);
+		aln->tid = le32toh(buf_aln->tid);
+		aln->vptr = le64toh(buf_aln->vptr);
+
+		buf_aln++;
+	}
+
+	return buf_aln;
+}
+
+// @abstract Find a record efficiently in a marshaled buffer of query index records
+//
+// @param buf    source buffer
+// @param buflen length of the source buffer
+// @param qname  target queryname
+//
+// @returns a pointer to a target query or NULL if it a record with the target query name
+//          does not exist in the buffer
+void const * qidx_record_martial_find(void const * buf, size_t buflen, char const * qname) {
+	size_t n = strnlen(buf, buflen);
+
+	char const * _qname;
+	uint16_t n_alns;
+
+	void const * cur = buf,
+				  * next = NULL;
+
+	while((next = _parse_record_header(cur, buflen, &_qname, &n_alns))) {
+
+		if(strcmp(_qname, qname) == 0) {
+			return cur;
+		}
+
+		next = (aln_spec_packed_t *) next + n_alns;
+		size_t l = (char const *) next - (char const *) cur;
+		buflen -= l;
+		cur = next;
+
+		if(!buflen) {
+			return NULL;
+		}
+	}
+
+	// Invalid
+	return NULL;
+}
+
+typedef enum {
+	QIDX_OK = 0,
+	QIDX_ITER_DONE,
+	QIDX_NO_MEM,
+	QIDX_NO_BAM_HDR,
+	QIDX_BAM_READ_FAILURE,
+	QIDX_REC_ITER_FAILURE,
+	QIDX_NOT_SORTED,
+	QIDX_BUCKET_MAX_SIZE_EXCEEDED,
+	QIDX_MAP_FAIL,
+	QIDX_IO_FAIL,
+	QIDX_FAILED_TO_OPEN_BAMFILE,
+	QIDX_FAILED_TO_OPEN_INDEX_FILE,
+	QIDX_INVALID_VERSION,
+	QIDX_INVALID_MAGIC,
+} qidx_err_t;
+
+bool qidx_errno(qidx_err_t err) {
+	switch(err) {
+		case QIDX_MAP_FAIL:
+		case QIDX_IO_FAIL:
+			return true;
+		default:
+			return false;
+	}
+
+}
+
+char const * qidx_strerr(qidx_err_t err) {
+	switch(err) {
+		case QIDX_NO_MEM: return "memory was exausted";
+		case QIDX_NO_BAM_HDR: return "parsing BAM header failed";
+		case QIDX_BAM_READ_FAILURE: return "failed to read bamfile";
+		case QIDX_REC_ITER_FAILURE: return "bam query index record iterator failed";
+		case QIDX_NOT_SORTED: return "input bam file not sorted by query name";
+		case QIDX_BUCKET_MAX_SIZE_EXCEEDED: return "maximum query size exceeded";
+		case QIDX_MAP_FAIL: return "mmap failed";
+		case QIDX_IO_FAIL: return "qidx io failure";
+		case QIDX_FAILED_TO_OPEN_BAMFILE: return "failed to open bamfile";
+		case QIDX_FAILED_TO_OPEN_INDEX_FILE: return "failed to open indexfile";
+		case QIDX_INVALID_MAGIC: return "invalid magic number (not a qidx or corrupted)";
+		case QIDX_INVALID_VERSION: return "qidx version outdated, please regenerate";
+		case QIDX_ITER_DONE:
+		case QIDX_OK:
+			return "OK";
+	}
+}
+
+qidx_err_t init_qidx(qidx_record_t * r) {
+	return QIDX_OK;
+}
+
+void qidx_free(qidx_record_t * r) {
+	free(r);
+}
+
+/*
+ * SAMTOOLS SORTING ALGORITHM
+ */
+
+// strnum_cmp is the string comparator used by samtools sort
+// to sort bam files by read names (e.g. QNAME)
+//
+// The input file is expected to be ordered accordingly. This
+// is verified at runtime.
+#define is_digit(c) ((c)<='9' && (c)>='0')
+static int strnum_cmp(const char *_a, const char *_b)
+{
+    const unsigned char *a = (const unsigned char*)_a, *b = (const unsigned char*)_b;
+    const unsigned char *pa = a, *pb = b;
+    while (*pa && *pb) {
+        if (!is_digit(*pa) || !is_digit(*pb)) {
+            if (*pa != *pb)
+                return (int)*pa - (int)*pb;
+            ++pa; ++pb;
+        } else {
+            // skip leading zeros
+            while (*pa == '0') ++pa;
+            while (*pb == '0') ++pb;
+
+            // skip matching digits
+            while (is_digit(*pa) && *pa == *pb)
+                pa++, pb++;
+
+            // Now mismatching, so see which ends the number sooner
+            int diff = (int)*pa - (int)*pb;
+            while (is_digit(*pa) && is_digit(*pb))
+                pa++, pb++;
+
+            if (is_digit(*pa))
+                return  1; // pa still going, so larger
+            else if (is_digit(*pb))
+                return -1; // pb still going, so larger
+            else if (diff)
+                return diff; // same length, so earlier diff
+        }
+    }
+    return *pa? 1 : *pb? -1 : 0;
+}
+
+
+typedef struct {
+	htsFile *fp;
+	bam1_t * b;
+	bam_hdr_t * hdr;
+
+	uint64_t file_offset;
+	sds queryname;
+	int ret;
+
+	aln_spec_t * aln_specs;
+	uint16_t n_aln_cap;
+	uint16_t n_aln_sz;
+
+	qidx_record_t * pending_release;
+} qidx_record_iter_t;
+
+qidx_err_t qidx_record_iter_init(qidx_record_iter_t *it, htsFile *fp) {
+	it->b = bam_init1();
+	it->fp = fp;
+
+	// advance past header
+	it->hdr = sam_hdr_read(fp);
+
+	if(!it->hdr) {
+		return QIDX_NO_BAM_HDR;
+	}
+
+	it->file_offset = bgzf_tell(fp->fp.bgzf);
+
+	if((it->ret = sam_read1(it->fp, it->hdr, it->b)) < 0) {
+		return QIDX_BAM_READ_FAILURE;
+	}
+
+	it->queryname = sdsnew(bam_get_qname(it->b));
+	it->aln_specs = NULL;
+	it->n_aln_sz = it->n_aln_cap = 0;
+
+	if(bgzf_seek(fp->fp.bgzf, it->file_offset, SEEK_SET) < 0) {
+		return QIDX_IO_FAIL;
+	}
+
+	it->pending_release = NULL;
+
+	return QIDX_OK;
+}
+
+static void _qidx_record_iter_free_record(qidx_record_iter_t * it) {
+	if(it->pending_release) {
+		free(it->pending_release->alns);
+		sdsfree(it->pending_release->qname);
+		it->pending_release = NULL;
+	}
+}
+
+qidx_err_t qidx_record_iter_next(qidx_record_iter_t * it, qidx_record_t * rec) {
+	
+	_qidx_record_iter_free_record(it);
+
+	sds queryname;
+	int cmp;
+	int ret;
+	bool yield = false;
+	while ((ret = sam_read1(it->fp, it->hdr, it->b)) >= 0) {
+		queryname = sdsnew(bam_get_qname(it->b));
+
+		if(!queryname) {
+			return QIDX_NO_MEM;
+		}
+
+		if((cmp = strnum_cmp(it->queryname, queryname)) != 0) {
+			if(cmp > 0) {
+				printf("Err: the file is not sorted\n");
+				return QIDX_NOT_SORTED;
+			}
+
+			rec->n_alns = it->n_aln_sz;
+			rec->alns = it->aln_specs;
+			rec->qname = it->queryname;
+
+			it->pending_release = rec;
+
+			yield = true;
+
+			it->queryname = sdsdup(queryname);
+			it->aln_specs = NULL;
+			it->n_aln_cap = it->n_aln_sz = 0;
+		}
+
+		AM_RESIZE(it->aln_specs, it->n_aln_sz + 1, it->n_aln_cap);
+
+		it->aln_specs[it->n_aln_sz].pos = it->b->core.pos;
+		it->aln_specs[it->n_aln_sz].tid = it->b->core.tid;
+		it->aln_specs[it->n_aln_sz].vptr = it->file_offset;
+
+		it->n_aln_sz++;
+
+		// update offset for next record
+		it->file_offset = bgzf_tell(it->fp->fp.bgzf);
+
+		sdsfree(queryname);
+
+		if(yield) {
+			break;
+		}
+	}
+
+	if(ret < -1) {
+		it->pending_release = NULL;
+		return QIDX_BAM_READ_FAILURE;
+	} else if (ret == -1 && it->queryname != NULL) {
+			rec->n_alns = it->n_aln_sz;
+			rec->alns = it->aln_specs;
+			rec->qname = it->queryname;
+
+			it->pending_release = rec;
+
+			yield = true;
+
+			it->queryname = NULL;
+			it->aln_specs = NULL;
+			it->n_aln_cap = it->n_aln_sz = 0;
+	}
+
+	if(!yield) {
+		return QIDX_ITER_DONE;
+	}
+
+	return QIDX_OK;
+}
+
+qidx_err_t qidx_record_iter_destroy(qidx_record_iter_t * it) {
+	_qidx_record_iter_free_record(it);
+
+	sdsfree(it->queryname);
+
+	bam_hdr_destroy(it->hdr);
+	bam_destroy1(it->b);
+
+	return QIDX_OK;
+}
+
+#include <math.h>
+
+// @abstract Structure containing summary statistics estimated using
+//           a one-pass algorithm
+// @field mu mean
+// @field s  sample variance
+// @field n  number of entries
+typedef struct {
+	double mu;
+	double s;
+	size_t n;
+} roll_est_params_t;
+
+// @abstract observe a data point
+void roll_est_add_evidence(roll_est_params_t * p, double x) {
+	double delta = x - p->mu;
+	p->mu += delta / (p->n + 1);
+	p->s += (x - p->mu) * delta;
+	p->n++;
+}
+
+// @abstract finalize the estimate
+void roll_est_finalize(roll_est_params_t * p) {
+	p->s /= p->n;
+}
+
+// Calculates the maximum number of items placed into a single bucket
+// to not exceed the bucket_size with a probability specified by the
+// area to the right of the positive z-score z. E.g. at z=2 there is
+// 2.4% chance of the soft limit being exceeded given objects are
+// assigned uniformly to each bucket. (The cardinality of each bucket
+// is the same.)
+//
+// In order for the maximum size to (really) not be exceeded, the
+// binomial distribution of items in each bucket would need to be
+// included. It is recommended to set this soft limit much lower
+// than the hard limit to not exceed it.
+//
+// This is derived from the standard error as measured by the sampling
+// distribution.
+
+// @param soft_limit  a soft limit on the number of variable-length items
+//                    stuffed into a single bucket
+// @param z           the z-score with which this will happen
+// @param p           the parameters of the normal distribution
+size_t estimate_n_buckets(double soft_limit, double z, roll_est_params_t const * p) {
+	double n = sqrt(z * z * p->s + 4 * p->mu * soft_limit) - z * sqrt(p->s);
+	n = n / (2 * p->mu);
+
+	int max_bucket_size = floor(n * n);
+
+	return (size_t) ceil((double) p->n / (double) max_bucket_size);
+}
+
+typedef struct {
+	uint64_t offset;
+	uint32_t n_bytes;
+} qidx_bucket_t;
+
+typedef struct {
+	uint32_t n_buckets;
+	uint32_t max_bucket_size;
+	qidx_bucket_t * buckets;
+	char * rb_start;
+} qidx_htable_t;
+
+#define QIDX_MAGIC 0x5d1de6b4
+#define QIDX_VERSION 1
+
+#pragma pack(push, 1)
+
+typedef struct {
+	uint32_t magic;
+	uint16_t version;
+	uint32_t n_buckets;
+	uint32_t max_bucket_size;
+} qidx_header_t;
+
+#pragma pack(pop)
+
+typedef struct {
+	int fd;
+	void * map;
+	size_t len;
+	bool read_only;
+	qidx_header_t * hdr;
+	qidx_bucket_t * buckets;
+	qidx_htable_t * htab;
+} qidx_fp_t;
+
+size_t qidx_file_size(uint32_t n_buckets, uint32_t max_bucket_size) {
+	size_t sz = 0;
+
+	sz += sizeof(qidx_header_t);
+	sz += sizeof(qidx_htable_t) * n_buckets;
+	sz += (uint64_t) n_buckets * (uint64_t) max_bucket_size;
+
+	return sz;
+}
+
+#include <sys/mman.h>
+#include <unistd.h>
+
+// @abstract initialize htab, must first populate fp->buckets
+qidx_err_t _qidx_init_htab(qidx_fp_t * fp,
+		uint32_t n_buckets, uint32_t max_bucket_size) {
+	qidx_htable_t * htab = malloc(sizeof(qidx_htable_t));
+	if(!htab) {
+		return QIDX_NO_MEM;
+	}
+
+	htab->buckets = calloc(sizeof(qidx_bucket_t), n_buckets);
+	if(!htab->buckets) {
+		free(htab);
+		return QIDX_NO_MEM;
+	}
+
+	htab->n_buckets = n_buckets;
+	htab->max_bucket_size = max_bucket_size;
+
+	htab->rb_start = (void *) (fp->buckets + n_buckets);
+
+	fp->htab = htab;
+
+	return QIDX_OK;
+}
+
+
+qidx_err_t qidx_open(qidx_fp_t * fp, int fd) {
+
+	fp->fd = fd;
+
+	qidx_header_t hdr;
+	if(read(fd, &hdr, sizeof(qidx_header_t)) == -1) {
+		return QIDX_FAILED_TO_OPEN_INDEX_FILE;
+	}
+
+	if(le32toh(hdr.magic) != QIDX_MAGIC) {
+		return QIDX_INVALID_MAGIC;
+	}
+
+	if(le16toh(hdr.version) != QIDX_VERSION) {
+		return QIDX_INVALID_VERSION;
+	}
+
+	uint32_t max_bucket_size = le32toh(hdr.max_bucket_size);
+	uint32_t n_buckets = le32toh(hdr.n_buckets);
+
+	size_t sz = qidx_file_size(n_buckets, max_bucket_size);
+
+	if(lseek(fd, 0, SEEK_SET) == -1) {
+		return QIDX_IO_FAIL;
+	}
+
+	fp->read_only = true;
+	fp->map = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
+	fp->len = sz;
+
+	if(fp->map == MAP_FAILED) {
+		return QIDX_MAP_FAIL;
+	}
+
+	fp->hdr = fp->map;
+	fp->buckets = (qidx_bucket_t *)(fp->hdr + 1);
+
+	qidx_err_t err = _qidx_init_htab(fp, n_buckets, max_bucket_size);
+	if(err) { return err; }
+
+	for(size_t i = 0; i < n_buckets; i++) {
+		fp->htab->buckets[i].n_bytes = le32toh(fp->buckets[i].n_bytes);
+		fp->htab->buckets[i].offset = le64toh(fp->buckets[i].offset);
+	}
+
+	return QIDX_OK;
+}
+
+qidx_err_t qidx_create(qidx_fp_t * fp, int fd, uint32_t _n_buckets, uint32_t max_bucket_size) {
+
+	uint32_t n_buckets = next_prime_above_lowerbound(_n_buckets);
+
+	size_t sz = qidx_file_size(n_buckets, max_bucket_size);
+
+	fp->fd = fd;
+
+	if(ftruncate(fd, sz) == -1) {
+		return QIDX_IO_FAIL;
+	}
+
+	fp->read_only = false;
+	fp->map = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	fp->len = sz;
+
+	if(fp->map == MAP_FAILED) {
+		return QIDX_MAP_FAIL;
+	}
+
+	fp->hdr = fp->map;
+
+	fp->hdr->magic = htole32(QIDX_MAGIC);
+	fp->hdr->version = htole16(QIDX_VERSION);
+	fp->hdr->n_buckets = htole32(n_buckets);
+	fp->hdr->max_bucket_size = htole32(max_bucket_size);
+
+	fp->buckets = (qidx_bucket_t *) (fp->hdr + 1);
+
+	_qidx_init_htab(fp, n_buckets, max_bucket_size);
+
+	uint64_t off = (char *) fp->htab->rb_start - (char *) fp->map;
+	for(size_t i = 0; i < n_buckets; i++) {
+		fp->htab->buckets[i].offset = off;
+		fp->htab->buckets[i].n_bytes = 0;
+		off += max_bucket_size;
+	}
+
+	return QIDX_OK;
+}
+
+qidx_err_t qidx_close(qidx_fp_t * fp) {
+
+	qidx_htable_t * htab = fp->htab;
+
+	if(!fp->read_only) {
+		for(size_t i = 0; i < htab->n_buckets; i++) {
+			fp->buckets[i].n_bytes = htole32(htab->buckets[i].n_bytes);
+			fp->buckets[i].offset = htole64(htab->buckets[i].offset);
+		}
+	}
+
+	free(htab->buckets);
+	free(htab);
+
+	if(munmap(fp->map, fp->len) != -1) {
+		return QIDX_MAP_FAIL;
+	}
+
+	return QIDX_OK;
+}
+
+qidx_htable_t * qidx_get_htable(qidx_fp_t * fp) { return fp->htab; }
+
+static qidx_bucket_t * _qidx_bucket(char const * qname, qidx_htable_t const * tab) {
+	size_t hash = murmur_hash_str_64s(qname, 0x12345);
+
+	qidx_bucket_t * bucket = &tab->buckets[hash % tab->n_buckets];
+
+	return bucket;
+}
+
+qidx_err_t qidx_table_add(qidx_htable_t * tab, qidx_record_t const * rec) {
+	qidx_bucket_t * bucket = _qidx_bucket(rec->qname, tab);
+
+	void * ptr = (char *) tab->rb_start + bucket->offset + bucket->n_bytes;
+	size_t rem = tab->max_bucket_size - bucket->n_bytes;
+
+	ssize_t wrlen = qidx_record_marital(rec, ptr, rem);
+
+	if(wrlen < 0) {
+		return QIDX_BUCKET_MAX_SIZE_EXCEEDED;
+	}
+
+	bucket->n_bytes += wrlen;
+
+	return QIDX_OK;
+}
+
+qidx_err_t qidx_table_get(qidx_htable_t const * tab,
+		char const * qname, qidx_record_t ** rec) {
+	qidx_bucket_t * bucket = _qidx_bucket(qname, tab);
+
+	void const * bucket_start = tab->rb_start + bucket->offset;
+	void const * record_start =
+		qidx_record_martial_find(bucket_start, bucket->n_bytes, qname);
+
+	if(!record_start) {
+		*rec = NULL;
+	} else {
+		size_t rem = bucket->n_bytes -
+						((char const *) record_start - (char const *) bucket_start);
+		*rec = malloc(sizeof(qidx_record_t));
+
+		if(!*rec) { return QIDX_NO_MEM; }
+
+		void const * next = qidx_record_unmartial(*rec, record_start, rem);
+
+		if(!next) {
+			return QIDX_IO_FAIL;
+		}
+	}
+
+	return QIDX_OK;
+}
+
+void qidx_die_err(qidx_err_t err) {
+	char const * errstr = qidx_strerr(err);
+	if(qidx_errno(err)) {
+		die_errno("err: %s", errstr);
+	} else {
+		die("err: %s", errstr);
+	}
+}
+
+void qidx_record_print(FILE * file, qidx_record_t * rec) {
+	fprintf(file, "qname: %s\n", rec->qname);
+	for(size_t i = 0; i < rec->n_alns; i++) {
+		fprintf(file, "\tTID: %u\n", rec->alns[i].tid);
+		fprintf(file, "\tPOS: %u\n", rec->alns[i].pos);
+		fprintf(file, "\tVPTR: %lu\n", rec->alns[i].vptr);
+	}
+}
+
+qidx_err_t qidx_create_index(char const * bamfile, char const * indexfile) {
+	qidx_err_t err;
+
+	htsFile *b_fp = hts_open(bamfile, "r");
+	if(b_fp == NULL) { return QIDX_FAILED_TO_OPEN_BAMFILE; }
+
+	// Pass 1:
+	// - estimate mu, s, n of the distribution of record sizes
+	// - estimate the number of buckets needed in the hash table
+	qidx_record_iter_t it;
+	if((err = qidx_record_iter_init(&it, b_fp))) {
+		return err;
+	}
+
+	roll_est_params_t params;
+	memset(&params, 0, sizeof(roll_est_params_t));
+	qidx_record_t rec;
+	while(!(err = qidx_record_iter_next(&it, &rec))) {
+		double sz = (double) qidx_record_msize(&rec);
+		roll_est_add_evidence(&params, sz);
+	}
+	if(err != QIDX_ITER_DONE) return err;
+
+	if ((err = qidx_record_iter_destroy(&it))) {
+		return err;
+	}
+
+	roll_est_finalize(&params);
+
+	double soft_limit = 8 * 1024;
+	double z = 5;
+	uint32_t n_buckets = estimate_n_buckets(soft_limit, z, &params);
+	uint32_t max_bucket_size = 1 << 24;
+
+	printf("record size (mean): %f\n", params.mu);
+	printf("record size (variance): %f\n", params.s);
+	printf("number of records: %zu\n", params.n);
+	printf("number of buckets: %u\n", n_buckets);
+
+	FILE * qidx = fopen(indexfile, "wb+");
+
+	if(!qidx) { return QIDX_FAILED_TO_OPEN_INDEX_FILE; }
+
+	int fd = fileno(qidx);
+
+	// PASS 2:
+	// - add records to index
+
+	// Rewind to start of BAM file
+	if (-1 == bgzf_seek(b_fp->fp.bgzf, 0, SEEK_SET)) {
+		return QIDX_IO_FAIL;
+	}
+
+	qidx_fp_t q_fp;
+	if((err = qidx_create(&q_fp, fd, n_buckets, max_bucket_size))) {
+		qidx_die_err(err);
+	}
+
+	qidx_htable_t * htab = qidx_get_htable(&q_fp);
+
+	if ((err = qidx_record_iter_init(&it, b_fp))) {
+		return err;
+	}
+
+	while((err = qidx_record_iter_next(&it, &rec)) == QIDX_OK) {
+		qidx_table_add(htab, &rec);
+	}
+	if(err != QIDX_ITER_DONE) return err;
+
+	if ((err = qidx_record_iter_destroy(&it))) {
+		return err;
+	}
+
+	qidx_close(&q_fp);
+	fclose(qidx);
+	hts_close(b_fp);
+
+	return QIDX_OK;
+}
+
+qidx_err_t qidx_search_index(char const * indexfile,
+		char const * bamfile, char const * qname) {
+	FILE * index = fopen(indexfile, "rb");
+	if(!index) { return QIDX_FAILED_TO_OPEN_INDEX_FILE; }
+
+	int fd = fileno(index);
+
+	qidx_fp_t q_fp;
+	qidx_err_t err = qidx_open(&q_fp, fd);
+	if(err) return err;
+
+	qidx_htable_t * htab = qidx_get_htable(&q_fp);
+
+	qidx_record_t * rec;
+	if((err = qidx_table_get(htab, qname, &rec))) {
+		return err;
+	}
+
+	if(rec) {
+		htsFile * fp = hts_open(bamfile, "r");
+		htsFile* out_fp = hts_open("-", "w");
+
+
+		if(!fp) {
+			return QIDX_FAILED_TO_OPEN_BAMFILE;
+		}
+
+		bam1_t * b = bam_init1();
+		bam_hdr_t * hdr = sam_hdr_read(fp);
+		for(size_t i = 0; i < rec->n_alns; i++) {
+			aln_spec_t * aln = &rec->alns[i];
+			
+			if(bgzf_seek(fp->fp.bgzf, aln->vptr, SEEK_SET) < 0) {
+				return QIDX_IO_FAIL;
+			}
+
+			int ret;
+			if((ret = sam_read1(fp, hdr, b)) >= 0) {
+				
+				int ret = sam_write1(out_fp, hdr, b);
+				if(ret < 0) {
+					return QIDX_IO_FAIL;
+				}
+			}
+		}
+
+		sdsfree(rec->qname);
+		free(rec->alns);
+		free(rec);
+
+		bam_hdr_destroy(hdr);
+		bam_destroy1(b);
+		hts_close(out_fp);
+		hts_close(fp);
+	} else {
+		fprintf(stderr, "Could not find read: %s\n", qname);
+	}
+
+	return QIDX_OK;
+}
+
+void die_usage(char const * prog) {
+	err("%s {create,search}", prog);
+	err("   create [bamfile] [indexfile]");
+	die("   search [index] [read]");
+}
+
+int main(int argc, char ** argv) {
+
+	if(!argc) return 1;
+
+	if(argc < 3) die_usage(argv[0]);
+
+	char const * type = argv[1];
+	qidx_err_t err;
+
+	if(strcmp(type, "create") == 0) {
+		if(argc != 4) die_usage(argv[0]);
+		char const * bamfile = argv[2];
+		char const * indexfile = argv[3];
+		err = qidx_create_index(bamfile, indexfile);
+		if(err) {
+			qidx_die_err(err);
+		}
+	} else if (strcmp(type, "search") == 0) {
+		if(argc != 5) die_usage(argv[0]);
+		char const * indexfile = argv[2];
+		char const * bamfile = argv[3];
+		char const * qname = argv[4];
+		if((err = qidx_search_index(indexfile, bamfile, qname))) {
+			qidx_die_err(err);
+		}
+	} else {
+		die_usage(argv[0]);
+	}
+
+}
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..e412184
--- /dev/null
+++ b/makefile
@@ -0,0 +1,3 @@
+main:
+	gcc main.c ./util/hash.c ./util/prime_search.c -lhts -lm ./util/sds/sds.c ./util/err.c -g -o qidx -fsanitize=address
+.PHONY: main
diff --git a/util/err.c b/util/err.c
new file mode 100644
index 0000000..1f3fb6d
--- /dev/null
+++ b/util/err.c
@@ -0,0 +1,80 @@
+#include "util.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#define FORMAT_MAX_SIZE 1024
+
+extern int errno;
+
+void NORETURN die(char const * format, ...) {
+	va_list vargs;
+	va_start (vargs, format);
+
+	vfprintf(stderr, format, vargs);
+	fprintf(stderr, "\n");
+
+	va_end(vargs);
+	exit(1);
+}
+
+void err(char const * format, ...) {
+	va_list vargs;
+	va_start (vargs, format);
+
+	vfprintf(stderr, format, vargs);
+	fprintf(stderr, "\n");
+
+	va_end(vargs);
+}
+
+void format_with_errno(char * buf, size_t n, char const * format) {
+	char const * errstr = strerror(errno);
+	char fmt_err[256];
+
+	size_t i = 0,
+		   j = 0;
+	while(errstr[j] && i < sizeof(fmt_err) - 1) {
+		if((fmt_err[i++] = errstr[j++]) != '%')
+			continue;
+
+		if(i < sizeof(fmt_err) - 1) {
+			fmt_err[i++] = '%';
+		} else {
+			i--;
+		}
+	}
+
+	fmt_err[i] = '\0';
+
+	snprintf(buf, n, "%s: %s", format, fmt_err);
+}
+
+
+void NORETURN die_errno(char const * format, ...) {
+	va_list vargs;
+	va_start (vargs, format);
+
+	char buf[FORMAT_MAX_SIZE];
+	format_with_errno(buf, sizeof(buf), format);
+	vfprintf(stderr, buf, vargs);
+	fprintf(stderr, "\n");
+	exit(1);
+
+	va_end(vargs);
+}
+
+void err_errno(char const * format, ...) {
+	va_list vargs;
+	va_start (vargs, format);
+
+	char buf[FORMAT_MAX_SIZE];
+
+	format_with_errno(buf, sizeof(buf), format);
+	vfprintf(stderr, buf, vargs);
+	fprintf(stderr, "\n");
+	
+	va_end(vargs);
+}
diff --git a/util/hash.c b/util/hash.c
new file mode 100644
index 0000000..5f9b482
--- /dev/null
+++ b/util/hash.c
@@ -0,0 +1,46 @@
+#include "hash.h"
+
+uint64_t murmur_hash_64a(void const * key, int len, unsigned int seed)
+{
+	const uint64_t m = 0xc6a4a7935bd1e995;
+	const int r = 47;
+
+	uint64_t h = seed ^ (len * m);
+
+	const uint64_t * data = (const uint64_t *)key;
+	const uint64_t * end = data + (len/8);
+
+	while(data != end)
+	{
+		uint64_t k = *data++;
+
+		k *= m; 
+		k ^= k >> r; 
+		k *= m; 
+		
+		h ^= k;
+		h *= m; 
+	}
+
+	const unsigned char * data2 = (const unsigned char*)data;
+
+	switch(len & 7)
+	{
+	case 7: h ^= (uint64_t)(data2[6]) << 48;
+	case 6: h ^= (uint64_t)(data2[5]) << 40;
+	case 5: h ^= (uint64_t)(data2[4]) << 32;
+	case 4: h ^= (uint64_t)(data2[3]) << 24;
+	case 3: h ^= (uint64_t)(data2[2]) << 16;
+	case 2: h ^= (uint64_t)(data2[1]) << 8;
+	case 1: h ^= (uint64_t)(data2[0]);
+	        h *= m;
+	};
+ 
+	h ^= h >> r;
+	h *= m;
+	h ^= h >> r;
+
+	return h;
+} 
+
+
diff --git a/util/hash.h b/util/hash.h
new file mode 100644
index 0000000..0c6a546
--- /dev/null
+++ b/util/hash.h
@@ -0,0 +1,27 @@
+#ifndef UTIL_HASH_H
+#define UTIL_HASH_H
+
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+
+uint64_t murmur_hash_64a(void const * key, int len, unsigned int seed);
+
+static inline uint64_t murmur_hash_str_64s(char const * str, unsigned int seed) {
+	return murmur_hash_64a(str, strlen(str), seed);
+}
+
+static inline uint64_t murmur64(uint64_t h) {
+	h ^= h >> 33;
+	h *= 0xff51afd7ed558ccdLLU;
+	h ^= h >> 33;
+	h *= 0xc4ceb9fe1a85ec53LLU;
+	h ^= h >> 33;
+	return h;
+}
+
+static inline size_t murmur64_range(uint64_t h, size_t n) {
+	return murmur64(h) % n;
+}
+
+#endif /* ifndef UTIL_HASH_H */
diff --git a/util/prime_search.c b/util/prime_search.c
new file mode 100644
index 0000000..e717faa
--- /dev/null
+++ b/util/prime_search.c
@@ -0,0 +1,344 @@
+#include "prime_search.h"
+
+size_t const PRIME_TABLE[] = {
+	0ul,
+	2ul,
+	3ul,
+	5ul,
+	7ul,
+	11ul,
+	13ul,
+	17ul,
+	19ul,
+	23ul,
+	29ul,
+	31ul,
+	37ul,
+	41ul,
+	43ul,
+	47ul,
+	53ul,
+	59ul,
+	61ul,
+	67ul,
+	71ul,
+	73ul,
+	79ul,
+	83ul,
+	89ul,
+	97ul,
+	101ul,
+	103ul,
+	107ul,
+	109ul,
+	113ul,
+	127ul,
+	131ul,
+	137ul,
+	139ul,
+	149ul,
+	151ul,
+	157ul,
+	163ul,
+	167ul,
+	173ul,
+	179ul,
+	181ul,
+	191ul,
+	193ul,
+	197ul,
+	199ul,
+	211ul,
+	227ul,
+	241ul,
+	257ul,
+	277ul,
+	293ul,
+	313ul,
+	337ul,
+	359ul,
+	383ul,
+	409ul,
+	439ul,
+	467ul,
+	503ul,
+	541ul,
+	577ul,
+	619ul,
+	661ul,
+	709ul,
+	761ul,
+	823ul,
+	887ul,
+	953ul,
+	1031ul,
+	1109ul,
+	1193ul,
+	1289ul,
+	1381ul,
+	1493ul,
+	1613ul,
+	1741ul,
+	1879ul,
+	2029ul,
+	2179ul,
+	2357ul,
+	2549ul,
+	2753ul,
+	2971ul,
+	3209ul,
+	3469ul,
+	3739ul,
+	4027ul,
+	4349ul,
+	4703ul,
+	5087ul,
+	5503ul,
+	5953ul,
+	6427ul,
+	6949ul,
+	7517ul,
+	8123ul,
+	8783ul,
+	9497ul,
+	10273ul,
+	11113ul,
+	12011ul,
+	12983ul,
+	14033ul,
+	15173ul,
+	16411ul,
+	17749ul,
+	19183ul,
+	20753ul,
+	22447ul,
+	24281ul,
+	26267ul,
+	28411ul,
+	30727ul,
+	33223ul,
+	35933ul,
+	38873ul,
+	42043ul,
+	45481ul,
+	49201ul,
+	53201ul,
+	57557ul,
+	62233ul,
+	67307ul,
+	72817ul,
+	78779ul,
+	85229ul,
+	92203ul,
+	99733ul,
+	107897ul,
+	116731ul,
+	126271ul,
+	136607ul,
+	147793ul,
+	159871ul,
+	172933ul,
+	187091ul,
+	202409ul,
+	218971ul,
+	236897ul,
+	256279ul,
+	277261ul,
+	299951ul,
+	324503ul,
+	351061ul,
+	379787ul,
+	410857ul,
+	444487ul,
+	480881ul,
+	520241ul,
+	562841ul,
+	608903ul,
+	658753ul,
+	712697ul,
+	771049ul,
+	834181ul,
+	902483ul,
+	976369ul,
+	1056323ul,
+	1142821ul,
+	1236397ul,
+	1337629ul,
+	1447153ul,
+	1565659ul,
+	1693859ul,
+	1832561ul,
+	1982627ul,
+	2144977ul,
+	2320627ul,
+	2510653ul,
+	2716249ul,
+	2938679ul,
+	3179303ul,
+	3439651ul,
+	3721303ul,
+	4026031ul,
+	4355707ul,
+	4712381ul,
+	5098259ul,
+	5515729ul,
+	5967347ul,
+	6456007ul,
+	6984629ul,
+	7556579ul,
+	8175383ul,
+	8844859ul,
+	9569143ul,
+	10352717ul,
+	11200489ul,
+	12117689ul,
+	13109983ul,
+	14183539ul,
+	15345007ul,
+	16601593ul,
+	17961079ul,
+	19431899ul,
+	21023161ul,
+	22744717ul,
+	24607243ul,
+	26622317ul,
+	28802401ul,
+	31160981ul,
+	33712729ul,
+	36473443ul,
+	39460231ul,
+	42691603ul,
+	46187573ul,
+	49969847ul,
+	54061849ul,
+	58488943ul,
+	63278561ul,
+	68460391ul,
+	74066549ul,
+	80131819ul,
+	86693767ul,
+	93793069ul,
+	101473717ul,
+	109783337ul,
+	118773397ul,
+	128499677ul,
+	139022417ul,
+	150406843ul,
+	162723577ul,
+	176048909ul,
+	190465427ul,
+	206062531ul,
+	222936881ul,
+	241193053ul,
+	260944219ul,
+	282312799ul,
+	305431229ul,
+	330442829ul,
+	357502601ul,
+	386778277ul,
+	418451333ul,
+	452718089ul,
+	489790921ul,
+	529899637ul,
+	573292817ul,
+	620239453ul,
+	671030513ul,
+	725980837ul,
+	785430967ul,
+	849749479ul,
+	919334987ul,
+	994618837ul,
+	1076067617ul,
+	1164186217ul,
+	1259520799ul,
+	1362662261ul,
+	1474249943ul,
+	1594975441ul,
+	1725587117ul,
+	1866894511ul,
+	2019773507ul,
+	2185171673ul,
+	2364114217ul,
+	2557710269ul,
+	2767159799ul,
+	2993761039ul,
+	3238918481ul,
+	3504151727ul,
+	3791104843ul,
+	4101556399ul,
+	4294967291ul,
+#if __WORDSIZE == 64
+	4294967291ul
+#else
+	6442450933ul,
+	8589934583ul,
+	12884901857ul,
+	17179869143ul,
+	25769803693ul,
+	34359738337ul,
+	51539607367ul,
+	68719476731ul,
+	103079215087ul,
+	137438953447ul,
+	206158430123ul,
+	274877906899ul,
+	412316860387ul,
+	549755813881ul,
+	824633720731ul,
+	1099511627689ul,
+	1649267441579ul,
+	2199023255531ul,
+	3298534883309ul,
+	4398046511093ul,
+	6597069766607ul,
+	8796093022151ul,
+	13194139533241ul,
+	17592186044399ul,
+	26388279066581ul,
+	35184372088777ul,
+	52776558133177ul,
+	70368744177643ul,
+	105553116266399ul,
+	140737488355213ul,
+	211106232532861ul,
+	281474976710597ul,
+	562949953421231ul,
+	1125899906842597ul,
+	2251799813685119ul,
+	4503599627370449ul,
+	9007199254740881ul,
+	18014398509481951ul,
+	36028797018963913ul,
+	72057594037927931ul,
+	144115188075855859ul,
+	288230376151711717ul,
+	576460752303423433ul,
+	1152921504606846883ul,
+	2305843009213693951ul,
+	4611686018427387847ul,
+	9223372036854775783ul,
+	18446744073709551557ul
+#endif
+};
+
+#define ENTS (sizeof(PRIME_TABLE) / sizeof(*PRIME_TABLE))
+
+size_t next_prime_above_lowerbound(size_t n) {
+	size_t const * a = PRIME_TABLE,
+	             * b = PRIME_TABLE + (ENTS - 1),
+					 * m;
+	do {
+		m = a + (b - a) / 2;
+		
+		if(*m >= n) {
+			b = m;
+		} else if (*m < n) {
+			a = m + 1;
+		} else {
+			break;
+		}
+	} while(*a < *b);
+
+	return *b;
+}
+
diff --git a/util/prime_search.h b/util/prime_search.h
new file mode 100644
index 0000000..842c6d9
--- /dev/null
+++ b/util/prime_search.h
@@ -0,0 +1,8 @@
+#ifndef _NEXT_LARGEST_PRIME_H
+#define _NEXT_LARGEST_PRIME_H
+
+#include <stddef.h>
+
+size_t next_prime_above_lowerbound(size_t n);
+
+#endif /* ifndef _NEXT_LARGEST_PRIME_H */
diff --git a/util/sds/sds.c b/util/sds/sds.c
new file mode 100644
index 0000000..1189716
--- /dev/null
+++ b/util/sds/sds.c
@@ -0,0 +1,1300 @@
+/* SDSLib 2.0 -- A C dynamic strings library
+ *
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Oran Agra
+ * Copyright (c) 2015, Redis Labs, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *   * Neither the name of Redis nor the names of its contributors may be used
+ *     to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <assert.h>
+#include <limits.h>
+#include "sds.h"
+#include "sdsalloc.h"
+
+const char *SDS_NOINIT = "SDS_NOINIT";
+
+static inline int sdsHdrSize(char type) {
+    switch(type&SDS_TYPE_MASK) {
+        case SDS_TYPE_5:
+            return sizeof(struct sdshdr5);
+        case SDS_TYPE_8:
+            return sizeof(struct sdshdr8);
+        case SDS_TYPE_16:
+            return sizeof(struct sdshdr16);
+        case SDS_TYPE_32:
+            return sizeof(struct sdshdr32);
+        case SDS_TYPE_64:
+            return sizeof(struct sdshdr64);
+    }
+    return 0;
+}
+
+static inline char sdsReqType(size_t string_size) {
+    if (string_size < 1<<5)
+        return SDS_TYPE_5;
+    if (string_size < 1<<8)
+        return SDS_TYPE_8;
+    if (string_size < 1<<16)
+        return SDS_TYPE_16;
+#if (LONG_MAX == LLONG_MAX)
+    if (string_size < 1ll<<32)
+        return SDS_TYPE_32;
+    return SDS_TYPE_64;
+#else
+    return SDS_TYPE_32;
+#endif
+}
+
+/* Create a new sds string with the content specified by the 'init' pointer
+ * and 'initlen'.
+ * If NULL is used for 'init' the string is initialized with zero bytes.
+ * If SDS_NOINIT is used, the buffer is left uninitialized;
+ *
+ * The string is always null-termined (all the sds strings are, always) so
+ * even if you create an sds string with:
+ *
+ * mystring = sdsnewlen("abc",3);
+ *
+ * You can print the string with printf() as there is an implicit \0 at the
+ * end of the string. However the string is binary safe and can contain
+ * \0 characters in the middle, as the length is stored in the sds header. */
+sds sdsnewlen(const void *init, size_t initlen) {
+    void *sh;
+    sds s;
+    char type = sdsReqType(initlen);
+    /* Empty strings are usually created in order to append. Use type 8
+     * since type 5 is not good at this. */
+    if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;
+    int hdrlen = sdsHdrSize(type);
+    unsigned char *fp; /* flags pointer. */
+
+    sh = s_malloc(hdrlen+initlen+1);
+    if (sh == NULL) return NULL;
+    if (init==SDS_NOINIT)
+        init = NULL;
+    else if (!init)
+        memset(sh, 0, hdrlen+initlen+1);
+    s = (char*)sh+hdrlen;
+    fp = ((unsigned char*)s)-1;
+    switch(type) {
+        case SDS_TYPE_5: {
+            *fp = type | (initlen << SDS_TYPE_BITS);
+            break;
+        }
+        case SDS_TYPE_8: {
+            SDS_HDR_VAR(8,s);
+            sh->len = initlen;
+            sh->alloc = initlen;
+            *fp = type;
+            break;
+        }
+        case SDS_TYPE_16: {
+            SDS_HDR_VAR(16,s);
+            sh->len = initlen;
+            sh->alloc = initlen;
+            *fp = type;
+            break;
+        }
+        case SDS_TYPE_32: {
+            SDS_HDR_VAR(32,s);
+            sh->len = initlen;
+            sh->alloc = initlen;
+            *fp = type;
+            break;
+        }
+        case SDS_TYPE_64: {
+            SDS_HDR_VAR(64,s);
+            sh->len = initlen;
+            sh->alloc = initlen;
+            *fp = type;
+            break;
+        }
+    }
+    if (initlen && init)
+        memcpy(s, init, initlen);
+    s[initlen] = '\0';
+    return s;
+}
+
+/* Create an empty (zero length) sds string. Even in this case the string
+ * always has an implicit null term. */
+sds sdsempty(void) {
+    return sdsnewlen("",0);
+}
+
+/* Create a new sds string starting from a null terminated C string. */
+sds sdsnew(const char *init) {
+    size_t initlen = (init == NULL) ? 0 : strlen(init);
+    return sdsnewlen(init, initlen);
+}
+
+/* Duplicate an sds string. */
+sds sdsdup(const sds s) {
+    return sdsnewlen(s, sdslen(s));
+}
+
+/* Free an sds string. No operation is performed if 's' is NULL. */
+void sdsfree(sds s) {
+    if (s == NULL) return;
+    s_free((char*)s-sdsHdrSize(s[-1]));
+}
+
+/* Set the sds string length to the length as obtained with strlen(), so
+ * considering as content only up to the first null term character.
+ *
+ * This function is useful when the sds string is hacked manually in some
+ * way, like in the following example:
+ *
+ * s = sdsnew("foobar");
+ * s[2] = '\0';
+ * sdsupdatelen(s);
+ * printf("%d\n", sdslen(s));
+ *
+ * The output will be "2", but if we comment out the call to sdsupdatelen()
+ * the output will be "6" as the string was modified but the logical length
+ * remains 6 bytes. */
+void sdsupdatelen(sds s) {
+    size_t reallen = strlen(s);
+    sdssetlen(s, reallen);
+}
+
+/* Modify an sds string in-place to make it empty (zero length).
+ * However all the existing buffer is not discarded but set as free space
+ * so that next append operations will not require allocations up to the
+ * number of bytes previously available. */
+void sdsclear(sds s) {
+    sdssetlen(s, 0);
+    s[0] = '\0';
+}
+
+/* Enlarge the free space at the end of the sds string so that the caller
+ * is sure that after calling this function can overwrite up to addlen
+ * bytes after the end of the string, plus one more byte for nul term.
+ *
+ * Note: this does not change the *length* of the sds string as returned
+ * by sdslen(), but only the free buffer space we have. */
+sds sdsMakeRoomFor(sds s, size_t addlen) {
+    void *sh, *newsh;
+    size_t avail = sdsavail(s);
+    size_t len, newlen;
+    char type, oldtype = s[-1] & SDS_TYPE_MASK;
+    int hdrlen;
+
+    /* Return ASAP if there is enough space left. */
+    if (avail >= addlen) return s;
+
+    len = sdslen(s);
+    sh = (char*)s-sdsHdrSize(oldtype);
+    newlen = (len+addlen);
+    if (newlen < SDS_MAX_PREALLOC)
+        newlen *= 2;
+    else
+        newlen += SDS_MAX_PREALLOC;
+
+    type = sdsReqType(newlen);
+
+    /* Don't use type 5: the user is appending to the string and type 5 is
+     * not able to remember empty space, so sdsMakeRoomFor() must be called
+     * at every appending operation. */
+    if (type == SDS_TYPE_5) type = SDS_TYPE_8;
+
+    hdrlen = sdsHdrSize(type);
+    if (oldtype==type) {
+        newsh = s_realloc(sh, hdrlen+newlen+1);
+        if (newsh == NULL) return NULL;
+        s = (char*)newsh+hdrlen;
+    } else {
+        /* Since the header size changes, need to move the string forward,
+         * and can't use realloc */
+        newsh = s_malloc(hdrlen+newlen+1);
+        if (newsh == NULL) return NULL;
+        memcpy((char*)newsh+hdrlen, s, len+1);
+        s_free(sh);
+        s = (char*)newsh+hdrlen;
+        s[-1] = type;
+        sdssetlen(s, len);
+    }
+    sdssetalloc(s, newlen);
+    return s;
+}
+
+/* Reallocate the sds string so that it has no free space at the end. The
+ * contained string remains not altered, but next concatenation operations
+ * will require a reallocation.
+ *
+ * After the call, the passed sds string is no longer valid and all the
+ * references must be substituted with the new pointer returned by the call. */
+sds sdsRemoveFreeSpace(sds s) {
+    void *sh, *newsh;
+    char type, oldtype = s[-1] & SDS_TYPE_MASK;
+    int hdrlen, oldhdrlen = sdsHdrSize(oldtype);
+    size_t len = sdslen(s);
+    size_t avail = sdsavail(s);
+    sh = (char*)s-oldhdrlen;
+
+    /* Return ASAP if there is no space left. */
+    if (avail == 0) return s;
+
+    /* Check what would be the minimum SDS header that is just good enough to
+     * fit this string. */
+    type = sdsReqType(len);
+    hdrlen = sdsHdrSize(type);
+
+    /* If the type is the same, or at least a large enough type is still
+     * required, we just realloc(), letting the allocator to do the copy
+     * only if really needed. Otherwise if the change is huge, we manually
+     * reallocate the string to use the different header type. */
+    if (oldtype==type || type > SDS_TYPE_8) {
+        newsh = s_realloc(sh, oldhdrlen+len+1);
+        if (newsh == NULL) return NULL;
+        s = (char*)newsh+oldhdrlen;
+    } else {
+        newsh = s_malloc(hdrlen+len+1);
+        if (newsh == NULL) return NULL;
+        memcpy((char*)newsh+hdrlen, s, len+1);
+        s_free(sh);
+        s = (char*)newsh+hdrlen;
+        s[-1] = type;
+        sdssetlen(s, len);
+    }
+    sdssetalloc(s, len);
+    return s;
+}
+
+/* Return the total size of the allocation of the specified sds string,
+ * including:
+ * 1) The sds header before the pointer.
+ * 2) The string.
+ * 3) The free buffer at the end if any.
+ * 4) The implicit null term.
+ */
+size_t sdsAllocSize(sds s) {
+    size_t alloc = sdsalloc(s);
+    return sdsHdrSize(s[-1])+alloc+1;
+}
+
+/* Return the pointer of the actual SDS allocation (normally SDS strings
+ * are referenced by the start of the string buffer). */
+void *sdsAllocPtr(sds s) {
+    return (void*) (s-sdsHdrSize(s[-1]));
+}
+
+/* Increment the sds length and decrements the left free space at the
+ * end of the string according to 'incr'. Also set the null term
+ * in the new end of the string.
+ *
+ * This function is used in order to fix the string length after the
+ * user calls sdsMakeRoomFor(), writes something after the end of
+ * the current string, and finally needs to set the new length.
+ *
+ * Note: it is possible to use a negative increment in order to
+ * right-trim the string.
+ *
+ * Usage example:
+ *
+ * Using sdsIncrLen() and sdsMakeRoomFor() it is possible to mount the
+ * following schema, to cat bytes coming from the kernel to the end of an
+ * sds string without copying into an intermediate buffer:
+ *
+ * oldlen = sdslen(s);
+ * s = sdsMakeRoomFor(s, BUFFER_SIZE);
+ * nread = read(fd, s+oldlen, BUFFER_SIZE);
+ * ... check for nread <= 0 and handle it ...
+ * sdsIncrLen(s, nread);
+ */
+void sdsIncrLen(sds s, ssize_t incr) {
+    unsigned char flags = s[-1];
+    size_t len;
+    switch(flags&SDS_TYPE_MASK) {
+        case SDS_TYPE_5: {
+            unsigned char *fp = ((unsigned char*)s)-1;
+            unsigned char oldlen = SDS_TYPE_5_LEN(flags);
+            assert((incr > 0 && oldlen+incr < 32) || (incr < 0 && oldlen >= (unsigned int)(-incr)));
+            *fp = SDS_TYPE_5 | ((oldlen+incr) << SDS_TYPE_BITS);
+            len = oldlen+incr;
+            break;
+        }
+        case SDS_TYPE_8: {
+            SDS_HDR_VAR(8,s);
+            assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
+            len = (sh->len += incr);
+            break;
+        }
+        case SDS_TYPE_16: {
+            SDS_HDR_VAR(16,s);
+            assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
+            len = (sh->len += incr);
+            break;
+        }
+        case SDS_TYPE_32: {
+            SDS_HDR_VAR(32,s);
+            assert((incr >= 0 && sh->alloc-sh->len >= (unsigned int)incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
+            len = (sh->len += incr);
+            break;
+        }
+        case SDS_TYPE_64: {
+            SDS_HDR_VAR(64,s);
+            assert((incr >= 0 && sh->alloc-sh->len >= (uint64_t)incr) || (incr < 0 && sh->len >= (uint64_t)(-incr)));
+            len = (sh->len += incr);
+            break;
+        }
+        default: len = 0; /* Just to avoid compilation warnings. */
+    }
+    s[len] = '\0';
+}
+
+/* Grow the sds to have the specified length. Bytes that were not part of
+ * the original length of the sds will be set to zero.
+ *
+ * if the specified length is smaller than the current length, no operation
+ * is performed. */
+sds sdsgrowzero(sds s, size_t len) {
+    size_t curlen = sdslen(s);
+
+    if (len <= curlen) return s;
+    s = sdsMakeRoomFor(s,len-curlen);
+    if (s == NULL) return NULL;
+
+    /* Make sure added region doesn't contain garbage */
+    memset(s+curlen,0,(len-curlen+1)); /* also set trailing \0 byte */
+    sdssetlen(s, len);
+    return s;
+}
+
+/* Append the specified binary-safe string pointed by 't' of 'len' bytes to the
+ * end of the specified sds string 's'.
+ *
+ * After the call, the passed sds string is no longer valid and all the
+ * references must be substituted with the new pointer returned by the call. */
+sds sdscatlen(sds s, const void *t, size_t len) {
+    size_t curlen = sdslen(s);
+
+    s = sdsMakeRoomFor(s,len);
+    if (s == NULL) return NULL;
+    memcpy(s+curlen, t, len);
+    sdssetlen(s, curlen+len);
+    s[curlen+len] = '\0';
+    return s;
+}
+
+/* Append the specified null termianted C string to the sds string 's'.
+ *
+ * After the call, the passed sds string is no longer valid and all the
+ * references must be substituted with the new pointer returned by the call. */
+sds sdscat(sds s, const char *t) {
+    return sdscatlen(s, t, strlen(t));
+}
+
+/* Append the specified sds 't' to the existing sds 's'.
+ *
+ * After the call, the modified sds string is no longer valid and all the
+ * references must be substituted with the new pointer returned by the call. */
+sds sdscatsds(sds s, const sds t) {
+    return sdscatlen(s, t, sdslen(t));
+}
+
+/* Destructively modify the sds string 's' to hold the specified binary
+ * safe string pointed by 't' of length 'len' bytes. */
+sds sdscpylen(sds s, const char *t, size_t len) {
+    if (sdsalloc(s) < len) {
+        s = sdsMakeRoomFor(s,len-sdslen(s));
+        if (s == NULL) return NULL;
+    }
+    memcpy(s, t, len);
+    s[len] = '\0';
+    sdssetlen(s, len);
+    return s;
+}
+
+/* Like sdscpylen() but 't' must be a null-termined string so that the length
+ * of the string is obtained with strlen(). */
+sds sdscpy(sds s, const char *t) {
+    return sdscpylen(s, t, strlen(t));
+}
+
+/* Helper for sdscatlonglong() doing the actual number -> string
+ * conversion. 's' must point to a string with room for at least
+ * SDS_LLSTR_SIZE bytes.
+ *
+ * The function returns the length of the null-terminated string
+ * representation stored at 's'. */
+#define SDS_LLSTR_SIZE 21
+int sdsll2str(char *s, long long value) {
+    char *p, aux;
+    unsigned long long v;
+    size_t l;
+
+    /* Generate the string representation, this method produces
+     * an reversed string. */
+    v = (value < 0) ? -value : value;
+    p = s;
+    do {
+        *p++ = '0'+(v%10);
+        v /= 10;
+    } while(v);
+    if (value < 0) *p++ = '-';
+
+    /* Compute length and add null term. */
+    l = p-s;
+    *p = '\0';
+
+    /* Reverse the string. */
+    p--;
+    while(s < p) {
+        aux = *s;
+        *s = *p;
+        *p = aux;
+        s++;
+        p--;
+    }
+    return l;
+}
+
+/* Identical sdsll2str(), but for unsigned long long type. */
+int sdsull2str(char *s, unsigned long long v) {
+    char *p, aux;
+    size_t l;
+
+    /* Generate the string representation, this method produces
+     * an reversed string. */
+    p = s;
+    do {
+        *p++ = '0'+(v%10);
+        v /= 10;
+    } while(v);
+
+    /* Compute length and add null term. */
+    l = p-s;
+    *p = '\0';
+
+    /* Reverse the string. */
+    p--;
+    while(s < p) {
+        aux = *s;
+        *s = *p;
+        *p = aux;
+        s++;
+        p--;
+    }
+    return l;
+}
+
+/* Create an sds string from a long long value. It is much faster than:
+ *
+ * sdscatprintf(sdsempty(),"%lld\n", value);
+ */
+sds sdsfromlonglong(long long value) {
+    char buf[SDS_LLSTR_SIZE];
+    int len = sdsll2str(buf,value);
+
+    return sdsnewlen(buf,len);
+}
+
+/* Like sdscatprintf() but gets va_list instead of being variadic. */
+sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
+    va_list cpy;
+    char staticbuf[1024], *buf = staticbuf, *t;
+    size_t buflen = strlen(fmt)*2;
+
+    /* We try to start using a static buffer for speed.
+     * If not possible we revert to heap allocation. */
+    if (buflen > sizeof(staticbuf)) {
+        buf = s_malloc(buflen);
+        if (buf == NULL) return NULL;
+    } else {
+        buflen = sizeof(staticbuf);
+    }
+
+    /* Try with buffers two times bigger every time we fail to
+     * fit the string in the current buffer size. */
+    while(1) {
+        buf[buflen-2] = '\0';
+        va_copy(cpy,ap);
+        vsnprintf(buf, buflen, fmt, cpy);
+        va_end(cpy);
+        if (buf[buflen-2] != '\0') {
+            if (buf != staticbuf) s_free(buf);
+            buflen *= 2;
+            buf = s_malloc(buflen);
+            if (buf == NULL) return NULL;
+            continue;
+        }
+        break;
+    }
+
+    /* Finally concat the obtained string to the SDS string and return it. */
+    t = sdscat(s, buf);
+    if (buf != staticbuf) s_free(buf);
+    return t;
+}
+
+/* Append to the sds string 's' a string obtained using printf-alike format
+ * specifier.
+ *
+ * After the call, the modified sds string is no longer valid and all the
+ * references must be substituted with the new pointer returned by the call.
+ *
+ * Example:
+ *
+ * s = sdsnew("Sum is: ");
+ * s = sdscatprintf(s,"%d+%d = %d",a,b,a+b).
+ *
+ * Often you need to create a string from scratch with the printf-alike
+ * format. When this is the need, just use sdsempty() as the target string:
+ *
+ * s = sdscatprintf(sdsempty(), "... your format ...", args);
+ */
+sds sdscatprintf(sds s, const char *fmt, ...) {
+    va_list ap;
+    char *t;
+    va_start(ap, fmt);
+    t = sdscatvprintf(s,fmt,ap);
+    va_end(ap);
+    return t;
+}
+
+/* This function is similar to sdscatprintf, but much faster as it does
+ * not rely on sprintf() family functions implemented by the libc that
+ * are often very slow. Moreover directly handling the sds string as
+ * new data is concatenated provides a performance improvement.
+ *
+ * However this function only handles an incompatible subset of printf-alike
+ * format specifiers:
+ *
+ * %s - C String
+ * %S - SDS string
+ * %i - signed int
+ * %I - 64 bit signed integer (long long, int64_t)
+ * %u - unsigned int
+ * %U - 64 bit unsigned integer (unsigned long long, uint64_t)
+ * %% - Verbatim "%" character.
+ */
+sds sdscatfmt(sds s, char const *fmt, ...) {
+    size_t initlen = sdslen(s);
+    const char *f = fmt;
+    long i;
+    va_list ap;
+
+    /* To avoid continuous reallocations, let's start with a buffer that
+     * can hold at least two times the format string itself. It's not the
+     * best heuristic but seems to work in practice. */
+    s = sdsMakeRoomFor(s, initlen + strlen(fmt)*2);
+    va_start(ap,fmt);
+    f = fmt;    /* Next format specifier byte to process. */
+    i = initlen; /* Position of the next byte to write to dest str. */
+    while(*f) {
+        char next, *str;
+        size_t l;
+        long long num;
+        unsigned long long unum;
+
+        /* Make sure there is always space for at least 1 char. */
+        if (sdsavail(s)==0) {
+            s = sdsMakeRoomFor(s,1);
+        }
+
+        switch(*f) {
+        case '%':
+            next = *(f+1);
+            f++;
+            switch(next) {
+            case 's':
+            case 'S':
+                str = va_arg(ap,char*);
+                l = (next == 's') ? strlen(str) : sdslen(str);
+                if (sdsavail(s) < l) {
+                    s = sdsMakeRoomFor(s,l);
+                }
+                memcpy(s+i,str,l);
+                sdsinclen(s,l);
+                i += l;
+                break;
+            case 'i':
+            case 'I':
+                if (next == 'i')
+                    num = va_arg(ap,int);
+                else
+                    num = va_arg(ap,long long);
+                {
+                    char buf[SDS_LLSTR_SIZE];
+                    l = sdsll2str(buf,num);
+                    if (sdsavail(s) < l) {
+                        s = sdsMakeRoomFor(s,l);
+                    }
+                    memcpy(s+i,buf,l);
+                    sdsinclen(s,l);
+                    i += l;
+                }
+                break;
+            case 'u':
+            case 'U':
+                if (next == 'u')
+                    unum = va_arg(ap,unsigned int);
+                else
+                    unum = va_arg(ap,unsigned long long);
+                {
+                    char buf[SDS_LLSTR_SIZE];
+                    l = sdsull2str(buf,unum);
+                    if (sdsavail(s) < l) {
+                        s = sdsMakeRoomFor(s,l);
+                    }
+                    memcpy(s+i,buf,l);
+                    sdsinclen(s,l);
+                    i += l;
+                }
+                break;
+            default: /* Handle %% and generally %<unknown>. */
+                s[i++] = next;
+                sdsinclen(s,1);
+                break;
+            }
+            break;
+        default:
+            s[i++] = *f;
+            sdsinclen(s,1);
+            break;
+        }
+        f++;
+    }
+    va_end(ap);
+
+    /* Add null-term */
+    s[i] = '\0';
+    return s;
+}
+
+/* Remove the part of the string from left and from right composed just of
+ * contiguous characters found in 'cset', that is a null terminted C string.
+ *
+ * After the call, the modified sds string is no longer valid and all the
+ * references must be substituted with the new pointer returned by the call.
+ *
+ * Example:
+ *
+ * s = sdsnew("AA...AA.a.aa.aHelloWorld     :::");
+ * s = sdstrim(s,"Aa. :");
+ * printf("%s\n", s);
+ *
+ * Output will be just "HelloWorld".
+ */
+sds sdstrim(sds s, const char *cset) {
+    char *start, *end, *sp, *ep;
+    size_t len;
+
+    sp = start = s;
+    ep = end = s+sdslen(s)-1;
+    while(sp <= end && strchr(cset, *sp)) sp++;
+    while(ep > sp && strchr(cset, *ep)) ep--;
+    len = (sp > ep) ? 0 : ((ep-sp)+1);
+    if (s != sp) memmove(s, sp, len);
+    s[len] = '\0';
+    sdssetlen(s,len);
+    return s;
+}
+
+/* Turn the string into a smaller (or equal) string containing only the
+ * substring specified by the 'start' and 'end' indexes.
+ *
+ * start and end can be negative, where -1 means the last character of the
+ * string, -2 the penultimate character, and so forth.
+ *
+ * The interval is inclusive, so the start and end characters will be part
+ * of the resulting string.
+ *
+ * The string is modified in-place.
+ *
+ * Example:
+ *
+ * s = sdsnew("Hello World");
+ * sdsrange(s,1,-1); => "ello World"
+ */
+void sdsrange(sds s, ssize_t start, ssize_t end) {
+    size_t newlen, len = sdslen(s);
+
+    if (len == 0) return;
+    if (start < 0) {
+        start = len+start;
+        if (start < 0) start = 0;
+    }
+    if (end < 0) {
+        end = len+end;
+        if (end < 0) end = 0;
+    }
+    newlen = (start > end) ? 0 : (end-start)+1;
+    if (newlen != 0) {
+        if (start >= (ssize_t)len) {
+            newlen = 0;
+        } else if (end >= (ssize_t)len) {
+            end = len-1;
+            newlen = (start > end) ? 0 : (end-start)+1;
+        }
+    } else {
+        start = 0;
+    }
+    if (start && newlen) memmove(s, s+start, newlen);
+    s[newlen] = 0;
+    sdssetlen(s,newlen);
+}
+
+/* Apply tolower() to every character of the sds string 's'. */
+void sdstolower(sds s) {
+    size_t len = sdslen(s), j;
+
+    for (j = 0; j < len; j++) s[j] = tolower(s[j]);
+}
+
+/* Apply toupper() to every character of the sds string 's'. */
+void sdstoupper(sds s) {
+    size_t len = sdslen(s), j;
+
+    for (j = 0; j < len; j++) s[j] = toupper(s[j]);
+}
+
+/* Compare two sds strings s1 and s2 with memcmp().
+ *
+ * Return value:
+ *
+ *     positive if s1 > s2.
+ *     negative if s1 < s2.
+ *     0 if s1 and s2 are exactly the same binary string.
+ *
+ * If two strings share exactly the same prefix, but one of the two has
+ * additional characters, the longer string is considered to be greater than
+ * the smaller one. */
+int sdscmp(const sds s1, const sds s2) {
+    size_t l1, l2, minlen;
+    int cmp;
+
+    l1 = sdslen(s1);
+    l2 = sdslen(s2);
+    minlen = (l1 < l2) ? l1 : l2;
+    cmp = memcmp(s1,s2,minlen);
+    if (cmp == 0) return l1>l2? 1: (l1<l2? -1: 0);
+    return cmp;
+}
+
+/* Split 's' with separator in 'sep'. An array
+ * of sds strings is returned. *count will be set
+ * by reference to the number of tokens returned.
+ *
+ * On out of memory, zero length string, zero length
+ * separator, NULL is returned.
+ *
+ * Note that 'sep' is able to split a string using
+ * a multi-character separator. For example
+ * sdssplit("foo_-_bar","_-_"); will return two
+ * elements "foo" and "bar".
+ *
+ * This version of the function is binary-safe but
+ * requires length arguments. sdssplit() is just the
+ * same function but for zero-terminated strings.
+ */
+sds *sdssplitlen(const char *s, ssize_t len, const char *sep, int seplen, int *count) {
+    int elements = 0, slots = 5;
+    long start = 0, j;
+    sds *tokens;
+
+    if (seplen < 1 || len < 0) return NULL;
+
+    tokens = s_malloc(sizeof(sds)*slots);
+    if (tokens == NULL) return NULL;
+
+    if (len == 0) {
+        *count = 0;
+        return tokens;
+    }
+    for (j = 0; j < (len-(seplen-1)); j++) {
+        /* make sure there is room for the next element and the final one */
+        if (slots < elements+2) {
+            sds *newtokens;
+
+            slots *= 2;
+            newtokens = s_realloc(tokens,sizeof(sds)*slots);
+            if (newtokens == NULL) goto cleanup;
+            tokens = newtokens;
+        }
+        /* search the separator */
+        if ((seplen == 1 && *(s+j) == sep[0]) || (memcmp(s+j,sep,seplen) == 0)) {
+            tokens[elements] = sdsnewlen(s+start,j-start);
+            if (tokens[elements] == NULL) goto cleanup;
+            elements++;
+            start = j+seplen;
+            j = j+seplen-1; /* skip the separator */
+        }
+    }
+    /* Add the final element. We are sure there is room in the tokens array. */
+    tokens[elements] = sdsnewlen(s+start,len-start);
+    if (tokens[elements] == NULL) goto cleanup;
+    elements++;
+    *count = elements;
+    return tokens;
+
+cleanup:
+    {
+        int i;
+        for (i = 0; i < elements; i++) sdsfree(tokens[i]);
+        s_free(tokens);
+        *count = 0;
+        return NULL;
+    }
+}
+
+/* Free the result returned by sdssplitlen(), or do nothing if 'tokens' is NULL. */
+void sdsfreesplitres(sds *tokens, int count) {
+    if (!tokens) return;
+    while(count--)
+        sdsfree(tokens[count]);
+    s_free(tokens);
+}
+
+/* Append to the sds string "s" an escaped string representation where
+ * all the non-printable characters (tested with isprint()) are turned into
+ * escapes in the form "\n\r\a...." or "\x<hex-number>".
+ *
+ * After the call, the modified sds string is no longer valid and all the
+ * references must be substituted with the new pointer returned by the call. */
+sds sdscatrepr(sds s, const char *p, size_t len) {
+    s = sdscatlen(s,"\"",1);
+    while(len--) {
+        switch(*p) {
+        case '\\':
+        case '"':
+            s = sdscatprintf(s,"\\%c",*p);
+            break;
+        case '\n': s = sdscatlen(s,"\\n",2); break;
+        case '\r': s = sdscatlen(s,"\\r",2); break;
+        case '\t': s = sdscatlen(s,"\\t",2); break;
+        case '\a': s = sdscatlen(s,"\\a",2); break;
+        case '\b': s = sdscatlen(s,"\\b",2); break;
+        default:
+            if (isprint(*p))
+                s = sdscatprintf(s,"%c",*p);
+            else
+                s = sdscatprintf(s,"\\x%02x",(unsigned char)*p);
+            break;
+        }
+        p++;
+    }
+    return sdscatlen(s,"\"",1);
+}
+
+/* Helper function for sdssplitargs() that returns non zero if 'c'
+ * is a valid hex digit. */
+int is_hex_digit(char c) {
+    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
+           (c >= 'A' && c <= 'F');
+}
+
+/* Helper function for sdssplitargs() that converts a hex digit into an
+ * integer from 0 to 15 */
+int hex_digit_to_int(char c) {
+    switch(c) {
+    case '0': return 0;
+    case '1': return 1;
+    case '2': return 2;
+    case '3': return 3;
+    case '4': return 4;
+    case '5': return 5;
+    case '6': return 6;
+    case '7': return 7;
+    case '8': return 8;
+    case '9': return 9;
+    case 'a': case 'A': return 10;
+    case 'b': case 'B': return 11;
+    case 'c': case 'C': return 12;
+    case 'd': case 'D': return 13;
+    case 'e': case 'E': return 14;
+    case 'f': case 'F': return 15;
+    default: return 0;
+    }
+}
+
+/* Split a line into arguments, where every argument can be in the
+ * following programming-language REPL-alike form:
+ *
+ * foo bar "newline are supported\n" and "\xff\x00otherstuff"
+ *
+ * The number of arguments is stored into *argc, and an array
+ * of sds is returned.
+ *
+ * The caller should free the resulting array of sds strings with
+ * sdsfreesplitres().
+ *
+ * Note that sdscatrepr() is able to convert back a string into
+ * a quoted string in the same format sdssplitargs() is able to parse.
+ *
+ * The function returns the allocated tokens on success, even when the
+ * input string is empty, or NULL if the input contains unbalanced
+ * quotes or closed quotes followed by non space characters
+ * as in: "foo"bar or "foo'
+ */
+sds *sdssplitargs(const char *line, int *argc) {
+    const char *p = line;
+    char *current = NULL;
+    char **vector = NULL;
+
+    *argc = 0;
+    while(1) {
+        /* skip blanks */
+        while(*p && isspace(*p)) p++;
+        if (*p) {
+            /* get a token */
+            int inq=0;  /* set to 1 if we are in "quotes" */
+            int insq=0; /* set to 1 if we are in 'single quotes' */
+            int done=0;
+
+            if (current == NULL) current = sdsempty();
+            while(!done) {
+                if (inq) {
+                    if (*p == '\\' && *(p+1) == 'x' &&
+                                             is_hex_digit(*(p+2)) &&
+                                             is_hex_digit(*(p+3)))
+                    {
+                        unsigned char byte;
+
+                        byte = (hex_digit_to_int(*(p+2))*16)+
+                                hex_digit_to_int(*(p+3));
+                        current = sdscatlen(current,(char*)&byte,1);
+                        p += 3;
+                    } else if (*p == '\\' && *(p+1)) {
+                        char c;
+
+                        p++;
+                        switch(*p) {
+                        case 'n': c = '\n'; break;
+                        case 'r': c = '\r'; break;
+                        case 't': c = '\t'; break;
+                        case 'b': c = '\b'; break;
+                        case 'a': c = '\a'; break;
+                        default: c = *p; break;
+                        }
+                        current = sdscatlen(current,&c,1);
+                    } else if (*p == '"') {
+                        /* closing quote must be followed by a space or
+                         * nothing at all. */
+                        if (*(p+1) && !isspace(*(p+1))) goto err;
+                        done=1;
+                    } else if (!*p) {
+                        /* unterminated quotes */
+                        goto err;
+                    } else {
+                        current = sdscatlen(current,p,1);
+                    }
+                } else if (insq) {
+                    if (*p == '\\' && *(p+1) == '\'') {
+                        p++;
+                        current = sdscatlen(current,"'",1);
+                    } else if (*p == '\'') {
+                        /* closing quote must be followed by a space or
+                         * nothing at all. */
+                        if (*(p+1) && !isspace(*(p+1))) goto err;
+                        done=1;
+                    } else if (!*p) {
+                        /* unterminated quotes */
+                        goto err;
+                    } else {
+                        current = sdscatlen(current,p,1);
+                    }
+                } else {
+                    switch(*p) {
+                    case ' ':
+                    case '\n':
+                    case '\r':
+                    case '\t':
+                    case '\0':
+                        done=1;
+                        break;
+                    case '"':
+                        inq=1;
+                        break;
+                    case '\'':
+                        insq=1;
+                        break;
+                    default:
+                        current = sdscatlen(current,p,1);
+                        break;
+                    }
+                }
+                if (*p) p++;
+            }
+            /* add the token to the vector */
+            vector = s_realloc(vector,((*argc)+1)*sizeof(char*));
+            vector[*argc] = current;
+            (*argc)++;
+            current = NULL;
+        } else {
+            /* Even on empty input string return something not NULL. */
+            if (vector == NULL) vector = s_malloc(sizeof(void*));
+            return vector;
+        }
+    }
+
+err:
+    while((*argc)--)
+        sdsfree(vector[*argc]);
+    s_free(vector);
+    if (current) sdsfree(current);
+    *argc = 0;
+    return NULL;
+}
+
+/* Modify the string substituting all the occurrences of the set of
+ * characters specified in the 'from' string to the corresponding character
+ * in the 'to' array.
+ *
+ * For instance: sdsmapchars(mystring, "ho", "01", 2)
+ * will have the effect of turning the string "hello" into "0ell1".
+ *
+ * The function returns the sds string pointer, that is always the same
+ * as the input pointer since no resize is needed. */
+sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen) {
+    size_t j, i, l = sdslen(s);
+
+    for (j = 0; j < l; j++) {
+        for (i = 0; i < setlen; i++) {
+            if (s[j] == from[i]) {
+                s[j] = to[i];
+                break;
+            }
+        }
+    }
+    return s;
+}
+
+/* Join an array of C strings using the specified separator (also a C string).
+ * Returns the result as an sds string. */
+sds sdsjoin(char **argv, int argc, char *sep) {
+    sds join = sdsempty();
+    int j;
+
+    for (j = 0; j < argc; j++) {
+        join = sdscat(join, argv[j]);
+        if (j != argc-1) join = sdscat(join,sep);
+    }
+    return join;
+}
+
+/* Like sdsjoin, but joins an array of SDS strings. */
+sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen) {
+    sds join = sdsempty();
+    int j;
+
+    for (j = 0; j < argc; j++) {
+        join = sdscatsds(join, argv[j]);
+        if (j != argc-1) join = sdscatlen(join,sep,seplen);
+    }
+    return join;
+}
+
+/* Wrappers to the allocators used by SDS. Note that SDS will actually
+ * just use the macros defined into sdsalloc.h in order to avoid to pay
+ * the overhead of function calls. Here we define these wrappers only for
+ * the programs SDS is linked to, if they want to touch the SDS internals
+ * even if they use a different allocator. */
+void *sds_malloc(size_t size) { return s_malloc(size); }
+void *sds_realloc(void *ptr, size_t size) { return s_realloc(ptr,size); }
+void sds_free(void *ptr) { s_free(ptr); }
+
+#if defined(SDS_TEST_MAIN)
+#include <stdio.h>
+#include "testhelp.h"
+#include "limits.h"
+
+#define UNUSED(x) (void)(x)
+int sdsTest(void) {
+    {
+        sds x = sdsnew("foo"), y;
+
+        test_cond("Create a string and obtain the length",
+            sdslen(x) == 3 && memcmp(x,"foo\0",4) == 0)
+
+        sdsfree(x);
+        x = sdsnewlen("foo",2);
+        test_cond("Create a string with specified length",
+            sdslen(x) == 2 && memcmp(x,"fo\0",3) == 0)
+
+        x = sdscat(x,"bar");
+        test_cond("Strings concatenation",
+            sdslen(x) == 5 && memcmp(x,"fobar\0",6) == 0);
+
+        x = sdscpy(x,"a");
+        test_cond("sdscpy() against an originally longer string",
+            sdslen(x) == 1 && memcmp(x,"a\0",2) == 0)
+
+        x = sdscpy(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk");
+        test_cond("sdscpy() against an originally shorter string",
+            sdslen(x) == 33 &&
+            memcmp(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk\0",33) == 0)
+
+        sdsfree(x);
+        x = sdscatprintf(sdsempty(),"%d",123);
+        test_cond("sdscatprintf() seems working in the base case",
+            sdslen(x) == 3 && memcmp(x,"123\0",4) == 0)
+
+        sdsfree(x);
+        x = sdsnew("--");
+        x = sdscatfmt(x, "Hello %s World %I,%I--", "Hi!", LLONG_MIN,LLONG_MAX);
+        test_cond("sdscatfmt() seems working in the base case",
+            sdslen(x) == 60 &&
+            memcmp(x,"--Hello Hi! World -9223372036854775808,"
+                     "9223372036854775807--",60) == 0)
+        printf("[%s]\n",x);
+
+        sdsfree(x);
+        x = sdsnew("--");
+        x = sdscatfmt(x, "%u,%U--", UINT_MAX, ULLONG_MAX);
+        test_cond("sdscatfmt() seems working with unsigned numbers",
+            sdslen(x) == 35 &&
+            memcmp(x,"--4294967295,18446744073709551615--",35) == 0)
+
+        sdsfree(x);
+        x = sdsnew(" x ");
+        sdstrim(x," x");
+        test_cond("sdstrim() works when all chars match",
+            sdslen(x) == 0)
+
+        sdsfree(x);
+        x = sdsnew(" x ");
+        sdstrim(x," ");
+        test_cond("sdstrim() works when a single char remains",
+            sdslen(x) == 1 && x[0] == 'x')
+
+        sdsfree(x);
+        x = sdsnew("xxciaoyyy");
+        sdstrim(x,"xy");
+        test_cond("sdstrim() correctly trims characters",
+            sdslen(x) == 4 && memcmp(x,"ciao\0",5) == 0)
+
+        y = sdsdup(x);
+        sdsrange(y,1,1);
+        test_cond("sdsrange(...,1,1)",
+            sdslen(y) == 1 && memcmp(y,"i\0",2) == 0)
+
+        sdsfree(y);
+        y = sdsdup(x);
+        sdsrange(y,1,-1);
+        test_cond("sdsrange(...,1,-1)",
+            sdslen(y) == 3 && memcmp(y,"iao\0",4) == 0)
+
+        sdsfree(y);
+        y = sdsdup(x);
+        sdsrange(y,-2,-1);
+        test_cond("sdsrange(...,-2,-1)",
+            sdslen(y) == 2 && memcmp(y,"ao\0",3) == 0)
+
+        sdsfree(y);
+        y = sdsdup(x);
+        sdsrange(y,2,1);
+        test_cond("sdsrange(...,2,1)",
+            sdslen(y) == 0 && memcmp(y,"\0",1) == 0)
+
+        sdsfree(y);
+        y = sdsdup(x);
+        sdsrange(y,1,100);
+        test_cond("sdsrange(...,1,100)",
+            sdslen(y) == 3 && memcmp(y,"iao\0",4) == 0)
+
+        sdsfree(y);
+        y = sdsdup(x);
+        sdsrange(y,100,100);
+        test_cond("sdsrange(...,100,100)",
+            sdslen(y) == 0 && memcmp(y,"\0",1) == 0)
+
+        sdsfree(y);
+        sdsfree(x);
+        x = sdsnew("foo");
+        y = sdsnew("foa");
+        test_cond("sdscmp(foo,foa)", sdscmp(x,y) > 0)
+
+        sdsfree(y);
+        sdsfree(x);
+        x = sdsnew("bar");
+        y = sdsnew("bar");
+        test_cond("sdscmp(bar,bar)", sdscmp(x,y) == 0)
+
+        sdsfree(y);
+        sdsfree(x);
+        x = sdsnew("aar");
+        y = sdsnew("bar");
+        test_cond("sdscmp(bar,bar)", sdscmp(x,y) < 0)
+
+        sdsfree(y);
+        sdsfree(x);
+        x = sdsnewlen("\a\n\0foo\r",7);
+        y = sdscatrepr(sdsempty(),x,sdslen(x));
+        test_cond("sdscatrepr(...data...)",
+            memcmp(y,"\"\\a\\n\\x00foo\\r\"",15) == 0)
+
+        {
+            unsigned int oldfree;
+            char *p;
+            int step = 10, j, i;
+
+            sdsfree(x);
+            sdsfree(y);
+            x = sdsnew("0");
+            test_cond("sdsnew() free/len buffers", sdslen(x) == 1 && sdsavail(x) == 0);
+
+            /* Run the test a few times in order to hit the first two
+             * SDS header types. */
+            for (i = 0; i < 10; i++) {
+                int oldlen = sdslen(x);
+                x = sdsMakeRoomFor(x,step);
+                int type = x[-1]&SDS_TYPE_MASK;
+
+                test_cond("sdsMakeRoomFor() len", sdslen(x) == oldlen);
+                if (type != SDS_TYPE_5) {
+                    test_cond("sdsMakeRoomFor() free", sdsavail(x) >= step);
+                    oldfree = sdsavail(x);
+                }
+                p = x+oldlen;
+                for (j = 0; j < step; j++) {
+                    p[j] = 'A'+j;
+                }
+                sdsIncrLen(x,step);
+            }
+            test_cond("sdsMakeRoomFor() content",
+                memcmp("0ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ",x,101) == 0);
+            test_cond("sdsMakeRoomFor() final length",sdslen(x)==101);
+
+            sdsfree(x);
+        }
+    }
+    test_report()
+    return 0;
+}
+#endif
+
+#ifdef SDS_TEST_MAIN
+int main(void) {
+    return sdsTest();
+}
+#endif
diff --git a/util/sds/sds.h b/util/sds/sds.h
new file mode 100644
index 0000000..adcc12c
--- /dev/null
+++ b/util/sds/sds.h
@@ -0,0 +1,274 @@
+/* SDSLib 2.0 -- A C dynamic strings library
+ *
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Oran Agra
+ * Copyright (c) 2015, Redis Labs, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *   * Neither the name of Redis nor the names of its contributors may be used
+ *     to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __SDS_H
+#define __SDS_H
+
+#define SDS_MAX_PREALLOC (1024*1024)
+extern const char *SDS_NOINIT;
+
+#include <sys/types.h>
+#include <stdarg.h>
+#include <stdint.h>
+
+typedef char *sds;
+
+/* Note: sdshdr5 is never used, we just access the flags byte directly.
+ * However is here to document the layout of type 5 SDS strings. */
+struct __attribute__ ((__packed__)) sdshdr5 {
+    unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
+    char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr8 {
+    uint8_t len; /* used */
+    uint8_t alloc; /* excluding the header and null terminator */
+    unsigned char flags; /* 3 lsb of type, 5 unused bits */
+    char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr16 {
+    uint16_t len; /* used */
+    uint16_t alloc; /* excluding the header and null terminator */
+    unsigned char flags; /* 3 lsb of type, 5 unused bits */
+    char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr32 {
+    uint32_t len; /* used */
+    uint32_t alloc; /* excluding the header and null terminator */
+    unsigned char flags; /* 3 lsb of type, 5 unused bits */
+    char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr64 {
+    uint64_t len; /* used */
+    uint64_t alloc; /* excluding the header and null terminator */
+    unsigned char flags; /* 3 lsb of type, 5 unused bits */
+    char buf[];
+};
+
+#define SDS_TYPE_5  0
+#define SDS_TYPE_8  1
+#define SDS_TYPE_16 2
+#define SDS_TYPE_32 3
+#define SDS_TYPE_64 4
+#define SDS_TYPE_MASK 7
+#define SDS_TYPE_BITS 3
+#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T)));
+#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))
+#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)
+
+static inline size_t sdslen(const sds s) {
+    unsigned char flags = s[-1];
+    switch(flags&SDS_TYPE_MASK) {
+        case SDS_TYPE_5:
+            return SDS_TYPE_5_LEN(flags);
+        case SDS_TYPE_8:
+            return SDS_HDR(8,s)->len;
+        case SDS_TYPE_16:
+            return SDS_HDR(16,s)->len;
+        case SDS_TYPE_32:
+            return SDS_HDR(32,s)->len;
+        case SDS_TYPE_64:
+            return SDS_HDR(64,s)->len;
+    }
+    return 0;
+}
+
+static inline size_t sdsavail(const sds s) {
+    unsigned char flags = s[-1];
+    switch(flags&SDS_TYPE_MASK) {
+        case SDS_TYPE_5: {
+            return 0;
+        }
+        case SDS_TYPE_8: {
+            SDS_HDR_VAR(8,s);
+            return sh->alloc - sh->len;
+        }
+        case SDS_TYPE_16: {
+            SDS_HDR_VAR(16,s);
+            return sh->alloc - sh->len;
+        }
+        case SDS_TYPE_32: {
+            SDS_HDR_VAR(32,s);
+            return sh->alloc - sh->len;
+        }
+        case SDS_TYPE_64: {
+            SDS_HDR_VAR(64,s);
+            return sh->alloc - sh->len;
+        }
+    }
+    return 0;
+}
+
+static inline void sdssetlen(sds s, size_t newlen) {
+    unsigned char flags = s[-1];
+    switch(flags&SDS_TYPE_MASK) {
+        case SDS_TYPE_5:
+            {
+                unsigned char *fp = ((unsigned char*)s)-1;
+                *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
+            }
+            break;
+        case SDS_TYPE_8:
+            SDS_HDR(8,s)->len = newlen;
+            break;
+        case SDS_TYPE_16:
+            SDS_HDR(16,s)->len = newlen;
+            break;
+        case SDS_TYPE_32:
+            SDS_HDR(32,s)->len = newlen;
+            break;
+        case SDS_TYPE_64:
+            SDS_HDR(64,s)->len = newlen;
+            break;
+    }
+}
+
+static inline void sdsinclen(sds s, size_t inc) {
+    unsigned char flags = s[-1];
+    switch(flags&SDS_TYPE_MASK) {
+        case SDS_TYPE_5:
+            {
+                unsigned char *fp = ((unsigned char*)s)-1;
+                unsigned char newlen = SDS_TYPE_5_LEN(flags)+inc;
+                *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
+            }
+            break;
+        case SDS_TYPE_8:
+            SDS_HDR(8,s)->len += inc;
+            break;
+        case SDS_TYPE_16:
+            SDS_HDR(16,s)->len += inc;
+            break;
+        case SDS_TYPE_32:
+            SDS_HDR(32,s)->len += inc;
+            break;
+        case SDS_TYPE_64:
+            SDS_HDR(64,s)->len += inc;
+            break;
+    }
+}
+
+/* sdsalloc() = sdsavail() + sdslen() */
+static inline size_t sdsalloc(const sds s) {
+    unsigned char flags = s[-1];
+    switch(flags&SDS_TYPE_MASK) {
+        case SDS_TYPE_5:
+            return SDS_TYPE_5_LEN(flags);
+        case SDS_TYPE_8:
+            return SDS_HDR(8,s)->alloc;
+        case SDS_TYPE_16:
+            return SDS_HDR(16,s)->alloc;
+        case SDS_TYPE_32:
+            return SDS_HDR(32,s)->alloc;
+        case SDS_TYPE_64:
+            return SDS_HDR(64,s)->alloc;
+    }
+    return 0;
+}
+
+static inline void sdssetalloc(sds s, size_t newlen) {
+    unsigned char flags = s[-1];
+    switch(flags&SDS_TYPE_MASK) {
+        case SDS_TYPE_5:
+            /* Nothing to do, this type has no total allocation info. */
+            break;
+        case SDS_TYPE_8:
+            SDS_HDR(8,s)->alloc = newlen;
+            break;
+        case SDS_TYPE_16:
+            SDS_HDR(16,s)->alloc = newlen;
+            break;
+        case SDS_TYPE_32:
+            SDS_HDR(32,s)->alloc = newlen;
+            break;
+        case SDS_TYPE_64:
+            SDS_HDR(64,s)->alloc = newlen;
+            break;
+    }
+}
+
+sds sdsnewlen(const void *init, size_t initlen);
+sds sdsnew(const char *init);
+sds sdsempty(void);
+sds sdsdup(const sds s);
+void sdsfree(sds s);
+sds sdsgrowzero(sds s, size_t len);
+sds sdscatlen(sds s, const void *t, size_t len);
+sds sdscat(sds s, const char *t);
+sds sdscatsds(sds s, const sds t);
+sds sdscpylen(sds s, const char *t, size_t len);
+sds sdscpy(sds s, const char *t);
+
+sds sdscatvprintf(sds s, const char *fmt, va_list ap);
+#ifdef __GNUC__
+sds sdscatprintf(sds s, const char *fmt, ...)
+    __attribute__((format(printf, 2, 3)));
+#else
+sds sdscatprintf(sds s, const char *fmt, ...);
+#endif
+
+sds sdscatfmt(sds s, char const *fmt, ...);
+sds sdstrim(sds s, const char *cset);
+void sdsrange(sds s, ssize_t start, ssize_t end);
+void sdsupdatelen(sds s);
+void sdsclear(sds s);
+int sdscmp(const sds s1, const sds s2);
+sds *sdssplitlen(const char *s, ssize_t len, const char *sep, int seplen, int *count);
+void sdsfreesplitres(sds *tokens, int count);
+void sdstolower(sds s);
+void sdstoupper(sds s);
+sds sdsfromlonglong(long long value);
+sds sdscatrepr(sds s, const char *p, size_t len);
+sds *sdssplitargs(const char *line, int *argc);
+sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen);
+sds sdsjoin(char **argv, int argc, char *sep);
+sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen);
+
+/* Low level functions exposed to the user API */
+sds sdsMakeRoomFor(sds s, size_t addlen);
+void sdsIncrLen(sds s, ssize_t incr);
+sds sdsRemoveFreeSpace(sds s);
+size_t sdsAllocSize(sds s);
+void *sdsAllocPtr(sds s);
+
+/* Export the allocator used by SDS to the program using SDS.
+ * Sometimes the program SDS is linked to, may use a different set of
+ * allocators, but may want to allocate or free things that SDS will
+ * respectively free or allocate. */
+void *sds_malloc(size_t size);
+void *sds_realloc(void *ptr, size_t size);
+void sds_free(void *ptr);
+
+#ifdef REDIS_TEST
+int sdsTest(int argc, char *argv[]);
+#endif
+
+#endif
diff --git a/util/sds/sdsalloc.h b/util/sds/sdsalloc.h
new file mode 100644
index 0000000..f43023c
--- /dev/null
+++ b/util/sds/sdsalloc.h
@@ -0,0 +1,42 @@
+/* SDSLib 2.0 -- A C dynamic strings library
+ *
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Oran Agra
+ * Copyright (c) 2015, Redis Labs, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *   * Neither the name of Redis nor the names of its contributors may be used
+ *     to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* SDS allocator selection.
+ *
+ * This file is used in order to change the SDS allocator at compile time.
+ * Just define the following defines to what you want to use. Also add
+ * the include of your alternate allocator if needed (not needed in order
+ * to use the default libc allocator). */
+
+#define s_malloc malloc
+#define s_realloc realloc
+#define s_free free
diff --git a/util/util.h b/util/util.h
new file mode 100644
index 0000000..ef378ca
--- /dev/null
+++ b/util/util.h
@@ -0,0 +1,78 @@
+#ifndef UTIL_H
+#define UTIL_H
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#if defined(__GNUC__) || defined(__clang__)
+# define NORETURN __attribute__((__noreturn__))
+#else
+# define NORETURN
+#endif
+
+#if defined(__GNUC__)
+# define UNUSED __attribute__((__unused__))
+#else
+# define UNUSED
+#endif
+
+#define N_ENTRIES(x) (sizeof(x) / sizeof(*x))
+#define member_size(type, member) sizeof(((type *)(0))->member)
+#define TYPEOF(x) __typeof__(x)
+
+#define PACKED __attribute__((__packed__))
+
+#if defined(__GNUC__)
+# define FALLTHROUGH __attribute__ ((fallthrough))
+#else
+# define FALLTHROUGH
+#endif
+
+NORETURN void die(const char *format, ...) __attribute__((format (printf, 1, 2)));
+NORETURN void die_errno(const char *format, ...) __attribute__((format (printf, 1, 2)));
+
+void err(const char * format, ...) __attribute__((format (printf, 1, 2)));
+void err_errno(const char * format, ...) __attribute__((format (printf, 1, 2)));
+
+#define XREALLOC_ARRAY(x, alloc) ((x) = xrealloc((x), (alloc) * sizeof(*(x))))
+#define ALLOC_ARRAY(x, n) (malloc((n) * sizeof(*x)))
+
+#define ALIGN_ADDR_MASK(addr, mask) \
+	((addr + ((mask) - 1)) & ~((mask) - 1))
+
+#define ALIGN_ADDR_16(addr) ALIGN_ADDR_MASK(addr, 0x10)
+#define ALIGN_ADDR_32(addr) ALIGN_ADDR_MASK(addr, 0x20)
+#define ALIGN_ADDR_64(addr) ALIGN_ADDR_MASK(addr, 0x40)
+#define ALIGN_ADDR(addr, bytes) ALIGN_ADDR_MASK(addr, (8ULL*(bytes)))
+
+// Scaling: 0, 24, 66, 140, 269, 495, 890
+#define ALLOC_NC(x) (((x)+14)*7/4)
+
+#define AM_RESIZE_ALLOC(buf, sz, cap, item_sz, alloc_nc) \
+	do {                                                  \
+		TYPEOF(sz) _sz = (sz);                             \
+		if((cap) < _sz) {                                  \
+			void * nb;                                      \
+			TYPEOF(cap) nc;                                 \
+			if(_sz < alloc_nc((cap))) {                     \
+				nc = alloc_nc((cap));                        \
+			} else {                                        \
+				nc = _sz;                                    \
+			}                                               \
+			nb = realloc((buf), (item_sz) * nc);            \
+			if(nb) {                                        \
+				(cap) = nc;                                  \
+				(buf) = nb;                                  \
+			} else {                                        \
+				free((buf));                                 \
+				(buf) = NULL;                                \
+			}                                               \
+		}                                                  \
+	} while(0)
+
+#define AM_RESIZE(buf, sz, cap) \
+	AM_RESIZE_ALLOC(buf, sz, cap, sizeof(*buf), ALLOC_NC)
+
+#endif
-- 
cgit v1.2.3