aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorflu0r1ne <flu0r1ne@flu0r1ne.net>2022-10-30 19:30:29 -0500
committerflu0r1ne <flu0r1ne@flu0r1ne.net>2022-10-30 19:30:29 -0500
commit20e52f326cdf1b6c2ca9b2c0b5be07637d9196d2 (patch)
tree1d957cfd5ca8b9ccd0a91fa5d5415599edd241d1
downloadqidx-20e52f326cdf1b6c2ca9b2c0b5be07637d9196d2.tar.xz
qidx-20e52f326cdf1b6c2ca9b2c0b5be07637d9196d2.zip
Initial commit
-rw-r--r--README.md22
-rw-r--r--main.c928
-rw-r--r--makefile3
-rw-r--r--util/err.c80
-rw-r--r--util/hash.c46
-rw-r--r--util/hash.h27
-rw-r--r--util/prime_search.c344
-rw-r--r--util/prime_search.h8
-rw-r--r--util/sds/sds.c1300
-rw-r--r--util/sds/sds.h274
-rw-r--r--util/sds/sdsalloc.h42
-rw-r--r--util/util.h78
12 files changed, 3152 insertions, 0 deletions
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..ff14ab3
--- /dev/null
+++ b/README.md
@@ -0,0 +1,22 @@
+Bam Query Index (qidx)
+=====================
+
+> Warning: this is a work in progress
+
+`qidx` is tool for indexing BAM alignments by query name. While the
+samtools have the ability to sort data by query name (also called
+the read name), there htslib does not provide built-in utilities
+to retrieve alignments by query name. This can be advantageous
+for examining multi-mapped alignments.
+
+While a utility [bri](https://github.com/jts/bri) predated `qidx`
+providing the same utilities, it reads all alignments into memory
+which is impractical for most human genome data.
+
+Notes:
+
+- `qidx` creates a disk-backed using a sparse memory-mapped file. The underlying
+operating system must support `mmap` and file holes
+- `qidx` doesn't currently support compression. it is currently recommended to
+use block-level compression (such as `zfs` `zstd` compression)
+- the bamfile must be sorted by query name before the index is built `samtools sort -n`
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..aa2f3d6
--- /dev/null
+++ b/main.c
@@ -0,0 +1,928 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <htslib/sam.h>
+#include <htslib/hts.h>
+#include <htslib/bgzf.h>
+#include <stdbool.h>
+
+#include "util/sds/sds.h"
+#include "util/prime_search.h"
+#include "util/hash.h"
+#include "util/util.h"
+
+#include <endian.h>
+
+// @abstract Structure representing an unique alignment and virtual file pointer
+// @field tid chromosome ID
+// @field pos 0-based leftmost coordinate
+// @field vptr virtual pointer
+typedef struct {
+ uint32_t tid;
+ uint32_t pos;
+ uint64_t vptr;
+} aln_spec_t;
+
+
+#pragma pack(push, 1)
+
+typedef struct {
+ uint32_t tid;
+ uint32_t pos;
+ uint64_t vptr;
+} aln_spec_packed_t;
+
+#pragma pack(pop)
+
+// constrains (qname < 2 ** 32)
+
+// @abstract Structure representing all alignments by queryname
+// @field qname queryname (name for the given read)
+// @field n_alns number of alignments
+// @field alns array with alignments structures
+typedef struct {
+ char * qname;
+ uint16_t n_alns;
+ aln_spec_t * alns;
+} qidx_record_t;
+
+// @abstract Get the marshaled size of a record
+static size_t qidx_record_msize(qidx_record_t const * q) {
+ return (
+ strlen(q->qname) + 1 +
+ sizeof(uint16_t) +
+ sizeof(aln_spec_packed_t) * q->n_alns
+ );
+}
+
+// @abstract Martial a record to a fixed buffer
+//
+// @param q query index record
+// @param dst destination buffer
+// @param dstlen length of the destination buffer
+//
+// @return returns the number of bytes written to the buffer
+// or < 0 if the destination buffer is exceeded
+ssize_t qidx_record_marital(qidx_record_t const * q, void * dst, size_t dstlen) {
+ size_t rem = dstlen;
+
+ size_t n = strlen(q->qname) + 1;
+ if(rem < n)
+ return -1;
+ rem -= n;
+
+ {
+ char * _dst = dst;
+ strcpy(_dst, q->qname);
+ dst = (_dst + n);
+ }
+
+ size_t sz = 0;
+ sz += sizeof(uint16_t);
+ sz += sizeof(aln_spec_packed_t) * q->n_alns;
+
+ if(rem < sz)
+ return -1;
+ rem -= sz;
+
+ {
+ uint16_t * _dst = dst;
+ *_dst = htole16(q->n_alns);
+ dst = _dst + 1;
+ }
+
+ aln_spec_packed_t * buf_aln = dst;
+ for(size_t i = 0; i < q->n_alns; i++) {
+ aln_spec_t * aln = &q->alns[i];
+
+ buf_aln->tid = htole32(aln->tid);
+ buf_aln->pos = htole32(aln->pos);
+ buf_aln->vptr = htole64(aln->vptr);
+
+ buf_aln++;
+ }
+
+ return dstlen - rem;
+}
+
+// @abstract Helper to parse the query name and the number of alignments
+// @returns NULL if a parsing error occured else the location after the buffer
+static void const * _parse_record_header(void const * buf, size_t buflen,
+ char const ** qname, uint16_t * n_alns) {
+
+ size_t n = strnlen(buf, buflen) + 1;
+ *qname = buf;
+
+ if(n + sizeof(uint16_t) > buflen)
+ return NULL;
+
+ uint16_t const * _n_alns = (uint16_t const *) (*qname + n);
+ *n_alns = le64toh(*_n_alns);
+ buf = _n_alns + 1;
+
+ if(sizeof(aln_spec_packed_t) * *n_alns > buflen)
+ return NULL;
+
+ return buf;
+}
+
+// @abstract Unmartial a query record. The resulting memory needs to be freed.
+// All strings are sds strings.
+//
+// @param q query index record
+// @param buf source buffer
+// @param buflen source buffer length
+//
+// @return a pointer within buf after the record
+// or NULL if a parsing error occurred
+void const * qidx_record_unmartial(qidx_record_t * q, void const * buf, size_t buflen) {
+ char const * qname;
+
+ if(!(buf = _parse_record_header(buf, buflen, &qname, &q->n_alns))) {
+ return NULL;
+ }
+
+ q->qname = sdsnew(qname);
+ q->alns = calloc(q->n_alns, sizeof(aln_spec_t));
+
+ aln_spec_packed_t const * buf_aln = buf;
+ for(size_t i = 0; i < q->n_alns; i++) {
+ aln_spec_t * aln = &q->alns[i];
+
+ aln->pos = le32toh(buf_aln->pos);
+ aln->tid = le32toh(buf_aln->tid);
+ aln->vptr = le64toh(buf_aln->vptr);
+
+ buf_aln++;
+ }
+
+ return buf_aln;
+}
+
+// @abstract Find a record efficiently in a marshaled buffer of query index records
+//
+// @param buf source buffer
+// @param buflen length of the source buffer
+// @param qname target queryname
+//
+// @returns a pointer to a target query or NULL if it a record with the target query name
+// does not exist in the buffer
+void const * qidx_record_martial_find(void const * buf, size_t buflen, char const * qname) {
+ size_t n = strnlen(buf, buflen);
+
+ char const * _qname;
+ uint16_t n_alns;
+
+ void const * cur = buf,
+ * next = NULL;
+
+ while((next = _parse_record_header(cur, buflen, &_qname, &n_alns))) {
+
+ if(strcmp(_qname, qname) == 0) {
+ return cur;
+ }
+
+ next = (aln_spec_packed_t *) next + n_alns;
+ size_t l = (char const *) next - (char const *) cur;
+ buflen -= l;
+ cur = next;
+
+ if(!buflen) {
+ return NULL;
+ }
+ }
+
+ // Invalid
+ return NULL;
+}
+
+typedef enum {
+ QIDX_OK = 0,
+ QIDX_ITER_DONE,
+ QIDX_NO_MEM,
+ QIDX_NO_BAM_HDR,
+ QIDX_BAM_READ_FAILURE,
+ QIDX_REC_ITER_FAILURE,
+ QIDX_NOT_SORTED,
+ QIDX_BUCKET_MAX_SIZE_EXCEEDED,
+ QIDX_MAP_FAIL,
+ QIDX_IO_FAIL,
+ QIDX_FAILED_TO_OPEN_BAMFILE,
+ QIDX_FAILED_TO_OPEN_INDEX_FILE,
+ QIDX_INVALID_VERSION,
+ QIDX_INVALID_MAGIC,
+} qidx_err_t;
+
+bool qidx_errno(qidx_err_t err) {
+ switch(err) {
+ case QIDX_MAP_FAIL:
+ case QIDX_IO_FAIL:
+ return true;
+ default:
+ return false;
+ }
+
+}
+
+char const * qidx_strerr(qidx_err_t err) {
+ switch(err) {
+ case QIDX_NO_MEM: return "memory was exausted";
+ case QIDX_NO_BAM_HDR: return "parsing BAM header failed";
+ case QIDX_BAM_READ_FAILURE: return "failed to read bamfile";
+ case QIDX_REC_ITER_FAILURE: return "bam query index record iterator failed";
+ case QIDX_NOT_SORTED: return "input bam file not sorted by query name";
+ case QIDX_BUCKET_MAX_SIZE_EXCEEDED: return "maximum query size exceeded";
+ case QIDX_MAP_FAIL: return "mmap failed";
+ case QIDX_IO_FAIL: return "qidx io failure";
+ case QIDX_FAILED_TO_OPEN_BAMFILE: return "failed to open bamfile";
+ case QIDX_FAILED_TO_OPEN_INDEX_FILE: return "failed to open indexfile";
+ case QIDX_INVALID_MAGIC: return "invalid magic number (not a qidx or corrupted)";
+ case QIDX_INVALID_VERSION: return "qidx version outdated, please regenerate";
+ case QIDX_ITER_DONE:
+ case QIDX_OK:
+ return "OK";
+ }
+}
+
+qidx_err_t init_qidx(qidx_record_t * r) {
+ return QIDX_OK;
+}
+
+void qidx_free(qidx_record_t * r) {
+ free(r);
+}
+
+/*
+ * SAMTOOLS SORTING ALGORITHM
+ */
+
+// strnum_cmp is the string comparator used by samtools sort
+// to sort bam files by read names (e.g. QNAME)
+//
+// The input file is expected to be ordered accordingly. This
+// is verified at runtime.
+#define is_digit(c) ((c)<='9' && (c)>='0')
+static int strnum_cmp(const char *_a, const char *_b)
+{
+ const unsigned char *a = (const unsigned char*)_a, *b = (const unsigned char*)_b;
+ const unsigned char *pa = a, *pb = b;
+ while (*pa && *pb) {
+ if (!is_digit(*pa) || !is_digit(*pb)) {
+ if (*pa != *pb)
+ return (int)*pa - (int)*pb;
+ ++pa; ++pb;
+ } else {
+ // skip leading zeros
+ while (*pa == '0') ++pa;
+ while (*pb == '0') ++pb;
+
+ // skip matching digits
+ while (is_digit(*pa) && *pa == *pb)
+ pa++, pb++;
+
+ // Now mismatching, so see which ends the number sooner
+ int diff = (int)*pa - (int)*pb;
+ while (is_digit(*pa) && is_digit(*pb))
+ pa++, pb++;
+
+ if (is_digit(*pa))
+ return 1; // pa still going, so larger
+ else if (is_digit(*pb))
+ return -1; // pb still going, so larger
+ else if (diff)
+ return diff; // same length, so earlier diff
+ }
+ }
+ return *pa? 1 : *pb? -1 : 0;
+}
+
+
+typedef struct {
+ htsFile *fp;
+ bam1_t * b;
+ bam_hdr_t * hdr;
+
+ uint64_t file_offset;
+ sds queryname;
+ int ret;
+
+ aln_spec_t * aln_specs;
+ uint16_t n_aln_cap;
+ uint16_t n_aln_sz;
+
+ qidx_record_t * pending_release;
+} qidx_record_iter_t;
+
+qidx_err_t qidx_record_iter_init(qidx_record_iter_t *it, htsFile *fp) {
+ it->b = bam_init1();
+ it->fp = fp;
+
+ // advance past header
+ it->hdr = sam_hdr_read(fp);
+
+ if(!it->hdr) {
+ return QIDX_NO_BAM_HDR;
+ }
+
+ it->file_offset = bgzf_tell(fp->fp.bgzf);
+
+ if((it->ret = sam_read1(it->fp, it->hdr, it->b)) < 0) {
+ return QIDX_BAM_READ_FAILURE;
+ }
+
+ it->queryname = sdsnew(bam_get_qname(it->b));
+ it->aln_specs = NULL;
+ it->n_aln_sz = it->n_aln_cap = 0;
+
+ if(bgzf_seek(fp->fp.bgzf, it->file_offset, SEEK_SET) < 0) {
+ return QIDX_IO_FAIL;
+ }
+
+ it->pending_release = NULL;
+
+ return QIDX_OK;
+}
+
+static void _qidx_record_iter_free_record(qidx_record_iter_t * it) {
+ if(it->pending_release) {
+ free(it->pending_release->alns);
+ sdsfree(it->pending_release->qname);
+ it->pending_release = NULL;
+ }
+}
+
+qidx_err_t qidx_record_iter_next(qidx_record_iter_t * it, qidx_record_t * rec) {
+
+ _qidx_record_iter_free_record(it);
+
+ sds queryname;
+ int cmp;
+ int ret;
+ bool yield = false;
+ while ((ret = sam_read1(it->fp, it->hdr, it->b)) >= 0) {
+ queryname = sdsnew(bam_get_qname(it->b));
+
+ if(!queryname) {
+ return QIDX_NO_MEM;
+ }
+
+ if((cmp = strnum_cmp(it->queryname, queryname)) != 0) {
+ if(cmp > 0) {
+ printf("Err: the file is not sorted\n");
+ return QIDX_NOT_SORTED;
+ }
+
+ rec->n_alns = it->n_aln_sz;
+ rec->alns = it->aln_specs;
+ rec->qname = it->queryname;
+
+ it->pending_release = rec;
+
+ yield = true;
+
+ it->queryname = sdsdup(queryname);
+ it->aln_specs = NULL;
+ it->n_aln_cap = it->n_aln_sz = 0;
+ }
+
+ AM_RESIZE(it->aln_specs, it->n_aln_sz + 1, it->n_aln_cap);
+
+ it->aln_specs[it->n_aln_sz].pos = it->b->core.pos;
+ it->aln_specs[it->n_aln_sz].tid = it->b->core.tid;
+ it->aln_specs[it->n_aln_sz].vptr = it->file_offset;
+
+ it->n_aln_sz++;
+
+ // update offset for next record
+ it->file_offset = bgzf_tell(it->fp->fp.bgzf);
+
+ sdsfree(queryname);
+
+ if(yield) {
+ break;
+ }
+ }
+
+ if(ret < -1) {
+ it->pending_release = NULL;
+ return QIDX_BAM_READ_FAILURE;
+ } else if (ret == -1 && it->queryname != NULL) {
+ rec->n_alns = it->n_aln_sz;
+ rec->alns = it->aln_specs;
+ rec->qname = it->queryname;
+
+ it->pending_release = rec;
+
+ yield = true;
+
+ it->queryname = NULL;
+ it->aln_specs = NULL;
+ it->n_aln_cap = it->n_aln_sz = 0;
+ }
+
+ if(!yield) {
+ return QIDX_ITER_DONE;
+ }
+
+ return QIDX_OK;
+}
+
+qidx_err_t qidx_record_iter_destroy(qidx_record_iter_t * it) {
+ _qidx_record_iter_free_record(it);
+
+ sdsfree(it->queryname);
+
+ bam_hdr_destroy(it->hdr);
+ bam_destroy1(it->b);
+
+ return QIDX_OK;
+}
+
+#include <math.h>
+
+// @abstract Structure containing summary statistics estimated using
+// a one-pass algorithm
+// @field mu mean
+// @field s sample variance
+// @field n number of entries
+typedef struct {
+ double mu;
+ double s;
+ size_t n;
+} roll_est_params_t;
+
+// @abstract observe a data point
+void roll_est_add_evidence(roll_est_params_t * p, double x) {
+ double delta = x - p->mu;
+ p->mu += delta / (p->n + 1);
+ p->s += (x - p->mu) * delta;
+ p->n++;
+}
+
+// @abstract finalize the estimate
+void roll_est_finalize(roll_est_params_t * p) {
+ p->s /= p->n;
+}
+
+// Calculates the maximum number of items placed into a single bucket
+// to not exceed the bucket_size with a probability specified by the
+// area to the right of the positive z-score z. E.g. at z=2 there is
+// 2.4% chance of the soft limit being exceeded given objects are
+// assigned uniformly to each bucket. (The cardinality of each bucket
+// is the same.)
+//
+// In order for the maximum size to (really) not be exceeded, the
+// binomial distribution of items in each bucket would need to be
+// included. It is recommended to set this soft limit much lower
+// than the hard limit to not exceed it.
+//
+// This is derived from the standard error as measured by the sampling
+// distribution.
+
+// @param soft_limit a soft limit on the number of variable-length items
+// stuffed into a single bucket
+// @param z the z-score with which this will happen
+// @param p the parameters of the normal distribution
+size_t estimate_n_buckets(double soft_limit, double z, roll_est_params_t const * p) {
+ double n = sqrt(z * z * p->s + 4 * p->mu * soft_limit) - z * sqrt(p->s);
+ n = n / (2 * p->mu);
+
+ int max_bucket_size = floor(n * n);
+
+ return (size_t) ceil((double) p->n / (double) max_bucket_size);
+}
+
+typedef struct {
+ uint64_t offset;
+ uint32_t n_bytes;
+} qidx_bucket_t;
+
+typedef struct {
+ uint32_t n_buckets;
+ uint32_t max_bucket_size;
+ qidx_bucket_t * buckets;
+ char * rb_start;
+} qidx_htable_t;
+
+#define QIDX_MAGIC 0x5d1de6b4
+#define QIDX_VERSION 1
+
+#pragma pack(push, 1)
+
+typedef struct {
+ uint32_t magic;
+ uint16_t version;
+ uint32_t n_buckets;
+ uint32_t max_bucket_size;
+} qidx_header_t;
+
+#pragma pack(pop)
+
+typedef struct {
+ int fd;
+ void * map;
+ size_t len;
+ bool read_only;
+ qidx_header_t * hdr;
+ qidx_bucket_t * buckets;
+ qidx_htable_t * htab;
+} qidx_fp_t;
+
+size_t qidx_file_size(uint32_t n_buckets, uint32_t max_bucket_size) {
+ size_t sz = 0;
+
+ sz += sizeof(qidx_header_t);
+ sz += sizeof(qidx_htable_t) * n_buckets;
+ sz += (uint64_t) n_buckets * (uint64_t) max_bucket_size;
+
+ return sz;
+}
+
+#include <sys/mman.h>
+#include <unistd.h>
+
+// @abstract initialize htab, must first populate fp->buckets
+qidx_err_t _qidx_init_htab(qidx_fp_t * fp,
+ uint32_t n_buckets, uint32_t max_bucket_size) {
+ qidx_htable_t * htab = malloc(sizeof(qidx_htable_t));
+ if(!htab) {
+ return QIDX_NO_MEM;
+ }
+
+ htab->buckets = calloc(sizeof(qidx_bucket_t), n_buckets);
+ if(!htab->buckets) {
+ free(htab);
+ return QIDX_NO_MEM;
+ }
+
+ htab->n_buckets = n_buckets;
+ htab->max_bucket_size = max_bucket_size;
+
+ htab->rb_start = (void *) (fp->buckets + n_buckets);
+
+ fp->htab = htab;
+
+ return QIDX_OK;
+}
+
+
+qidx_err_t qidx_open(qidx_fp_t * fp, int fd) {
+
+ fp->fd = fd;
+
+ qidx_header_t hdr;
+ if(read(fd, &hdr, sizeof(qidx_header_t)) == -1) {
+ return QIDX_FAILED_TO_OPEN_INDEX_FILE;
+ }
+
+ if(le32toh(hdr.magic) != QIDX_MAGIC) {
+ return QIDX_INVALID_MAGIC;
+ }
+
+ if(le16toh(hdr.version) != QIDX_VERSION) {
+ return QIDX_INVALID_VERSION;
+ }
+
+ uint32_t max_bucket_size = le32toh(hdr.max_bucket_size);
+ uint32_t n_buckets = le32toh(hdr.n_buckets);
+
+ size_t sz = qidx_file_size(n_buckets, max_bucket_size);
+
+ if(lseek(fd, 0, SEEK_SET) == -1) {
+ return QIDX_IO_FAIL;
+ }
+
+ fp->read_only = true;
+ fp->map = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
+ fp->len = sz;
+
+ if(fp->map == MAP_FAILED) {
+ return QIDX_MAP_FAIL;
+ }
+
+ fp->hdr = fp->map;
+ fp->buckets = (qidx_bucket_t *)(fp->hdr + 1);
+
+ qidx_err_t err = _qidx_init_htab(fp, n_buckets, max_bucket_size);
+ if(err) { return err; }
+
+ for(size_t i = 0; i < n_buckets; i++) {
+ fp->htab->buckets[i].n_bytes = le32toh(fp->buckets[i].n_bytes);
+ fp->htab->buckets[i].offset = le64toh(fp->buckets[i].offset);
+ }
+
+ return QIDX_OK;
+}
+
+qidx_err_t qidx_create(qidx_fp_t * fp, int fd, uint32_t _n_buckets, uint32_t max_bucket_size) {
+
+ uint32_t n_buckets = next_prime_above_lowerbound(_n_buckets);
+
+ size_t sz = qidx_file_size(n_buckets, max_bucket_size);
+
+ fp->fd = fd;
+
+ if(ftruncate(fd, sz) == -1) {
+ return QIDX_IO_FAIL;
+ }
+
+ fp->read_only = false;
+ fp->map = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ fp->len = sz;
+
+ if(fp->map == MAP_FAILED) {
+ return QIDX_MAP_FAIL;
+ }
+
+ fp->hdr = fp->map;
+
+ fp->hdr->magic = htole32(QIDX_MAGIC);
+ fp->hdr->version = htole16(QIDX_VERSION);
+ fp->hdr->n_buckets = htole32(n_buckets);
+ fp->hdr->max_bucket_size = htole32(max_bucket_size);
+
+ fp->buckets = (qidx_bucket_t *) (fp->hdr + 1);
+
+ _qidx_init_htab(fp, n_buckets, max_bucket_size);
+
+ uint64_t off = (char *) fp->htab->rb_start - (char *) fp->map;
+ for(size_t i = 0; i < n_buckets; i++) {
+ fp->htab->buckets[i].offset = off;
+ fp->htab->buckets[i].n_bytes = 0;
+ off += max_bucket_size;
+ }
+
+ return QIDX_OK;
+}
+
+qidx_err_t qidx_close(qidx_fp_t * fp) {
+
+ qidx_htable_t * htab = fp->htab;
+
+ if(!fp->read_only) {
+ for(size_t i = 0; i < htab->n_buckets; i++) {
+ fp->buckets[i].n_bytes = htole32(htab->buckets[i].n_bytes);
+ fp->buckets[i].offset = htole64(htab->buckets[i].offset);
+ }
+ }
+
+ free(htab->buckets);
+ free(htab);
+
+ if(munmap(fp->map, fp->len) != -1) {
+ return QIDX_MAP_FAIL;
+ }
+
+ return QIDX_OK;
+}
+
+qidx_htable_t * qidx_get_htable(qidx_fp_t * fp) { return fp->htab; }
+
+static qidx_bucket_t * _qidx_bucket(char const * qname, qidx_htable_t const * tab) {
+ size_t hash = murmur_hash_str_64s(qname, 0x12345);
+
+ qidx_bucket_t * bucket = &tab->buckets[hash % tab->n_buckets];
+
+ return bucket;
+}
+
+qidx_err_t qidx_table_add(qidx_htable_t * tab, qidx_record_t const * rec) {
+ qidx_bucket_t * bucket = _qidx_bucket(rec->qname, tab);
+
+ void * ptr = (char *) tab->rb_start + bucket->offset + bucket->n_bytes;
+ size_t rem = tab->max_bucket_size - bucket->n_bytes;
+
+ ssize_t wrlen = qidx_record_marital(rec, ptr, rem);
+
+ if(wrlen < 0) {
+ return QIDX_BUCKET_MAX_SIZE_EXCEEDED;
+ }
+
+ bucket->n_bytes += wrlen;
+
+ return QIDX_OK;
+}
+
+qidx_err_t qidx_table_get(qidx_htable_t const * tab,
+ char const * qname, qidx_record_t ** rec) {
+ qidx_bucket_t * bucket = _qidx_bucket(qname, tab);
+
+ void const * bucket_start = tab->rb_start + bucket->offset;
+ void const * record_start =
+ qidx_record_martial_find(bucket_start, bucket->n_bytes, qname);
+
+ if(!record_start) {
+ *rec = NULL;
+ } else {
+ size_t rem = bucket->n_bytes -
+ ((char const *) record_start - (char const *) bucket_start);
+ *rec = malloc(sizeof(qidx_record_t));
+
+ if(!*rec) { return QIDX_NO_MEM; }
+
+ void const * next = qidx_record_unmartial(*rec, record_start, rem);
+
+ if(!next) {
+ return QIDX_IO_FAIL;
+ }
+ }
+
+ return QIDX_OK;
+}
+
+void qidx_die_err(qidx_err_t err) {
+ char const * errstr = qidx_strerr(err);
+ if(qidx_errno(err)) {
+ die_errno("err: %s", errstr);
+ } else {
+ die("err: %s", errstr);
+ }
+}
+
+void qidx_record_print(FILE * file, qidx_record_t * rec) {
+ fprintf(file, "qname: %s\n", rec->qname);
+ for(size_t i = 0; i < rec->n_alns; i++) {
+ fprintf(file, "\tTID: %u\n", rec->alns[i].tid);
+ fprintf(file, "\tPOS: %u\n", rec->alns[i].pos);
+ fprintf(file, "\tVPTR: %lu\n", rec->alns[i].vptr);
+ }
+}
+
+qidx_err_t qidx_create_index(char const * bamfile, char const * indexfile) {
+ qidx_err_t err;
+
+ htsFile *b_fp = hts_open(bamfile, "r");
+ if(b_fp == NULL) { return QIDX_FAILED_TO_OPEN_BAMFILE; }
+
+ // Pass 1:
+ // - estimate mu, s, n of the distribution of record sizes
+ // - estimate the number of buckets needed in the hash table
+ qidx_record_iter_t it;
+ if((err = qidx_record_iter_init(&it, b_fp))) {
+ return err;
+ }
+
+ roll_est_params_t params;
+ memset(&params, 0, sizeof(roll_est_params_t));
+ qidx_record_t rec;
+ while(!(err = qidx_record_iter_next(&it, &rec))) {
+ double sz = (double) qidx_record_msize(&rec);
+ roll_est_add_evidence(&params, sz);
+ }
+ if(err != QIDX_ITER_DONE) return err;
+
+ if ((err = qidx_record_iter_destroy(&it))) {
+ return err;
+ }
+
+ roll_est_finalize(&params);
+
+ double soft_limit = 8 * 1024;
+ double z = 5;
+ uint32_t n_buckets = estimate_n_buckets(soft_limit, z, &params);
+ uint32_t max_bucket_size = 1 << 24;
+
+ printf("record size (mean): %f\n", params.mu);
+ printf("record size (variance): %f\n", params.s);
+ printf("number of records: %zu\n", params.n);
+ printf("number of buckets: %u\n", n_buckets);
+
+ FILE * qidx = fopen(indexfile, "wb+");
+
+ if(!qidx) { return QIDX_FAILED_TO_OPEN_INDEX_FILE; }
+
+ int fd = fileno(qidx);
+
+ // PASS 2:
+ // - add records to index
+
+ // Rewind to start of BAM file
+ if (-1 == bgzf_seek(b_fp->fp.bgzf, 0, SEEK_SET)) {
+ return QIDX_IO_FAIL;
+ }
+
+ qidx_fp_t q_fp;
+ if((err = qidx_create(&q_fp, fd, n_buckets, max_bucket_size))) {
+ qidx_die_err(err);
+ }
+
+ qidx_htable_t * htab = qidx_get_htable(&q_fp);
+
+ if ((err = qidx_record_iter_init(&it, b_fp))) {
+ return err;
+ }
+
+ while((err = qidx_record_iter_next(&it, &rec)) == QIDX_OK) {
+ qidx_table_add(htab, &rec);
+ }
+ if(err != QIDX_ITER_DONE) return err;
+
+ if ((err = qidx_record_iter_destroy(&it))) {
+ return err;
+ }
+
+ qidx_close(&q_fp);
+ fclose(qidx);
+ hts_close(b_fp);
+
+ return QIDX_OK;
+}
+
+qidx_err_t qidx_search_index(char const * indexfile,
+ char const * bamfile, char const * qname) {
+ FILE * index = fopen(indexfile, "rb");
+ if(!index) { return QIDX_FAILED_TO_OPEN_INDEX_FILE; }
+
+ int fd = fileno(index);
+
+ qidx_fp_t q_fp;
+ qidx_err_t err = qidx_open(&q_fp, fd);
+ if(err) return err;
+
+ qidx_htable_t * htab = qidx_get_htable(&q_fp);
+
+ qidx_record_t * rec;
+ if((err = qidx_table_get(htab, qname, &rec))) {
+ return err;
+ }
+
+ if(rec) {
+ htsFile * fp = hts_open(bamfile, "r");
+ htsFile* out_fp = hts_open("-", "w");
+
+
+ if(!fp) {
+ return QIDX_FAILED_TO_OPEN_BAMFILE;
+ }
+
+ bam1_t * b = bam_init1();
+ bam_hdr_t * hdr = sam_hdr_read(fp);
+ for(size_t i = 0; i < rec->n_alns; i++) {
+ aln_spec_t * aln = &rec->alns[i];
+
+ if(bgzf_seek(fp->fp.bgzf, aln->vptr, SEEK_SET) < 0) {
+ return QIDX_IO_FAIL;
+ }
+
+ int ret;
+ if((ret = sam_read1(fp, hdr, b)) >= 0) {
+
+ int ret = sam_write1(out_fp, hdr, b);
+ if(ret < 0) {
+ return QIDX_IO_FAIL;
+ }
+ }
+ }
+
+ sdsfree(rec->qname);
+ free(rec->alns);
+ free(rec);
+
+ bam_hdr_destroy(hdr);
+ bam_destroy1(b);
+ hts_close(out_fp);
+ hts_close(fp);
+ } else {
+ fprintf(stderr, "Could not find read: %s\n", qname);
+ }
+
+ return QIDX_OK;
+}
+
+void die_usage(char const * prog) {
+ err("%s {create,search}", prog);
+ err(" create [bamfile] [indexfile]");
+ die(" search [index] [read]");
+}
+
+int main(int argc, char ** argv) {
+
+ if(!argc) return 1;
+
+ if(argc < 3) die_usage(argv[0]);
+
+ char const * type = argv[1];
+ qidx_err_t err;
+
+ if(strcmp(type, "create") == 0) {
+ if(argc != 4) die_usage(argv[0]);
+ char const * bamfile = argv[2];
+ char const * indexfile = argv[3];
+ err = qidx_create_index(bamfile, indexfile);
+ if(err) {
+ qidx_die_err(err);
+ }
+ } else if (strcmp(type, "search") == 0) {
+ if(argc != 5) die_usage(argv[0]);
+ char const * indexfile = argv[2];
+ char const * bamfile = argv[3];
+ char const * qname = argv[4];
+ if((err = qidx_search_index(indexfile, bamfile, qname))) {
+ qidx_die_err(err);
+ }
+ } else {
+ die_usage(argv[0]);
+ }
+
+}
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..e412184
--- /dev/null
+++ b/makefile
@@ -0,0 +1,3 @@
+main:
+ gcc main.c ./util/hash.c ./util/prime_search.c -lhts -lm ./util/sds/sds.c ./util/err.c -g -o qidx -fsanitize=address
+.PHONY: main
diff --git a/util/err.c b/util/err.c
new file mode 100644
index 0000000..1f3fb6d
--- /dev/null
+++ b/util/err.c
@@ -0,0 +1,80 @@
+#include "util.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#define FORMAT_MAX_SIZE 1024
+
+extern int errno;
+
+void NORETURN die(char const * format, ...) {
+ va_list vargs;
+ va_start (vargs, format);
+
+ vfprintf(stderr, format, vargs);
+ fprintf(stderr, "\n");
+
+ va_end(vargs);
+ exit(1);
+}
+
+void err(char const * format, ...) {
+ va_list vargs;
+ va_start (vargs, format);
+
+ vfprintf(stderr, format, vargs);
+ fprintf(stderr, "\n");
+
+ va_end(vargs);
+}
+
+void format_with_errno(char * buf, size_t n, char const * format) {
+ char const * errstr = strerror(errno);
+ char fmt_err[256];
+
+ size_t i = 0,
+ j = 0;
+ while(errstr[j] && i < sizeof(fmt_err) - 1) {
+ if((fmt_err[i++] = errstr[j++]) != '%')
+ continue;
+
+ if(i < sizeof(fmt_err) - 1) {
+ fmt_err[i++] = '%';
+ } else {
+ i--;
+ }
+ }
+
+ fmt_err[i] = '\0';
+
+ snprintf(buf, n, "%s: %s", format, fmt_err);
+}
+
+
+void NORETURN die_errno(char const * format, ...) {
+ va_list vargs;
+ va_start (vargs, format);
+
+ char buf[FORMAT_MAX_SIZE];
+ format_with_errno(buf, sizeof(buf), format);
+ vfprintf(stderr, buf, vargs);
+ fprintf(stderr, "\n");
+ exit(1);
+
+ va_end(vargs);
+}
+
+void err_errno(char const * format, ...) {
+ va_list vargs;
+ va_start (vargs, format);
+
+ char buf[FORMAT_MAX_SIZE];
+
+ format_with_errno(buf, sizeof(buf), format);
+ vfprintf(stderr, buf, vargs);
+ fprintf(stderr, "\n");
+
+ va_end(vargs);
+}
diff --git a/util/hash.c b/util/hash.c
new file mode 100644
index 0000000..5f9b482
--- /dev/null
+++ b/util/hash.c
@@ -0,0 +1,46 @@
+#include "hash.h"
+
+uint64_t murmur_hash_64a(void const * key, int len, unsigned int seed)
+{
+ const uint64_t m = 0xc6a4a7935bd1e995;
+ const int r = 47;
+
+ uint64_t h = seed ^ (len * m);
+
+ const uint64_t * data = (const uint64_t *)key;
+ const uint64_t * end = data + (len/8);
+
+ while(data != end)
+ {
+ uint64_t k = *data++;
+
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+
+ h ^= k;
+ h *= m;
+ }
+
+ const unsigned char * data2 = (const unsigned char*)data;
+
+ switch(len & 7)
+ {
+ case 7: h ^= (uint64_t)(data2[6]) << 48;
+ case 6: h ^= (uint64_t)(data2[5]) << 40;
+ case 5: h ^= (uint64_t)(data2[4]) << 32;
+ case 4: h ^= (uint64_t)(data2[3]) << 24;
+ case 3: h ^= (uint64_t)(data2[2]) << 16;
+ case 2: h ^= (uint64_t)(data2[1]) << 8;
+ case 1: h ^= (uint64_t)(data2[0]);
+ h *= m;
+ };
+
+ h ^= h >> r;
+ h *= m;
+ h ^= h >> r;
+
+ return h;
+}
+
+
diff --git a/util/hash.h b/util/hash.h
new file mode 100644
index 0000000..0c6a546
--- /dev/null
+++ b/util/hash.h
@@ -0,0 +1,27 @@
+#ifndef UTIL_HASH_H
+#define UTIL_HASH_H
+
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+
+uint64_t murmur_hash_64a(void const * key, int len, unsigned int seed);
+
+static inline uint64_t murmur_hash_str_64s(char const * str, unsigned int seed) {
+ return murmur_hash_64a(str, strlen(str), seed);
+}
+
+static inline uint64_t murmur64(uint64_t h) {
+ h ^= h >> 33;
+ h *= 0xff51afd7ed558ccdLLU;
+ h ^= h >> 33;
+ h *= 0xc4ceb9fe1a85ec53LLU;
+ h ^= h >> 33;
+ return h;
+}
+
+static inline size_t murmur64_range(uint64_t h, size_t n) {
+ return murmur64(h) % n;
+}
+
+#endif /* ifndef UTIL_HASH_H */
diff --git a/util/prime_search.c b/util/prime_search.c
new file mode 100644
index 0000000..e717faa
--- /dev/null
+++ b/util/prime_search.c
@@ -0,0 +1,344 @@
+#include "prime_search.h"
+
+size_t const PRIME_TABLE[] = {
+ 0ul,
+ 2ul,
+ 3ul,
+ 5ul,
+ 7ul,
+ 11ul,
+ 13ul,
+ 17ul,
+ 19ul,
+ 23ul,
+ 29ul,
+ 31ul,
+ 37ul,
+ 41ul,
+ 43ul,
+ 47ul,
+ 53ul,
+ 59ul,
+ 61ul,
+ 67ul,
+ 71ul,
+ 73ul,
+ 79ul,
+ 83ul,
+ 89ul,
+ 97ul,
+ 101ul,
+ 103ul,
+ 107ul,
+ 109ul,
+ 113ul,
+ 127ul,
+ 131ul,
+ 137ul,
+ 139ul,
+ 149ul,
+ 151ul,
+ 157ul,
+ 163ul,
+ 167ul,
+ 173ul,
+ 179ul,
+ 181ul,
+ 191ul,
+ 193ul,
+ 197ul,
+ 199ul,
+ 211ul,
+ 227ul,
+ 241ul,
+ 257ul,
+ 277ul,
+ 293ul,
+ 313ul,
+ 337ul,
+ 359ul,
+ 383ul,
+ 409ul,
+ 439ul,
+ 467ul,
+ 503ul,
+ 541ul,
+ 577ul,
+ 619ul,
+ 661ul,
+ 709ul,
+ 761ul,
+ 823ul,
+ 887ul,
+ 953ul,
+ 1031ul,
+ 1109ul,
+ 1193ul,
+ 1289ul,
+ 1381ul,
+ 1493ul,
+ 1613ul,
+ 1741ul,
+ 1879ul,
+ 2029ul,
+ 2179ul,
+ 2357ul,
+ 2549ul,
+ 2753ul,
+ 2971ul,
+ 3209ul,
+ 3469ul,
+ 3739ul,
+ 4027ul,
+ 4349ul,
+ 4703ul,
+ 5087ul,
+ 5503ul,
+ 5953ul,
+ 6427ul,
+ 6949ul,
+ 7517ul,
+ 8123ul,
+ 8783ul,
+ 9497ul,
+ 10273ul,
+ 11113ul,
+ 12011ul,
+ 12983ul,
+ 14033ul,
+ 15173ul,
+ 16411ul,
+ 17749ul,
+ 19183ul,
+ 20753ul,
+ 22447ul,
+ 24281ul,
+ 26267ul,
+ 28411ul,
+ 30727ul,
+ 33223ul,
+ 35933ul,
+ 38873ul,
+ 42043ul,
+ 45481ul,
+ 49201ul,
+ 53201ul,
+ 57557ul,
+ 62233ul,
+ 67307ul,
+ 72817ul,
+ 78779ul,
+ 85229ul,
+ 92203ul,
+ 99733ul,
+ 107897ul,
+ 116731ul,
+ 126271ul,
+ 136607ul,
+ 147793ul,
+ 159871ul,
+ 172933ul,
+ 187091ul,
+ 202409ul,
+ 218971ul,
+ 236897ul,
+ 256279ul,
+ 277261ul,
+ 299951ul,
+ 324503ul,
+ 351061ul,
+ 379787ul,
+ 410857ul,
+ 444487ul,
+ 480881ul,
+ 520241ul,
+ 562841ul,
+ 608903ul,
+ 658753ul,
+ 712697ul,
+ 771049ul,
+ 834181ul,
+ 902483ul,
+ 976369ul,
+ 1056323ul,
+ 1142821ul,
+ 1236397ul,
+ 1337629ul,
+ 1447153ul,
+ 1565659ul,
+ 1693859ul,
+ 1832561ul,
+ 1982627ul,
+ 2144977ul,
+ 2320627ul,
+ 2510653ul,
+ 2716249ul,
+ 2938679ul,
+ 3179303ul,
+ 3439651ul,
+ 3721303ul,
+ 4026031ul,
+ 4355707ul,
+ 4712381ul,
+ 5098259ul,
+ 5515729ul,
+ 5967347ul,
+ 6456007ul,
+ 6984629ul,
+ 7556579ul,
+ 8175383ul,
+ 8844859ul,
+ 9569143ul,
+ 10352717ul,
+ 11200489ul,
+ 12117689ul,
+ 13109983ul,
+ 14183539ul,
+ 15345007ul,
+ 16601593ul,
+ 17961079ul,
+ 19431899ul,
+ 21023161ul,
+ 22744717ul,
+ 24607243ul,
+ 26622317ul,
+ 28802401ul,
+ 31160981ul,
+ 33712729ul,
+ 36473443ul,
+ 39460231ul,
+ 42691603ul,
+ 46187573ul,
+ 49969847ul,
+ 54061849ul,
+ 58488943ul,
+ 63278561ul,
+ 68460391ul,
+ 74066549ul,
+ 80131819ul,
+ 86693767ul,
+ 93793069ul,
+ 101473717ul,
+ 109783337ul,
+ 118773397ul,
+ 128499677ul,
+ 139022417ul,
+ 150406843ul,
+ 162723577ul,
+ 176048909ul,
+ 190465427ul,
+ 206062531ul,
+ 222936881ul,
+ 241193053ul,
+ 260944219ul,
+ 282312799ul,
+ 305431229ul,
+ 330442829ul,
+ 357502601ul,
+ 386778277ul,
+ 418451333ul,
+ 452718089ul,
+ 489790921ul,
+ 529899637ul,
+ 573292817ul,
+ 620239453ul,
+ 671030513ul,
+ 725980837ul,
+ 785430967ul,
+ 849749479ul,
+ 919334987ul,
+ 994618837ul,
+ 1076067617ul,
+ 1164186217ul,
+ 1259520799ul,
+ 1362662261ul,
+ 1474249943ul,
+ 1594975441ul,
+ 1725587117ul,
+ 1866894511ul,
+ 2019773507ul,
+ 2185171673ul,
+ 2364114217ul,
+ 2557710269ul,
+ 2767159799ul,
+ 2993761039ul,
+ 3238918481ul,
+ 3504151727ul,
+ 3791104843ul,
+ 4101556399ul,
+ 4294967291ul,
+#if __WORDSIZE == 64
+ 4294967291ul
+#else
+ 6442450933ul,
+ 8589934583ul,
+ 12884901857ul,
+ 17179869143ul,
+ 25769803693ul,
+ 34359738337ul,
+ 51539607367ul,
+ 68719476731ul,
+ 103079215087ul,
+ 137438953447ul,
+ 206158430123ul,
+ 274877906899ul,
+ 412316860387ul,
+ 549755813881ul,
+ 824633720731ul,
+ 1099511627689ul,
+ 1649267441579ul,
+ 2199023255531ul,
+ 3298534883309ul,
+ 4398046511093ul,
+ 6597069766607ul,
+ 8796093022151ul,
+ 13194139533241ul,
+ 17592186044399ul,
+ 26388279066581ul,
+ 35184372088777ul,
+ 52776558133177ul,
+ 70368744177643ul,
+ 105553116266399ul,
+ 140737488355213ul,
+ 211106232532861ul,
+ 281474976710597ul,
+ 562949953421231ul,
+ 1125899906842597ul,
+ 2251799813685119ul,
+ 4503599627370449ul,
+ 9007199254740881ul,
+ 18014398509481951ul,
+ 36028797018963913ul,
+ 72057594037927931ul,
+ 144115188075855859ul,
+ 288230376151711717ul,
+ 576460752303423433ul,
+ 1152921504606846883ul,
+ 2305843009213693951ul,
+ 4611686018427387847ul,
+ 9223372036854775783ul,
+ 18446744073709551557ul
+#endif
+};
+
+#define ENTS (sizeof(PRIME_TABLE) / sizeof(*PRIME_TABLE))
+
+size_t next_prime_above_lowerbound(size_t n) {
+ size_t const * a = PRIME_TABLE,
+ * b = PRIME_TABLE + (ENTS - 1),
+ * m;
+ do {
+ m = a + (b - a) / 2;
+
+ if(*m >= n) {
+ b = m;
+ } else if (*m < n) {
+ a = m + 1;
+ } else {
+ break;
+ }
+ } while(*a < *b);
+
+ return *b;
+}
+
diff --git a/util/prime_search.h b/util/prime_search.h
new file mode 100644
index 0000000..842c6d9
--- /dev/null
+++ b/util/prime_search.h
@@ -0,0 +1,8 @@
+#ifndef _NEXT_LARGEST_PRIME_H
+#define _NEXT_LARGEST_PRIME_H
+
+#include <stddef.h>
+
+size_t next_prime_above_lowerbound(size_t n);
+
+#endif /* ifndef _NEXT_LARGEST_PRIME_H */
diff --git a/util/sds/sds.c b/util/sds/sds.c
new file mode 100644
index 0000000..1189716
--- /dev/null
+++ b/util/sds/sds.c
@@ -0,0 +1,1300 @@
+/* SDSLib 2.0 -- A C dynamic strings library
+ *
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Oran Agra
+ * Copyright (c) 2015, Redis Labs, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <assert.h>
+#include <limits.h>
+#include "sds.h"
+#include "sdsalloc.h"
+
+const char *SDS_NOINIT = "SDS_NOINIT";
+
+static inline int sdsHdrSize(char type) {
+ switch(type&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ return sizeof(struct sdshdr5);
+ case SDS_TYPE_8:
+ return sizeof(struct sdshdr8);
+ case SDS_TYPE_16:
+ return sizeof(struct sdshdr16);
+ case SDS_TYPE_32:
+ return sizeof(struct sdshdr32);
+ case SDS_TYPE_64:
+ return sizeof(struct sdshdr64);
+ }
+ return 0;
+}
+
+static inline char sdsReqType(size_t string_size) {
+ if (string_size < 1<<5)
+ return SDS_TYPE_5;
+ if (string_size < 1<<8)
+ return SDS_TYPE_8;
+ if (string_size < 1<<16)
+ return SDS_TYPE_16;
+#if (LONG_MAX == LLONG_MAX)
+ if (string_size < 1ll<<32)
+ return SDS_TYPE_32;
+ return SDS_TYPE_64;
+#else
+ return SDS_TYPE_32;
+#endif
+}
+
+/* Create a new sds string with the content specified by the 'init' pointer
+ * and 'initlen'.
+ * If NULL is used for 'init' the string is initialized with zero bytes.
+ * If SDS_NOINIT is used, the buffer is left uninitialized;
+ *
+ * The string is always null-termined (all the sds strings are, always) so
+ * even if you create an sds string with:
+ *
+ * mystring = sdsnewlen("abc",3);
+ *
+ * You can print the string with printf() as there is an implicit \0 at the
+ * end of the string. However the string is binary safe and can contain
+ * \0 characters in the middle, as the length is stored in the sds header. */
+sds sdsnewlen(const void *init, size_t initlen) {
+ void *sh;
+ sds s;
+ char type = sdsReqType(initlen);
+ /* Empty strings are usually created in order to append. Use type 8
+ * since type 5 is not good at this. */
+ if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;
+ int hdrlen = sdsHdrSize(type);
+ unsigned char *fp; /* flags pointer. */
+
+ sh = s_malloc(hdrlen+initlen+1);
+ if (sh == NULL) return NULL;
+ if (init==SDS_NOINIT)
+ init = NULL;
+ else if (!init)
+ memset(sh, 0, hdrlen+initlen+1);
+ s = (char*)sh+hdrlen;
+ fp = ((unsigned char*)s)-1;
+ switch(type) {
+ case SDS_TYPE_5: {
+ *fp = type | (initlen << SDS_TYPE_BITS);
+ break;
+ }
+ case SDS_TYPE_8: {
+ SDS_HDR_VAR(8,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ case SDS_TYPE_16: {
+ SDS_HDR_VAR(16,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ case SDS_TYPE_32: {
+ SDS_HDR_VAR(32,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ case SDS_TYPE_64: {
+ SDS_HDR_VAR(64,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ }
+ if (initlen && init)
+ memcpy(s, init, initlen);
+ s[initlen] = '\0';
+ return s;
+}
+
+/* Create an empty (zero length) sds string. Even in this case the string
+ * always has an implicit null term. */
+sds sdsempty(void) {
+ return sdsnewlen("",0);
+}
+
+/* Create a new sds string starting from a null terminated C string. */
+sds sdsnew(const char *init) {
+ size_t initlen = (init == NULL) ? 0 : strlen(init);
+ return sdsnewlen(init, initlen);
+}
+
+/* Duplicate an sds string. */
+sds sdsdup(const sds s) {
+ return sdsnewlen(s, sdslen(s));
+}
+
+/* Free an sds string. No operation is performed if 's' is NULL. */
+void sdsfree(sds s) {
+ if (s == NULL) return;
+ s_free((char*)s-sdsHdrSize(s[-1]));
+}
+
+/* Set the sds string length to the length as obtained with strlen(), so
+ * considering as content only up to the first null term character.
+ *
+ * This function is useful when the sds string is hacked manually in some
+ * way, like in the following example:
+ *
+ * s = sdsnew("foobar");
+ * s[2] = '\0';
+ * sdsupdatelen(s);
+ * printf("%d\n", sdslen(s));
+ *
+ * The output will be "2", but if we comment out the call to sdsupdatelen()
+ * the output will be "6" as the string was modified but the logical length
+ * remains 6 bytes. */
+void sdsupdatelen(sds s) {
+ size_t reallen = strlen(s);
+ sdssetlen(s, reallen);
+}
+
+/* Modify an sds string in-place to make it empty (zero length).
+ * However all the existing buffer is not discarded but set as free space
+ * so that next append operations will not require allocations up to the
+ * number of bytes previously available. */
+void sdsclear(sds s) {
+ sdssetlen(s, 0);
+ s[0] = '\0';
+}
+
+/* Enlarge the free space at the end of the sds string so that the caller
+ * is sure that after calling this function can overwrite up to addlen
+ * bytes after the end of the string, plus one more byte for nul term.
+ *
+ * Note: this does not change the *length* of the sds string as returned
+ * by sdslen(), but only the free buffer space we have. */
+sds sdsMakeRoomFor(sds s, size_t addlen) {
+ void *sh, *newsh;
+ size_t avail = sdsavail(s);
+ size_t len, newlen;
+ char type, oldtype = s[-1] & SDS_TYPE_MASK;
+ int hdrlen;
+
+ /* Return ASAP if there is enough space left. */
+ if (avail >= addlen) return s;
+
+ len = sdslen(s);
+ sh = (char*)s-sdsHdrSize(oldtype);
+ newlen = (len+addlen);
+ if (newlen < SDS_MAX_PREALLOC)
+ newlen *= 2;
+ else
+ newlen += SDS_MAX_PREALLOC;
+
+ type = sdsReqType(newlen);
+
+ /* Don't use type 5: the user is appending to the string and type 5 is
+ * not able to remember empty space, so sdsMakeRoomFor() must be called
+ * at every appending operation. */
+ if (type == SDS_TYPE_5) type = SDS_TYPE_8;
+
+ hdrlen = sdsHdrSize(type);
+ if (oldtype==type) {
+ newsh = s_realloc(sh, hdrlen+newlen+1);
+ if (newsh == NULL) return NULL;
+ s = (char*)newsh+hdrlen;
+ } else {
+ /* Since the header size changes, need to move the string forward,
+ * and can't use realloc */
+ newsh = s_malloc(hdrlen+newlen+1);
+ if (newsh == NULL) return NULL;
+ memcpy((char*)newsh+hdrlen, s, len+1);
+ s_free(sh);
+ s = (char*)newsh+hdrlen;
+ s[-1] = type;
+ sdssetlen(s, len);
+ }
+ sdssetalloc(s, newlen);
+ return s;
+}
+
+/* Reallocate the sds string so that it has no free space at the end. The
+ * contained string remains not altered, but next concatenation operations
+ * will require a reallocation.
+ *
+ * After the call, the passed sds string is no longer valid and all the
+ * references must be substituted with the new pointer returned by the call. */
+sds sdsRemoveFreeSpace(sds s) {
+ void *sh, *newsh;
+ char type, oldtype = s[-1] & SDS_TYPE_MASK;
+ int hdrlen, oldhdrlen = sdsHdrSize(oldtype);
+ size_t len = sdslen(s);
+ size_t avail = sdsavail(s);
+ sh = (char*)s-oldhdrlen;
+
+ /* Return ASAP if there is no space left. */
+ if (avail == 0) return s;
+
+ /* Check what would be the minimum SDS header that is just good enough to
+ * fit this string. */
+ type = sdsReqType(len);
+ hdrlen = sdsHdrSize(type);
+
+ /* If the type is the same, or at least a large enough type is still
+ * required, we just realloc(), letting the allocator to do the copy
+ * only if really needed. Otherwise if the change is huge, we manually
+ * reallocate the string to use the different header type. */
+ if (oldtype==type || type > SDS_TYPE_8) {
+ newsh = s_realloc(sh, oldhdrlen+len+1);
+ if (newsh == NULL) return NULL;
+ s = (char*)newsh+oldhdrlen;
+ } else {
+ newsh = s_malloc(hdrlen+len+1);
+ if (newsh == NULL) return NULL;
+ memcpy((char*)newsh+hdrlen, s, len+1);
+ s_free(sh);
+ s = (char*)newsh+hdrlen;
+ s[-1] = type;
+ sdssetlen(s, len);
+ }
+ sdssetalloc(s, len);
+ return s;
+}
+
+/* Return the total size of the allocation of the specified sds string,
+ * including:
+ * 1) The sds header before the pointer.
+ * 2) The string.
+ * 3) The free buffer at the end if any.
+ * 4) The implicit null term.
+ */
+size_t sdsAllocSize(sds s) {
+ size_t alloc = sdsalloc(s);
+ return sdsHdrSize(s[-1])+alloc+1;
+}
+
+/* Return the pointer of the actual SDS allocation (normally SDS strings
+ * are referenced by the start of the string buffer). */
+void *sdsAllocPtr(sds s) {
+ return (void*) (s-sdsHdrSize(s[-1]));
+}
+
+/* Increment the sds length and decrements the left free space at the
+ * end of the string according to 'incr'. Also set the null term
+ * in the new end of the string.
+ *
+ * This function is used in order to fix the string length after the
+ * user calls sdsMakeRoomFor(), writes something after the end of
+ * the current string, and finally needs to set the new length.
+ *
+ * Note: it is possible to use a negative increment in order to
+ * right-trim the string.
+ *
+ * Usage example:
+ *
+ * Using sdsIncrLen() and sdsMakeRoomFor() it is possible to mount the
+ * following schema, to cat bytes coming from the kernel to the end of an
+ * sds string without copying into an intermediate buffer:
+ *
+ * oldlen = sdslen(s);
+ * s = sdsMakeRoomFor(s, BUFFER_SIZE);
+ * nread = read(fd, s+oldlen, BUFFER_SIZE);
+ * ... check for nread <= 0 and handle it ...
+ * sdsIncrLen(s, nread);
+ */
+void sdsIncrLen(sds s, ssize_t incr) {
+ unsigned char flags = s[-1];
+ size_t len;
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5: {
+ unsigned char *fp = ((unsigned char*)s)-1;
+ unsigned char oldlen = SDS_TYPE_5_LEN(flags);
+ assert((incr > 0 && oldlen+incr < 32) || (incr < 0 && oldlen >= (unsigned int)(-incr)));
+ *fp = SDS_TYPE_5 | ((oldlen+incr) << SDS_TYPE_BITS);
+ len = oldlen+incr;
+ break;
+ }
+ case SDS_TYPE_8: {
+ SDS_HDR_VAR(8,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ case SDS_TYPE_16: {
+ SDS_HDR_VAR(16,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ case SDS_TYPE_32: {
+ SDS_HDR_VAR(32,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= (unsigned int)incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ case SDS_TYPE_64: {
+ SDS_HDR_VAR(64,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= (uint64_t)incr) || (incr < 0 && sh->len >= (uint64_t)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ default: len = 0; /* Just to avoid compilation warnings. */
+ }
+ s[len] = '\0';
+}
+
+/* Grow the sds to have the specified length. Bytes that were not part of
+ * the original length of the sds will be set to zero.
+ *
+ * if the specified length is smaller than the current length, no operation
+ * is performed. */
+sds sdsgrowzero(sds s, size_t len) {
+ size_t curlen = sdslen(s);
+
+ if (len <= curlen) return s;
+ s = sdsMakeRoomFor(s,len-curlen);
+ if (s == NULL) return NULL;
+
+ /* Make sure added region doesn't contain garbage */
+ memset(s+curlen,0,(len-curlen+1)); /* also set trailing \0 byte */
+ sdssetlen(s, len);
+ return s;
+}
+
+/* Append the specified binary-safe string pointed by 't' of 'len' bytes to the
+ * end of the specified sds string 's'.
+ *
+ * After the call, the passed sds string is no longer valid and all the
+ * references must be substituted with the new pointer returned by the call. */
+sds sdscatlen(sds s, const void *t, size_t len) {
+ size_t curlen = sdslen(s);
+
+ s = sdsMakeRoomFor(s,len);
+ if (s == NULL) return NULL;
+ memcpy(s+curlen, t, len);
+ sdssetlen(s, curlen+len);
+ s[curlen+len] = '\0';
+ return s;
+}
+
+/* Append the specified null termianted C string to the sds string 's'.
+ *
+ * After the call, the passed sds string is no longer valid and all the
+ * references must be substituted with the new pointer returned by the call. */
+sds sdscat(sds s, const char *t) {
+ return sdscatlen(s, t, strlen(t));
+}
+
+/* Append the specified sds 't' to the existing sds 's'.
+ *
+ * After the call, the modified sds string is no longer valid and all the
+ * references must be substituted with the new pointer returned by the call. */
+sds sdscatsds(sds s, const sds t) {
+ return sdscatlen(s, t, sdslen(t));
+}
+
+/* Destructively modify the sds string 's' to hold the specified binary
+ * safe string pointed by 't' of length 'len' bytes. */
+sds sdscpylen(sds s, const char *t, size_t len) {
+ if (sdsalloc(s) < len) {
+ s = sdsMakeRoomFor(s,len-sdslen(s));
+ if (s == NULL) return NULL;
+ }
+ memcpy(s, t, len);
+ s[len] = '\0';
+ sdssetlen(s, len);
+ return s;
+}
+
+/* Like sdscpylen() but 't' must be a null-termined string so that the length
+ * of the string is obtained with strlen(). */
+sds sdscpy(sds s, const char *t) {
+ return sdscpylen(s, t, strlen(t));
+}
+
+/* Helper for sdscatlonglong() doing the actual number -> string
+ * conversion. 's' must point to a string with room for at least
+ * SDS_LLSTR_SIZE bytes.
+ *
+ * The function returns the length of the null-terminated string
+ * representation stored at 's'. */
+#define SDS_LLSTR_SIZE 21
+int sdsll2str(char *s, long long value) {
+ char *p, aux;
+ unsigned long long v;
+ size_t l;
+
+ /* Generate the string representation, this method produces
+ * an reversed string. */
+ v = (value < 0) ? -value : value;
+ p = s;
+ do {
+ *p++ = '0'+(v%10);
+ v /= 10;
+ } while(v);
+ if (value < 0) *p++ = '-';
+
+ /* Compute length and add null term. */
+ l = p-s;
+ *p = '\0';
+
+ /* Reverse the string. */
+ p--;
+ while(s < p) {
+ aux = *s;
+ *s = *p;
+ *p = aux;
+ s++;
+ p--;
+ }
+ return l;
+}
+
+/* Identical sdsll2str(), but for unsigned long long type. */
+int sdsull2str(char *s, unsigned long long v) {
+ char *p, aux;
+ size_t l;
+
+ /* Generate the string representation, this method produces
+ * an reversed string. */
+ p = s;
+ do {
+ *p++ = '0'+(v%10);
+ v /= 10;
+ } while(v);
+
+ /* Compute length and add null term. */
+ l = p-s;
+ *p = '\0';
+
+ /* Reverse the string. */
+ p--;
+ while(s < p) {
+ aux = *s;
+ *s = *p;
+ *p = aux;
+ s++;
+ p--;
+ }
+ return l;
+}
+
+/* Create an sds string from a long long value. It is much faster than:
+ *
+ * sdscatprintf(sdsempty(),"%lld\n", value);
+ */
+sds sdsfromlonglong(long long value) {
+ char buf[SDS_LLSTR_SIZE];
+ int len = sdsll2str(buf,value);
+
+ return sdsnewlen(buf,len);
+}
+
+/* Like sdscatprintf() but gets va_list instead of being variadic. */
+sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
+ va_list cpy;
+ char staticbuf[1024], *buf = staticbuf, *t;
+ size_t buflen = strlen(fmt)*2;
+
+ /* We try to start using a static buffer for speed.
+ * If not possible we revert to heap allocation. */
+ if (buflen > sizeof(staticbuf)) {
+ buf = s_malloc(buflen);
+ if (buf == NULL) return NULL;
+ } else {
+ buflen = sizeof(staticbuf);
+ }
+
+ /* Try with buffers two times bigger every time we fail to
+ * fit the string in the current buffer size. */
+ while(1) {
+ buf[buflen-2] = '\0';
+ va_copy(cpy,ap);
+ vsnprintf(buf, buflen, fmt, cpy);
+ va_end(cpy);
+ if (buf[buflen-2] != '\0') {
+ if (buf != staticbuf) s_free(buf);
+ buflen *= 2;
+ buf = s_malloc(buflen);
+ if (buf == NULL) return NULL;
+ continue;
+ }
+ break;
+ }
+
+ /* Finally concat the obtained string to the SDS string and return it. */
+ t = sdscat(s, buf);
+ if (buf != staticbuf) s_free(buf);
+ return t;
+}
+
+/* Append to the sds string 's' a string obtained using printf-alike format
+ * specifier.
+ *
+ * After the call, the modified sds string is no longer valid and all the
+ * references must be substituted with the new pointer returned by the call.
+ *
+ * Example:
+ *
+ * s = sdsnew("Sum is: ");
+ * s = sdscatprintf(s,"%d+%d = %d",a,b,a+b).
+ *
+ * Often you need to create a string from scratch with the printf-alike
+ * format. When this is the need, just use sdsempty() as the target string:
+ *
+ * s = sdscatprintf(sdsempty(), "... your format ...", args);
+ */
+sds sdscatprintf(sds s, const char *fmt, ...) {
+ va_list ap;
+ char *t;
+ va_start(ap, fmt);
+ t = sdscatvprintf(s,fmt,ap);
+ va_end(ap);
+ return t;
+}
+
+/* This function is similar to sdscatprintf, but much faster as it does
+ * not rely on sprintf() family functions implemented by the libc that
+ * are often very slow. Moreover directly handling the sds string as
+ * new data is concatenated provides a performance improvement.
+ *
+ * However this function only handles an incompatible subset of printf-alike
+ * format specifiers:
+ *
+ * %s - C String
+ * %S - SDS string
+ * %i - signed int
+ * %I - 64 bit signed integer (long long, int64_t)
+ * %u - unsigned int
+ * %U - 64 bit unsigned integer (unsigned long long, uint64_t)
+ * %% - Verbatim "%" character.
+ */
+sds sdscatfmt(sds s, char const *fmt, ...) {
+ size_t initlen = sdslen(s);
+ const char *f = fmt;
+ long i;
+ va_list ap;
+
+ /* To avoid continuous reallocations, let's start with a buffer that
+ * can hold at least two times the format string itself. It's not the
+ * best heuristic but seems to work in practice. */
+ s = sdsMakeRoomFor(s, initlen + strlen(fmt)*2);
+ va_start(ap,fmt);
+ f = fmt; /* Next format specifier byte to process. */
+ i = initlen; /* Position of the next byte to write to dest str. */
+ while(*f) {
+ char next, *str;
+ size_t l;
+ long long num;
+ unsigned long long unum;
+
+ /* Make sure there is always space for at least 1 char. */
+ if (sdsavail(s)==0) {
+ s = sdsMakeRoomFor(s,1);
+ }
+
+ switch(*f) {
+ case '%':
+ next = *(f+1);
+ f++;
+ switch(next) {
+ case 's':
+ case 'S':
+ str = va_arg(ap,char*);
+ l = (next == 's') ? strlen(str) : sdslen(str);
+ if (sdsavail(s) < l) {
+ s = sdsMakeRoomFor(s,l);
+ }
+ memcpy(s+i,str,l);
+ sdsinclen(s,l);
+ i += l;
+ break;
+ case 'i':
+ case 'I':
+ if (next == 'i')
+ num = va_arg(ap,int);
+ else
+ num = va_arg(ap,long long);
+ {
+ char buf[SDS_LLSTR_SIZE];
+ l = sdsll2str(buf,num);
+ if (sdsavail(s) < l) {
+ s = sdsMakeRoomFor(s,l);
+ }
+ memcpy(s+i,buf,l);
+ sdsinclen(s,l);
+ i += l;
+ }
+ break;
+ case 'u':
+ case 'U':
+ if (next == 'u')
+ unum = va_arg(ap,unsigned int);
+ else
+ unum = va_arg(ap,unsigned long long);
+ {
+ char buf[SDS_LLSTR_SIZE];
+ l = sdsull2str(buf,unum);
+ if (sdsavail(s) < l) {
+ s = sdsMakeRoomFor(s,l);
+ }
+ memcpy(s+i,buf,l);
+ sdsinclen(s,l);
+ i += l;
+ }
+ break;
+ default: /* Handle %% and generally %<unknown>. */
+ s[i++] = next;
+ sdsinclen(s,1);
+ break;
+ }
+ break;
+ default:
+ s[i++] = *f;
+ sdsinclen(s,1);
+ break;
+ }
+ f++;
+ }
+ va_end(ap);
+
+ /* Add null-term */
+ s[i] = '\0';
+ return s;
+}
+
+/* Remove the part of the string from left and from right composed just of
+ * contiguous characters found in 'cset', that is a null terminted C string.
+ *
+ * After the call, the modified sds string is no longer valid and all the
+ * references must be substituted with the new pointer returned by the call.
+ *
+ * Example:
+ *
+ * s = sdsnew("AA...AA.a.aa.aHelloWorld :::");
+ * s = sdstrim(s,"Aa. :");
+ * printf("%s\n", s);
+ *
+ * Output will be just "HelloWorld".
+ */
+sds sdstrim(sds s, const char *cset) {
+ char *start, *end, *sp, *ep;
+ size_t len;
+
+ sp = start = s;
+ ep = end = s+sdslen(s)-1;
+ while(sp <= end && strchr(cset, *sp)) sp++;
+ while(ep > sp && strchr(cset, *ep)) ep--;
+ len = (sp > ep) ? 0 : ((ep-sp)+1);
+ if (s != sp) memmove(s, sp, len);
+ s[len] = '\0';
+ sdssetlen(s,len);
+ return s;
+}
+
+/* Turn the string into a smaller (or equal) string containing only the
+ * substring specified by the 'start' and 'end' indexes.
+ *
+ * start and end can be negative, where -1 means the last character of the
+ * string, -2 the penultimate character, and so forth.
+ *
+ * The interval is inclusive, so the start and end characters will be part
+ * of the resulting string.
+ *
+ * The string is modified in-place.
+ *
+ * Example:
+ *
+ * s = sdsnew("Hello World");
+ * sdsrange(s,1,-1); => "ello World"
+ */
+void sdsrange(sds s, ssize_t start, ssize_t end) {
+ size_t newlen, len = sdslen(s);
+
+ if (len == 0) return;
+ if (start < 0) {
+ start = len+start;
+ if (start < 0) start = 0;
+ }
+ if (end < 0) {
+ end = len+end;
+ if (end < 0) end = 0;
+ }
+ newlen = (start > end) ? 0 : (end-start)+1;
+ if (newlen != 0) {
+ if (start >= (ssize_t)len) {
+ newlen = 0;
+ } else if (end >= (ssize_t)len) {
+ end = len-1;
+ newlen = (start > end) ? 0 : (end-start)+1;
+ }
+ } else {
+ start = 0;
+ }
+ if (start && newlen) memmove(s, s+start, newlen);
+ s[newlen] = 0;
+ sdssetlen(s,newlen);
+}
+
+/* Apply tolower() to every character of the sds string 's'. */
+void sdstolower(sds s) {
+ size_t len = sdslen(s), j;
+
+ for (j = 0; j < len; j++) s[j] = tolower(s[j]);
+}
+
+/* Apply toupper() to every character of the sds string 's'. */
+void sdstoupper(sds s) {
+ size_t len = sdslen(s), j;
+
+ for (j = 0; j < len; j++) s[j] = toupper(s[j]);
+}
+
+/* Compare two sds strings s1 and s2 with memcmp().
+ *
+ * Return value:
+ *
+ * positive if s1 > s2.
+ * negative if s1 < s2.
+ * 0 if s1 and s2 are exactly the same binary string.
+ *
+ * If two strings share exactly the same prefix, but one of the two has
+ * additional characters, the longer string is considered to be greater than
+ * the smaller one. */
+int sdscmp(const sds s1, const sds s2) {
+ size_t l1, l2, minlen;
+ int cmp;
+
+ l1 = sdslen(s1);
+ l2 = sdslen(s2);
+ minlen = (l1 < l2) ? l1 : l2;
+ cmp = memcmp(s1,s2,minlen);
+ if (cmp == 0) return l1>l2? 1: (l1<l2? -1: 0);
+ return cmp;
+}
+
+/* Split 's' with separator in 'sep'. An array
+ * of sds strings is returned. *count will be set
+ * by reference to the number of tokens returned.
+ *
+ * On out of memory, zero length string, zero length
+ * separator, NULL is returned.
+ *
+ * Note that 'sep' is able to split a string using
+ * a multi-character separator. For example
+ * sdssplit("foo_-_bar","_-_"); will return two
+ * elements "foo" and "bar".
+ *
+ * This version of the function is binary-safe but
+ * requires length arguments. sdssplit() is just the
+ * same function but for zero-terminated strings.
+ */
+sds *sdssplitlen(const char *s, ssize_t len, const char *sep, int seplen, int *count) {
+ int elements = 0, slots = 5;
+ long start = 0, j;
+ sds *tokens;
+
+ if (seplen < 1 || len < 0) return NULL;
+
+ tokens = s_malloc(sizeof(sds)*slots);
+ if (tokens == NULL) return NULL;
+
+ if (len == 0) {
+ *count = 0;
+ return tokens;
+ }
+ for (j = 0; j < (len-(seplen-1)); j++) {
+ /* make sure there is room for the next element and the final one */
+ if (slots < elements+2) {
+ sds *newtokens;
+
+ slots *= 2;
+ newtokens = s_realloc(tokens,sizeof(sds)*slots);
+ if (newtokens == NULL) goto cleanup;
+ tokens = newtokens;
+ }
+ /* search the separator */
+ if ((seplen == 1 && *(s+j) == sep[0]) || (memcmp(s+j,sep,seplen) == 0)) {
+ tokens[elements] = sdsnewlen(s+start,j-start);
+ if (tokens[elements] == NULL) goto cleanup;
+ elements++;
+ start = j+seplen;
+ j = j+seplen-1; /* skip the separator */
+ }
+ }
+ /* Add the final element. We are sure there is room in the tokens array. */
+ tokens[elements] = sdsnewlen(s+start,len-start);
+ if (tokens[elements] == NULL) goto cleanup;
+ elements++;
+ *count = elements;
+ return tokens;
+
+cleanup:
+ {
+ int i;
+ for (i = 0; i < elements; i++) sdsfree(tokens[i]);
+ s_free(tokens);
+ *count = 0;
+ return NULL;
+ }
+}
+
+/* Free the result returned by sdssplitlen(), or do nothing if 'tokens' is NULL. */
+void sdsfreesplitres(sds *tokens, int count) {
+ if (!tokens) return;
+ while(count--)
+ sdsfree(tokens[count]);
+ s_free(tokens);
+}
+
+/* Append to the sds string "s" an escaped string representation where
+ * all the non-printable characters (tested with isprint()) are turned into
+ * escapes in the form "\n\r\a...." or "\x<hex-number>".
+ *
+ * After the call, the modified sds string is no longer valid and all the
+ * references must be substituted with the new pointer returned by the call. */
+sds sdscatrepr(sds s, const char *p, size_t len) {
+ s = sdscatlen(s,"\"",1);
+ while(len--) {
+ switch(*p) {
+ case '\\':
+ case '"':
+ s = sdscatprintf(s,"\\%c",*p);
+ break;
+ case '\n': s = sdscatlen(s,"\\n",2); break;
+ case '\r': s = sdscatlen(s,"\\r",2); break;
+ case '\t': s = sdscatlen(s,"\\t",2); break;
+ case '\a': s = sdscatlen(s,"\\a",2); break;
+ case '\b': s = sdscatlen(s,"\\b",2); break;
+ default:
+ if (isprint(*p))
+ s = sdscatprintf(s,"%c",*p);
+ else
+ s = sdscatprintf(s,"\\x%02x",(unsigned char)*p);
+ break;
+ }
+ p++;
+ }
+ return sdscatlen(s,"\"",1);
+}
+
+/* Helper function for sdssplitargs() that returns non zero if 'c'
+ * is a valid hex digit. */
+int is_hex_digit(char c) {
+ return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
+ (c >= 'A' && c <= 'F');
+}
+
+/* Helper function for sdssplitargs() that converts a hex digit into an
+ * integer from 0 to 15 */
+int hex_digit_to_int(char c) {
+ switch(c) {
+ case '0': return 0;
+ case '1': return 1;
+ case '2': return 2;
+ case '3': return 3;
+ case '4': return 4;
+ case '5': return 5;
+ case '6': return 6;
+ case '7': return 7;
+ case '8': return 8;
+ case '9': return 9;
+ case 'a': case 'A': return 10;
+ case 'b': case 'B': return 11;
+ case 'c': case 'C': return 12;
+ case 'd': case 'D': return 13;
+ case 'e': case 'E': return 14;
+ case 'f': case 'F': return 15;
+ default: return 0;
+ }
+}
+
+/* Split a line into arguments, where every argument can be in the
+ * following programming-language REPL-alike form:
+ *
+ * foo bar "newline are supported\n" and "\xff\x00otherstuff"
+ *
+ * The number of arguments is stored into *argc, and an array
+ * of sds is returned.
+ *
+ * The caller should free the resulting array of sds strings with
+ * sdsfreesplitres().
+ *
+ * Note that sdscatrepr() is able to convert back a string into
+ * a quoted string in the same format sdssplitargs() is able to parse.
+ *
+ * The function returns the allocated tokens on success, even when the
+ * input string is empty, or NULL if the input contains unbalanced
+ * quotes or closed quotes followed by non space characters
+ * as in: "foo"bar or "foo'
+ */
+sds *sdssplitargs(const char *line, int *argc) {
+ const char *p = line;
+ char *current = NULL;
+ char **vector = NULL;
+
+ *argc = 0;
+ while(1) {
+ /* skip blanks */
+ while(*p && isspace(*p)) p++;
+ if (*p) {
+ /* get a token */
+ int inq=0; /* set to 1 if we are in "quotes" */
+ int insq=0; /* set to 1 if we are in 'single quotes' */
+ int done=0;
+
+ if (current == NULL) current = sdsempty();
+ while(!done) {
+ if (inq) {
+ if (*p == '\\' && *(p+1) == 'x' &&
+ is_hex_digit(*(p+2)) &&
+ is_hex_digit(*(p+3)))
+ {
+ unsigned char byte;
+
+ byte = (hex_digit_to_int(*(p+2))*16)+
+ hex_digit_to_int(*(p+3));
+ current = sdscatlen(current,(char*)&byte,1);
+ p += 3;
+ } else if (*p == '\\' && *(p+1)) {
+ char c;
+
+ p++;
+ switch(*p) {
+ case 'n': c = '\n'; break;
+ case 'r': c = '\r'; break;
+ case 't': c = '\t'; break;
+ case 'b': c = '\b'; break;
+ case 'a': c = '\a'; break;
+ default: c = *p; break;
+ }
+ current = sdscatlen(current,&c,1);
+ } else if (*p == '"') {
+ /* closing quote must be followed by a space or
+ * nothing at all. */
+ if (*(p+1) && !isspace(*(p+1))) goto err;
+ done=1;
+ } else if (!*p) {
+ /* unterminated quotes */
+ goto err;
+ } else {
+ current = sdscatlen(current,p,1);
+ }
+ } else if (insq) {
+ if (*p == '\\' && *(p+1) == '\'') {
+ p++;
+ current = sdscatlen(current,"'",1);
+ } else if (*p == '\'') {
+ /* closing quote must be followed by a space or
+ * nothing at all. */
+ if (*(p+1) && !isspace(*(p+1))) goto err;
+ done=1;
+ } else if (!*p) {
+ /* unterminated quotes */
+ goto err;
+ } else {
+ current = sdscatlen(current,p,1);
+ }
+ } else {
+ switch(*p) {
+ case ' ':
+ case '\n':
+ case '\r':
+ case '\t':
+ case '\0':
+ done=1;
+ break;
+ case '"':
+ inq=1;
+ break;
+ case '\'':
+ insq=1;
+ break;
+ default:
+ current = sdscatlen(current,p,1);
+ break;
+ }
+ }
+ if (*p) p++;
+ }
+ /* add the token to the vector */
+ vector = s_realloc(vector,((*argc)+1)*sizeof(char*));
+ vector[*argc] = current;
+ (*argc)++;
+ current = NULL;
+ } else {
+ /* Even on empty input string return something not NULL. */
+ if (vector == NULL) vector = s_malloc(sizeof(void*));
+ return vector;
+ }
+ }
+
+err:
+ while((*argc)--)
+ sdsfree(vector[*argc]);
+ s_free(vector);
+ if (current) sdsfree(current);
+ *argc = 0;
+ return NULL;
+}
+
+/* Modify the string substituting all the occurrences of the set of
+ * characters specified in the 'from' string to the corresponding character
+ * in the 'to' array.
+ *
+ * For instance: sdsmapchars(mystring, "ho", "01", 2)
+ * will have the effect of turning the string "hello" into "0ell1".
+ *
+ * The function returns the sds string pointer, that is always the same
+ * as the input pointer since no resize is needed. */
+sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen) {
+ size_t j, i, l = sdslen(s);
+
+ for (j = 0; j < l; j++) {
+ for (i = 0; i < setlen; i++) {
+ if (s[j] == from[i]) {
+ s[j] = to[i];
+ break;
+ }
+ }
+ }
+ return s;
+}
+
+/* Join an array of C strings using the specified separator (also a C string).
+ * Returns the result as an sds string. */
+sds sdsjoin(char **argv, int argc, char *sep) {
+ sds join = sdsempty();
+ int j;
+
+ for (j = 0; j < argc; j++) {
+ join = sdscat(join, argv[j]);
+ if (j != argc-1) join = sdscat(join,sep);
+ }
+ return join;
+}
+
+/* Like sdsjoin, but joins an array of SDS strings. */
+sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen) {
+ sds join = sdsempty();
+ int j;
+
+ for (j = 0; j < argc; j++) {
+ join = sdscatsds(join, argv[j]);
+ if (j != argc-1) join = sdscatlen(join,sep,seplen);
+ }
+ return join;
+}
+
+/* Wrappers to the allocators used by SDS. Note that SDS will actually
+ * just use the macros defined into sdsalloc.h in order to avoid to pay
+ * the overhead of function calls. Here we define these wrappers only for
+ * the programs SDS is linked to, if they want to touch the SDS internals
+ * even if they use a different allocator. */
+void *sds_malloc(size_t size) { return s_malloc(size); }
+void *sds_realloc(void *ptr, size_t size) { return s_realloc(ptr,size); }
+void sds_free(void *ptr) { s_free(ptr); }
+
+#if defined(SDS_TEST_MAIN)
+#include <stdio.h>
+#include "testhelp.h"
+#include "limits.h"
+
+#define UNUSED(x) (void)(x)
+int sdsTest(void) {
+ {
+ sds x = sdsnew("foo"), y;
+
+ test_cond("Create a string and obtain the length",
+ sdslen(x) == 3 && memcmp(x,"foo\0",4) == 0)
+
+ sdsfree(x);
+ x = sdsnewlen("foo",2);
+ test_cond("Create a string with specified length",
+ sdslen(x) == 2 && memcmp(x,"fo\0",3) == 0)
+
+ x = sdscat(x,"bar");
+ test_cond("Strings concatenation",
+ sdslen(x) == 5 && memcmp(x,"fobar\0",6) == 0);
+
+ x = sdscpy(x,"a");
+ test_cond("sdscpy() against an originally longer string",
+ sdslen(x) == 1 && memcmp(x,"a\0",2) == 0)
+
+ x = sdscpy(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk");
+ test_cond("sdscpy() against an originally shorter string",
+ sdslen(x) == 33 &&
+ memcmp(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk\0",33) == 0)
+
+ sdsfree(x);
+ x = sdscatprintf(sdsempty(),"%d",123);
+ test_cond("sdscatprintf() seems working in the base case",
+ sdslen(x) == 3 && memcmp(x,"123\0",4) == 0)
+
+ sdsfree(x);
+ x = sdsnew("--");
+ x = sdscatfmt(x, "Hello %s World %I,%I--", "Hi!", LLONG_MIN,LLONG_MAX);
+ test_cond("sdscatfmt() seems working in the base case",
+ sdslen(x) == 60 &&
+ memcmp(x,"--Hello Hi! World -9223372036854775808,"
+ "9223372036854775807--",60) == 0)
+ printf("[%s]\n",x);
+
+ sdsfree(x);
+ x = sdsnew("--");
+ x = sdscatfmt(x, "%u,%U--", UINT_MAX, ULLONG_MAX);
+ test_cond("sdscatfmt() seems working with unsigned numbers",
+ sdslen(x) == 35 &&
+ memcmp(x,"--4294967295,18446744073709551615--",35) == 0)
+
+ sdsfree(x);
+ x = sdsnew(" x ");
+ sdstrim(x," x");
+ test_cond("sdstrim() works when all chars match",
+ sdslen(x) == 0)
+
+ sdsfree(x);
+ x = sdsnew(" x ");
+ sdstrim(x," ");
+ test_cond("sdstrim() works when a single char remains",
+ sdslen(x) == 1 && x[0] == 'x')
+
+ sdsfree(x);
+ x = sdsnew("xxciaoyyy");
+ sdstrim(x,"xy");
+ test_cond("sdstrim() correctly trims characters",
+ sdslen(x) == 4 && memcmp(x,"ciao\0",5) == 0)
+
+ y = sdsdup(x);
+ sdsrange(y,1,1);
+ test_cond("sdsrange(...,1,1)",
+ sdslen(y) == 1 && memcmp(y,"i\0",2) == 0)
+
+ sdsfree(y);
+ y = sdsdup(x);
+ sdsrange(y,1,-1);
+ test_cond("sdsrange(...,1,-1)",
+ sdslen(y) == 3 && memcmp(y,"iao\0",4) == 0)
+
+ sdsfree(y);
+ y = sdsdup(x);
+ sdsrange(y,-2,-1);
+ test_cond("sdsrange(...,-2,-1)",
+ sdslen(y) == 2 && memcmp(y,"ao\0",3) == 0)
+
+ sdsfree(y);
+ y = sdsdup(x);
+ sdsrange(y,2,1);
+ test_cond("sdsrange(...,2,1)",
+ sdslen(y) == 0 && memcmp(y,"\0",1) == 0)
+
+ sdsfree(y);
+ y = sdsdup(x);
+ sdsrange(y,1,100);
+ test_cond("sdsrange(...,1,100)",
+ sdslen(y) == 3 && memcmp(y,"iao\0",4) == 0)
+
+ sdsfree(y);
+ y = sdsdup(x);
+ sdsrange(y,100,100);
+ test_cond("sdsrange(...,100,100)",
+ sdslen(y) == 0 && memcmp(y,"\0",1) == 0)
+
+ sdsfree(y);
+ sdsfree(x);
+ x = sdsnew("foo");
+ y = sdsnew("foa");
+ test_cond("sdscmp(foo,foa)", sdscmp(x,y) > 0)
+
+ sdsfree(y);
+ sdsfree(x);
+ x = sdsnew("bar");
+ y = sdsnew("bar");
+ test_cond("sdscmp(bar,bar)", sdscmp(x,y) == 0)
+
+ sdsfree(y);
+ sdsfree(x);
+ x = sdsnew("aar");
+ y = sdsnew("bar");
+ test_cond("sdscmp(bar,bar)", sdscmp(x,y) < 0)
+
+ sdsfree(y);
+ sdsfree(x);
+ x = sdsnewlen("\a\n\0foo\r",7);
+ y = sdscatrepr(sdsempty(),x,sdslen(x));
+ test_cond("sdscatrepr(...data...)",
+ memcmp(y,"\"\\a\\n\\x00foo\\r\"",15) == 0)
+
+ {
+ unsigned int oldfree;
+ char *p;
+ int step = 10, j, i;
+
+ sdsfree(x);
+ sdsfree(y);
+ x = sdsnew("0");
+ test_cond("sdsnew() free/len buffers", sdslen(x) == 1 && sdsavail(x) == 0);
+
+ /* Run the test a few times in order to hit the first two
+ * SDS header types. */
+ for (i = 0; i < 10; i++) {
+ int oldlen = sdslen(x);
+ x = sdsMakeRoomFor(x,step);
+ int type = x[-1]&SDS_TYPE_MASK;
+
+ test_cond("sdsMakeRoomFor() len", sdslen(x) == oldlen);
+ if (type != SDS_TYPE_5) {
+ test_cond("sdsMakeRoomFor() free", sdsavail(x) >= step);
+ oldfree = sdsavail(x);
+ }
+ p = x+oldlen;
+ for (j = 0; j < step; j++) {
+ p[j] = 'A'+j;
+ }
+ sdsIncrLen(x,step);
+ }
+ test_cond("sdsMakeRoomFor() content",
+ memcmp("0ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ",x,101) == 0);
+ test_cond("sdsMakeRoomFor() final length",sdslen(x)==101);
+
+ sdsfree(x);
+ }
+ }
+ test_report()
+ return 0;
+}
+#endif
+
+#ifdef SDS_TEST_MAIN
+int main(void) {
+ return sdsTest();
+}
+#endif
diff --git a/util/sds/sds.h b/util/sds/sds.h
new file mode 100644
index 0000000..adcc12c
--- /dev/null
+++ b/util/sds/sds.h
@@ -0,0 +1,274 @@
+/* SDSLib 2.0 -- A C dynamic strings library
+ *
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Oran Agra
+ * Copyright (c) 2015, Redis Labs, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __SDS_H
+#define __SDS_H
+
+#define SDS_MAX_PREALLOC (1024*1024)
+extern const char *SDS_NOINIT;
+
+#include <sys/types.h>
+#include <stdarg.h>
+#include <stdint.h>
+
+typedef char *sds;
+
+/* Note: sdshdr5 is never used, we just access the flags byte directly.
+ * However is here to document the layout of type 5 SDS strings. */
+struct __attribute__ ((__packed__)) sdshdr5 {
+ unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
+ char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr8 {
+ uint8_t len; /* used */
+ uint8_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
+ char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr16 {
+ uint16_t len; /* used */
+ uint16_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
+ char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr32 {
+ uint32_t len; /* used */
+ uint32_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
+ char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr64 {
+ uint64_t len; /* used */
+ uint64_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
+ char buf[];
+};
+
+#define SDS_TYPE_5 0
+#define SDS_TYPE_8 1
+#define SDS_TYPE_16 2
+#define SDS_TYPE_32 3
+#define SDS_TYPE_64 4
+#define SDS_TYPE_MASK 7
+#define SDS_TYPE_BITS 3
+#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T)));
+#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))
+#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)
+
+static inline size_t sdslen(const sds s) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ return SDS_TYPE_5_LEN(flags);
+ case SDS_TYPE_8:
+ return SDS_HDR(8,s)->len;
+ case SDS_TYPE_16:
+ return SDS_HDR(16,s)->len;
+ case SDS_TYPE_32:
+ return SDS_HDR(32,s)->len;
+ case SDS_TYPE_64:
+ return SDS_HDR(64,s)->len;
+ }
+ return 0;
+}
+
+static inline size_t sdsavail(const sds s) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5: {
+ return 0;
+ }
+ case SDS_TYPE_8: {
+ SDS_HDR_VAR(8,s);
+ return sh->alloc - sh->len;
+ }
+ case SDS_TYPE_16: {
+ SDS_HDR_VAR(16,s);
+ return sh->alloc - sh->len;
+ }
+ case SDS_TYPE_32: {
+ SDS_HDR_VAR(32,s);
+ return sh->alloc - sh->len;
+ }
+ case SDS_TYPE_64: {
+ SDS_HDR_VAR(64,s);
+ return sh->alloc - sh->len;
+ }
+ }
+ return 0;
+}
+
+static inline void sdssetlen(sds s, size_t newlen) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ {
+ unsigned char *fp = ((unsigned char*)s)-1;
+ *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
+ }
+ break;
+ case SDS_TYPE_8:
+ SDS_HDR(8,s)->len = newlen;
+ break;
+ case SDS_TYPE_16:
+ SDS_HDR(16,s)->len = newlen;
+ break;
+ case SDS_TYPE_32:
+ SDS_HDR(32,s)->len = newlen;
+ break;
+ case SDS_TYPE_64:
+ SDS_HDR(64,s)->len = newlen;
+ break;
+ }
+}
+
+static inline void sdsinclen(sds s, size_t inc) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ {
+ unsigned char *fp = ((unsigned char*)s)-1;
+ unsigned char newlen = SDS_TYPE_5_LEN(flags)+inc;
+ *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
+ }
+ break;
+ case SDS_TYPE_8:
+ SDS_HDR(8,s)->len += inc;
+ break;
+ case SDS_TYPE_16:
+ SDS_HDR(16,s)->len += inc;
+ break;
+ case SDS_TYPE_32:
+ SDS_HDR(32,s)->len += inc;
+ break;
+ case SDS_TYPE_64:
+ SDS_HDR(64,s)->len += inc;
+ break;
+ }
+}
+
+/* sdsalloc() = sdsavail() + sdslen() */
+static inline size_t sdsalloc(const sds s) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ return SDS_TYPE_5_LEN(flags);
+ case SDS_TYPE_8:
+ return SDS_HDR(8,s)->alloc;
+ case SDS_TYPE_16:
+ return SDS_HDR(16,s)->alloc;
+ case SDS_TYPE_32:
+ return SDS_HDR(32,s)->alloc;
+ case SDS_TYPE_64:
+ return SDS_HDR(64,s)->alloc;
+ }
+ return 0;
+}
+
+static inline void sdssetalloc(sds s, size_t newlen) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ /* Nothing to do, this type has no total allocation info. */
+ break;
+ case SDS_TYPE_8:
+ SDS_HDR(8,s)->alloc = newlen;
+ break;
+ case SDS_TYPE_16:
+ SDS_HDR(16,s)->alloc = newlen;
+ break;
+ case SDS_TYPE_32:
+ SDS_HDR(32,s)->alloc = newlen;
+ break;
+ case SDS_TYPE_64:
+ SDS_HDR(64,s)->alloc = newlen;
+ break;
+ }
+}
+
+sds sdsnewlen(const void *init, size_t initlen);
+sds sdsnew(const char *init);
+sds sdsempty(void);
+sds sdsdup(const sds s);
+void sdsfree(sds s);
+sds sdsgrowzero(sds s, size_t len);
+sds sdscatlen(sds s, const void *t, size_t len);
+sds sdscat(sds s, const char *t);
+sds sdscatsds(sds s, const sds t);
+sds sdscpylen(sds s, const char *t, size_t len);
+sds sdscpy(sds s, const char *t);
+
+sds sdscatvprintf(sds s, const char *fmt, va_list ap);
+#ifdef __GNUC__
+sds sdscatprintf(sds s, const char *fmt, ...)
+ __attribute__((format(printf, 2, 3)));
+#else
+sds sdscatprintf(sds s, const char *fmt, ...);
+#endif
+
+sds sdscatfmt(sds s, char const *fmt, ...);
+sds sdstrim(sds s, const char *cset);
+void sdsrange(sds s, ssize_t start, ssize_t end);
+void sdsupdatelen(sds s);
+void sdsclear(sds s);
+int sdscmp(const sds s1, const sds s2);
+sds *sdssplitlen(const char *s, ssize_t len, const char *sep, int seplen, int *count);
+void sdsfreesplitres(sds *tokens, int count);
+void sdstolower(sds s);
+void sdstoupper(sds s);
+sds sdsfromlonglong(long long value);
+sds sdscatrepr(sds s, const char *p, size_t len);
+sds *sdssplitargs(const char *line, int *argc);
+sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen);
+sds sdsjoin(char **argv, int argc, char *sep);
+sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen);
+
+/* Low level functions exposed to the user API */
+sds sdsMakeRoomFor(sds s, size_t addlen);
+void sdsIncrLen(sds s, ssize_t incr);
+sds sdsRemoveFreeSpace(sds s);
+size_t sdsAllocSize(sds s);
+void *sdsAllocPtr(sds s);
+
+/* Export the allocator used by SDS to the program using SDS.
+ * Sometimes the program SDS is linked to, may use a different set of
+ * allocators, but may want to allocate or free things that SDS will
+ * respectively free or allocate. */
+void *sds_malloc(size_t size);
+void *sds_realloc(void *ptr, size_t size);
+void sds_free(void *ptr);
+
+#ifdef REDIS_TEST
+int sdsTest(int argc, char *argv[]);
+#endif
+
+#endif
diff --git a/util/sds/sdsalloc.h b/util/sds/sdsalloc.h
new file mode 100644
index 0000000..f43023c
--- /dev/null
+++ b/util/sds/sdsalloc.h
@@ -0,0 +1,42 @@
+/* SDSLib 2.0 -- A C dynamic strings library
+ *
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Oran Agra
+ * Copyright (c) 2015, Redis Labs, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* SDS allocator selection.
+ *
+ * This file is used in order to change the SDS allocator at compile time.
+ * Just define the following defines to what you want to use. Also add
+ * the include of your alternate allocator if needed (not needed in order
+ * to use the default libc allocator). */
+
+#define s_malloc malloc
+#define s_realloc realloc
+#define s_free free
diff --git a/util/util.h b/util/util.h
new file mode 100644
index 0000000..ef378ca
--- /dev/null
+++ b/util/util.h
@@ -0,0 +1,78 @@
+#ifndef UTIL_H
+#define UTIL_H
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#if defined(__GNUC__) || defined(__clang__)
+# define NORETURN __attribute__((__noreturn__))
+#else
+# define NORETURN
+#endif
+
+#if defined(__GNUC__)
+# define UNUSED __attribute__((__unused__))
+#else
+# define UNUSED
+#endif
+
+#define N_ENTRIES(x) (sizeof(x) / sizeof(*x))
+#define member_size(type, member) sizeof(((type *)(0))->member)
+#define TYPEOF(x) __typeof__(x)
+
+#define PACKED __attribute__((__packed__))
+
+#if defined(__GNUC__)
+# define FALLTHROUGH __attribute__ ((fallthrough))
+#else
+# define FALLTHROUGH
+#endif
+
+NORETURN void die(const char *format, ...) __attribute__((format (printf, 1, 2)));
+NORETURN void die_errno(const char *format, ...) __attribute__((format (printf, 1, 2)));
+
+void err(const char * format, ...) __attribute__((format (printf, 1, 2)));
+void err_errno(const char * format, ...) __attribute__((format (printf, 1, 2)));
+
+#define XREALLOC_ARRAY(x, alloc) ((x) = xrealloc((x), (alloc) * sizeof(*(x))))
+#define ALLOC_ARRAY(x, n) (malloc((n) * sizeof(*x)))
+
+#define ALIGN_ADDR_MASK(addr, mask) \
+ ((addr + ((mask) - 1)) & ~((mask) - 1))
+
+#define ALIGN_ADDR_16(addr) ALIGN_ADDR_MASK(addr, 0x10)
+#define ALIGN_ADDR_32(addr) ALIGN_ADDR_MASK(addr, 0x20)
+#define ALIGN_ADDR_64(addr) ALIGN_ADDR_MASK(addr, 0x40)
+#define ALIGN_ADDR(addr, bytes) ALIGN_ADDR_MASK(addr, (8ULL*(bytes)))
+
+// Scaling: 0, 24, 66, 140, 269, 495, 890
+#define ALLOC_NC(x) (((x)+14)*7/4)
+
+#define AM_RESIZE_ALLOC(buf, sz, cap, item_sz, alloc_nc) \
+ do { \
+ TYPEOF(sz) _sz = (sz); \
+ if((cap) < _sz) { \
+ void * nb; \
+ TYPEOF(cap) nc; \
+ if(_sz < alloc_nc((cap))) { \
+ nc = alloc_nc((cap)); \
+ } else { \
+ nc = _sz; \
+ } \
+ nb = realloc((buf), (item_sz) * nc); \
+ if(nb) { \
+ (cap) = nc; \
+ (buf) = nb; \
+ } else { \
+ free((buf)); \
+ (buf) = NULL; \
+ } \
+ } \
+ } while(0)
+
+#define AM_RESIZE(buf, sz, cap) \
+ AM_RESIZE_ALLOC(buf, sz, cap, sizeof(*buf), ALLOC_NC)
+
+#endif