Efficient C String Builder

One task that always annoys me when I work with C is building strings. `snprintf` is all well and good if I know exactly the format I want. It falls down with anything that needs to be build iteratively and dynamically. Other languages that have built in strings will automatically create a new string when concatenating but C doesn’t actually have “strings”.

I realize that concatenating strings is really inefficient and should be avoided for all but the smallest and simplest string. To deal with this, a few other languages (Java, .Net) have StringBuilder objects which are very efficient. To make things easier on myself I wrote a simple string builder in C.

str_builder.h

#ifndef __STR_BUILDER_H__
#define __STR_BUILDER_H__

#include <stddef.h>

/*! addtogroup str_builder String Builder
 *
 * A mutable string of characters used to dynamically build a string.
 *
 * @{
 */

struct str_builder;
typedef struct str_builder str_builder_t;

/* - - - - */

/*! Create a str builder.
 *
 * return str builder.
 */
str_builder_t *str_builder_create(void);

/*! Destroy a str builder.
 *
 * param[in,out] sb Builder.
 */
void str_builder_destroy(str_builder_t *sb);

/* - - - - */

/*! Add a string to the builder.
 *
 * param[in,out] sb  Builder.
 * param[in]     str String to add.
 * param[in]     len Length of string to add. If 0, strlen will be called
 *                internally to determine length.
 */
void str_builder_add_str(str_builder_t *sb, const char *str, size_t len);

/*! Add a character to the builder.
 *
 * param[in,out] sb Builder.
 * param[in]     c  Character.
 */
void str_builder_add_char(str_builder_t *sb, char c);

/*! Add an integer as to the builder.
 *
 * param[in,out] sb  Builder.
 * param[in]     val Int to add.
 */
void str_builder_add_int(str_builder_t *sb, int val);

/* - - - - */

/*! Clear the builder.
 *
 * param[in,out] sb  Builder.
 */
void str_builder_clear(str_builder_t *sb);

/*! Remove data from the end of the builder.
 *
 * param[in,out] sb  Builder.
 * param[in]     len The new length of the string.
 *                    Anything after this length is removed.
 */
void str_builder_truncate(str_builder_t *sb, size_t len);

/*! Remove data from the beginning of the builder.
 *
 * param[in,out] sb  Builder.
 * param[in]     len The length to remove.
 */
void str_builder_drop(str_builder_t *sb, size_t len);

/* - - - - */

/*! The length of the string contained in the builder.
 *
 * param[in] sb Builder.
 *
 * return Length.
 */
size_t str_builder_len(const str_builder_t *sb);

/*! A pointer to the internal buffer with the builder's string data.
 *
 * The data is guaranteed to be NULL terminated.
 *
 * param[in] sb Builder.
 *
 * return Pointer to internal string data.
 */
const char *str_builder_peek(const str_builder_t *sb);

/*! Return a copy of the string data.
 *
 * param[in]  sb  Builder.
 * param[out] len Length of returned data. Can be NULL if not needed.
 *
 * return Copy of the internal string data.
 */
char *str_builder_dump(const str_builder_t *sb, size_t *len);

/*! @}
 */

#endif /* __STR_BUILDER_H__ */

There are two different types of manipulation that’s supported. 1. Appending data. 2. Removing data. The builder only allows adding a few different types but it could be expanded for additional ones. The integer append is a good example of what can be done. As far as removing data goes, it’s beginning, end or all only. This doesn’t support insertion or removal in the middle because it’s unnecessarily complex and I’ve never seen a good use case for it.

The str builder object has a couple of guarantees. The internal data is always stored NULL terminated so it’s safe to use peek with something like `printf`. While this is intended for strings the length of the data in the object is known and adding a string optionally takes a length. So, you could use it with binary data.

str_builder.c

#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "str_builder.h"

/* - - - - */

static const size_t str_builder_min_size = 32;

struct str_builder {
    char   *str;
    size_t  alloced;
    size_t  len;
};

/* - - - - */

str_builder_t *str_builder_create(void)
{
    str_builder_t *sb;

    sb          = calloc(1, sizeof(*sb));
    sb->str     = malloc(str_builder_min_size);
    *sb->str    = '\0';
    sb->alloced = str_builder_min_size;
    sb->len     = 0;

    return sb;
}

void str_builder_destroy(str_builder_t *sb)
{
    if (sb == NULL)
        return;
    free(sb->str);
    free(sb);
}

/* - - - - */

/*! Ensure there is enough space for data being added plus a NULL terminator.
 *
 * param[in,out] sb      Builder.
 * param[in]     add_len The length that needs to be added *not* including a NULL terminator.
 */
static void str_builder_ensure_space(str_builder_t *sb, size_t add_len)
{
    if (sb == NULL || add_len == 0)
        return;

    if (sb->alloced >= sb->len+add_len+1)
        return;

    while (sb->alloced < sb->len+add_len+1) {
        /* Doubling growth strategy. */
        sb->alloced <<= 1;
        if (sb->alloced == 0) {
            /* Left shift of max bits will go to 0. An unsigned type set to
             * -1 will return the maximum possible size. However, we should
             *  have run out of memory well before we need to do this. Since
             *  this is the theoretical maximum total system memory we don't
             *  have a flag saying we can't grow any more because it should
             *  be impossible to get to this point. */
            sb->alloced--;
        }
    }
    sb->str = realloc(sb->str, sb->alloced);
}

/* - - - - */

void str_builder_add_str(str_builder_t *sb, const char *str, size_t len)
{
    if (sb == NULL || str == NULL || *str == '\0')
        return;

    if (len == 0)
        len = strlen(str);

    str_builder_ensure_space(sb, len);
    memmove(sb->str+sb->len, str, len);
    sb->len += len;
    sb->str[sb->len] = '\0';
}

void str_builder_add_char(str_builder_t *sb, char c)
{
    if (sb == NULL)
        return;
    str_builder_ensure_space(sb, 1);
    sb->str[sb->len] = c;
    sb->len++;
    sb->str[sb->len] = '\0';
}

void str_builder_add_int(str_builder_t *sb, int val)
{
    char str[12];

    if (sb == NULL)
        return;

    snprintf(str, sizeof(str), "%d", val);
    str_builder_add_str(sb, str, 0);
}

/* - - - - */

void str_builder_clear(str_builder_t *sb)
{
    if (sb == NULL)
        return;
    str_builder_truncate(sb, 0);
}

void str_builder_truncate(str_builder_t *sb, size_t len)
{
    if (sb == NULL || len >= sb->len)
        return;

    sb->len = len;
    sb->str[sb->len] = '\0';
}

void str_builder_drop(str_builder_t *sb, size_t len)
{
    if (sb == NULL || len == 0)
        return;

    if (len >= sb->len) {
        str_builder_clear(sb);
        return;
    }

    sb->len -= len;
    /* +1 to move the NULL. */
    memmove(sb->str, sb->str+len, sb->len+1);
}

/* - - - - */

size_t str_builder_len(const str_builder_t *sb)
{
    if (sb == NULL)
        return 0;
    return sb->len;
}

const char *str_builder_peek(const str_builder_t *sb)
{
    if (sb == NULL)
        return NULL;
    return sb->str;
}

char *str_builder_dump(const str_builder_t *sb, size_t *len)
{
    char *out;

    if (sb == NULL)
        return NULL;

    if (len != NULL)
        *len = sb->len;
    out = malloc(sb->len+1);
    memcpy(out, sb->str, sb->len+1);
    return out;
}

The implementation isn’t all that complex. It’s a buffer to store the data that dynamically grows as necessary. The internal buffer never shrinks because having it, I haven’t found a real benefit. If you reuse the builder to build multiple strings you’re going to add data to it after clearing, truncating, or dropping. If it shrunk, it would shrink then immediately grow once more data is added.

main.c

#include <stdio.h>
#include <stdlib.h>
#include "str_builder.h"

int main(int argc, char **argv)
{
    str_builder_t *sb;
    char           *str;

    sb = str_builder_create();
    printf("createn");
    printf("tsb len=%zuntsb='%s'n", str_builder_len(sb), str_builder_peek(sb));

    str_builder_add_str(sb, "Test", 0);
    printf("Add 'Test'n");
    printf("tsb len=%zuntsb='%s'n", str_builder_len(sb), str_builder_peek(sb));

    str_builder_add_str(sb, " ", 0);
    printf("Add ' 'n");
    printf("tsb len=%zuntsb='%s'n", str_builder_len(sb), str_builder_peek(sb));

    str_builder_add_int(sb, 123);
    printf("Add int 123n");
    printf("tsb len=%zuntsb='%s'n", str_builder_len(sb), str_builder_peek(sb));

    str_builder_truncate(sb, str_builder_len(sb)-2);
    printf("Truncate -2n");
    printf("tsb len=%zuntsb='%s'n", str_builder_len(sb), str_builder_peek(sb));

    str_builder_drop(sb, 3);
    printf("Drop 3n");
    printf("tsb len=%zuntsb='%s'n", str_builder_len(sb), str_builder_peek(sb));

    str = str_builder_dump(sb, NULL);
    printf("Dumpn");
    printf("tsb len=%zuntsb='%s'n", str_builder_len(sb), str_builder_peek(sb));
    printf("tstr='%s'n", str);
    free(str);

    str_builder_destroy(sb);
    return 0;
}

The obligatory test app to show this actually works. Oh, and its output.

create
	sb len=0
	sb=''
Add 'Test'
	sb len=4
	sb='Test'
Add ' '
	sb len=5
	sb='Test '
Add int 123
	sb len=8
	sb='Test 123'
Truncate -2
	sb len=6
	sb='Test 1'
Drop 3
	sb len=3
	sb='t 1'
Dump
	sb len=3
	sb='t 1'
	str='t 1'