andersch.dev

<2024-01-08 Mon>

Strings

In programming, a string describes the data type used to store sequences of characters, i.e. human-readable text.

String views (String slices)

Conventional strings in C are null-terminated sequences of bytes. Several problems arise from this, such as buffer overflow vulnerabilities and performance concerns, because the length of a string can only be determined by traversing the entire sequence up to the terminator.

The approach of string views (or string slices) addresses these issues. Here, a string is defined by a pointer and a size.

String API

// String view: a pointer plus an explicit size, instead of relying on a
// terminating NUL byte to find the end of the text.
typedef struct string_t
{
    char*  data; // first byte of the viewed text
    int    size; // number of bytes in the view; NOTE needs to be signed, so no size_t
} string_t;

// Build a string_t from a string literal or char array.
// sizeof() of a literal counts the terminating NUL, so one is subtracted to
// get the number of visible characters (c_string("") therefore has size 0).
// Only valid for literals/arrays: for a plain char pointer, sizeof yields
// the pointer size, not the text length.
#define c_string(str) (string_t) { (str), (int)(sizeof(str) - 1) }

// String view API: declarations only.
// NOTE(review): string_substring takes size_t bounds while string_t.size is
// an int — confirm the intended index type.
// NOTE(review): string_pop_first_split receives src by value, so it cannot
// shorten the caller's view, whereas for_str_split passes a pointer to
// str_pop_first_split — confirm which signature is intended.
int      string_valid          (string_t string);                        // is size > 0?
int      string_match          (string_t a, string_t b, int flags);      // flags == case in-/sensitive
int      string_contains       (string_t haystack, string_t needle);     // presumably nonzero if needle occurs in haystack — confirm
string_t string_substring      (string_t src, size_t from, size_t to);   // TODO confirm whether `to` is inclusive or exclusive
string_t string_find_first     (string_t haystack, string_t needle);     // TODO confirm what is returned when needle is absent
string_t string_find_last      (string_t haystack, string_t needle);     // TODO confirm what is returned when needle is absent
string_t string_remove_prefix  (string_t src, string_t prefix);          // presumably returns src unchanged if prefix does not match — confirm
string_t string_remove_suffix  (string_t src, string_t suffix);          // presumably returns src unchanged if suffix does not match — confirm
string_t string_pop_first_split(string_t src, string_t split_delimiter); // returns e.g. 2020 for ("2020/01", "/")

// if C11
// Type-generic front end for string_pop_first_split: the delimiter may be
// either a string_t or a string literal / char array (wrapped via c_string).
// Every _Generic association must be a valid expression whatever the argument
// type is, so the c_string branch is fed "" when the delimiter is already a
// string_t; that branch is then well-typed but never selected.
// NOTE: the char-array path goes through c_string and therefore sizeof, so a
// plain char pointer delimiter would get the wrong length.
#define string_delimiter_chars_(delim) \
    _Generic((delim), string_t: "", default: (delim))
#define string_pop_first_split_(src, split_delimiter)                     \
    string_pop_first_split((src), _Generic((split_delimiter),             \
        string_t: (split_delimiter),                                      \
        default:  c_string(string_delimiter_chars_(split_delimiter))))

// Loop header that walks the pieces of `src` separated by `split_by`; `iter`
// names the loop variable that holds the current piece.
// The delimiter is captured first so the `split_by` argument expression is
// evaluated exactly once (it used to be expanded twice).
// NOTE(review): str, str_valid, str_pop_first_split and macro_var are not
// defined in this snippet (the API above uses string_t / string_* names and
// string_pop_first_split takes src by value) — confirm the intended names.
// NOTE(review): the loop ends as soon as the remaining source is invalid,
// which may skip the final piece — verify against str_pop_first_split.
#define for_str_split(iter, src, split_by)                                    \
  for (str macro_var(split_by_) = (split_by),                                 \
       macro_var(src_) = (src),                                               \
       iter = str_pop_first_split(&macro_var(src_), macro_var(split_by_));    \
       str_valid(macro_var(src_));                                            \
       iter = str_pop_first_split(&macro_var(src_), macro_var(split_by_)))

String builder

A string builder is used to construct a string. To do this, it is equipped with an allocator (e.g. an arena) and an API that allows operations on the string.

// String view: a pointer plus an explicit size, instead of relying on a
// terminating NUL byte to find the end of the text.
typedef struct string_t
{
    char*  data; // first byte of the viewed text
    int    size; // number of bytes in the view; NOTE needs to be signed, so no size_t
} string_t;

// Build a string_t from a string literal or char array.
// sizeof() of a literal counts the terminating NUL, so one is subtracted to
// get the number of visible characters (c_string("") therefore has size 0).
// Only valid for literals/arrays: for a plain char pointer, sizeof yields
// the pointer size, not the text length.
#define c_string(str) (string_t) { (str), (int)(sizeof(str) - 1) }

// String view API: declarations only.
// NOTE(review): string_substring takes size_t bounds while string_t.size is
// an int — confirm the intended index type.
// NOTE(review): string_pop_first_split receives src by value, so it cannot
// shorten the caller's view, whereas for_str_split passes a pointer to
// str_pop_first_split — confirm which signature is intended.
int      string_valid          (string_t string);                        // is size > 0?
int      string_match          (string_t a, string_t b, int flags);      // flags == case in-/sensitive
int      string_contains       (string_t haystack, string_t needle);     // presumably nonzero if needle occurs in haystack — confirm
string_t string_substring      (string_t src, size_t from, size_t to);   // TODO confirm whether `to` is inclusive or exclusive
string_t string_find_first     (string_t haystack, string_t needle);     // TODO confirm what is returned when needle is absent
string_t string_find_last      (string_t haystack, string_t needle);     // TODO confirm what is returned when needle is absent
string_t string_remove_prefix  (string_t src, string_t prefix);          // presumably returns src unchanged if prefix does not match — confirm
string_t string_remove_suffix  (string_t src, string_t suffix);          // presumably returns src unchanged if suffix does not match — confirm
string_t string_pop_first_split(string_t src, string_t split_delimiter); // returns e.g. 2020 for ("2020/01", "/")

// if C11
// Type-generic front end for string_pop_first_split: the delimiter may be
// either a string_t or a string literal / char array (wrapped via c_string).
// Every _Generic association must be a valid expression whatever the argument
// type is, so the c_string branch is fed "" when the delimiter is already a
// string_t; that branch is then well-typed but never selected.
// NOTE: the char-array path goes through c_string and therefore sizeof, so a
// plain char pointer delimiter would get the wrong length.
#define string_delimiter_chars_(delim) \
    _Generic((delim), string_t: "", default: (delim))
#define string_pop_first_split_(src, split_delimiter)                     \
    string_pop_first_split((src), _Generic((split_delimiter),             \
        string_t: (split_delimiter),                                      \
        default:  c_string(string_delimiter_chars_(split_delimiter))))

// Loop header that walks the pieces of `src` separated by `split_by`; `iter`
// names the loop variable that holds the current piece.
// The delimiter is captured first so the `split_by` argument expression is
// evaluated exactly once (it used to be expanded twice).
// NOTE(review): str, str_valid, str_pop_first_split and macro_var are not
// defined in this snippet (the API above uses string_t / string_* names and
// string_pop_first_split takes src by value) — confirm the intended names.
// NOTE(review): the loop ends as soon as the remaining source is invalid,
// which may skip the final piece — verify against str_pop_first_split.
#define for_str_split(iter, src, split_by)                                    \
  for (str macro_var(split_by_) = (split_by),                                 \
       macro_var(src_) = (src),                                               \
       iter = str_pop_first_split(&macro_var(src_), macro_var(split_by_));    \
       str_valid(macro_var(src_));                                            \
       iter = str_pop_first_split(&macro_var(src_), macro_var(split_by_)))
// Forward declaration: the arena is only ever handled through a pointer here.
typedef struct mem_arena_t mem_arena_t;
// String builder: state for constructing a string with memory supplied by an
// allocator (e.g. an arena).
typedef struct string_builder_t
{
    char*        data;     // buffer holding the text built so far
    size_t       size;     // presumably the number of bytes currently used — confirm
    size_t       capacity; // presumably the number of bytes available in data — confirm
    mem_arena_t* arena;    // allocator the builder is equipped with
} string_builder_t;

// Builder operations (declarations only).
// NOTE(review): positions and sizes are size_t here while string_t.size is an
// int — confirm how the two are meant to convert.
string_builder_t string_builder_create(size_t size, mem_arena_t* arena);                         // TODO confirm: is `size` the initial capacity?
void             string_builder_append(string_builder_t* builder, string_t string);
void             string_builder_insert(string_builder_t* builder, string_t string, size_t at);
void             string_builder_remove(string_builder_t* builder, size_t from, size_t to);      // TODO confirm whether `to` is inclusive or exclusive
string_t         string_builder_finish(string_builder_t* builder);                               // hands the built text back as a string_t

String API Usage Code

// String view: a pointer plus an explicit size, instead of relying on a
// terminating NUL byte to find the end of the text.
typedef struct string_t
{
    char*  data; // first byte of the viewed text
    int    size; // number of bytes in the view; NOTE needs to be signed, so no size_t
} string_t;

// Build a string_t from a string literal or char array.
// sizeof() of a literal counts the terminating NUL, so one is subtracted to
// get the number of visible characters (c_string("") therefore has size 0).
// Only valid for literals/arrays: for a plain char pointer, sizeof yields
// the pointer size, not the text length.
#define c_string(str) (string_t) { (str), (int)(sizeof(str) - 1) }

// String view API: declarations only.
// NOTE(review): string_substring takes size_t bounds while string_t.size is
// an int — confirm the intended index type.
// NOTE(review): string_pop_first_split receives src by value, so it cannot
// shorten the caller's view, whereas for_str_split passes a pointer to
// str_pop_first_split — confirm which signature is intended.
int      string_valid          (string_t string);                        // is size > 0?
int      string_match          (string_t a, string_t b, int flags);      // flags == case in-/sensitive
int      string_contains       (string_t haystack, string_t needle);     // presumably nonzero if needle occurs in haystack — confirm
string_t string_substring      (string_t src, size_t from, size_t to);   // TODO confirm whether `to` is inclusive or exclusive
string_t string_find_first     (string_t haystack, string_t needle);     // TODO confirm what is returned when needle is absent
string_t string_find_last      (string_t haystack, string_t needle);     // TODO confirm what is returned when needle is absent
string_t string_remove_prefix  (string_t src, string_t prefix);          // presumably returns src unchanged if prefix does not match — confirm
string_t string_remove_suffix  (string_t src, string_t suffix);          // presumably returns src unchanged if suffix does not match — confirm
string_t string_pop_first_split(string_t src, string_t split_delimiter); // returns e.g. 2020 for ("2020/01", "/")

// if C11
// Type-generic front end for string_pop_first_split: the delimiter may be
// either a string_t or a string literal / char array (wrapped via c_string).
// Every _Generic association must be a valid expression whatever the argument
// type is, so the c_string branch is fed "" when the delimiter is already a
// string_t; that branch is then well-typed but never selected.
// NOTE: the char-array path goes through c_string and therefore sizeof, so a
// plain char pointer delimiter would get the wrong length.
#define string_delimiter_chars_(delim) \
    _Generic((delim), string_t: "", default: (delim))
#define string_pop_first_split_(src, split_delimiter)                     \
    string_pop_first_split((src), _Generic((split_delimiter),             \
        string_t: (split_delimiter),                                      \
        default:  c_string(string_delimiter_chars_(split_delimiter))))

// Loop header that walks the pieces of `src` separated by `split_by`; `iter`
// names the loop variable that holds the current piece.
// The delimiter is captured first so the `split_by` argument expression is
// evaluated exactly once (it used to be expanded twice).
// NOTE(review): str, str_valid, str_pop_first_split and macro_var are not
// defined in this snippet (the API above uses string_t / string_* names and
// string_pop_first_split takes src by value) — confirm the intended names.
// NOTE(review): the loop ends as soon as the remaining source is invalid,
// which may skip the final piece — verify against str_pop_first_split.
#define for_str_split(iter, src, split_by)                                    \
  for (str macro_var(split_by_) = (split_by),                                 \
       macro_var(src_) = (src),                                               \
       iter = str_pop_first_split(&macro_var(src_), macro_var(split_by_));    \
       str_valid(macro_var(src_));                                            \
       iter = str_pop_first_split(&macro_var(src_), macro_var(split_by_)))
// Forward declaration: the arena is only ever handled through a pointer here.
typedef struct mem_arena_t mem_arena_t;
// String builder: state for constructing a string with memory supplied by an
// allocator (e.g. an arena).
typedef struct string_builder_t
{
    char*        data;     // buffer holding the text built so far
    size_t       size;     // presumably the number of bytes currently used — confirm
    size_t       capacity; // presumably the number of bytes available in data — confirm
    mem_arena_t* arena;    // allocator the builder is equipped with
} string_builder_t;

// Builder operations (declarations only).
// NOTE(review): positions and sizes are size_t here while string_t.size is an
// int — confirm how the two are meant to convert.
string_builder_t string_builder_create(size_t size, mem_arena_t* arena);                         // TODO confirm: is `size` the initial capacity?
void             string_builder_append(string_builder_t* builder, string_t string);
void             string_builder_insert(string_builder_t* builder, string_t string, size_t at);
void             string_builder_remove(string_builder_t* builder, size_t from, size_t to);      // TODO confirm whether `to` is inclusive or exclusive
string_t         string_builder_finish(string_builder_t* builder);                               // hands the built text back as a string_t
// Report whether the view holds at least one byte: 1 if size > 0, else 0.
// A negative size is treated the same as an empty view.
int string_valid(string_t string)
{
    if (string.size > 0)
    {
        return 1;
    }
    return 0;
}

// Usage example: build three views and print each one that string_valid accepts.
int main(void)
{
    string_t hello  = c_string("Hello World");
    string_t test   = c_string("");
    string_t test2  = {NULL, 0};
    // "%.*s" prints at most `size` bytes, so the output is bounded by the
    // view's size instead of depending on a terminating NUL in the data.
    if (string_valid(hello)) { printf("1: %.*s, size: %i\n", hello.size, hello.data, hello.size); }
    if (string_valid(test))  { printf("2: %.*s, size: %i\n", test.size,  test.data,  test.size); }
    if (string_valid(test2)) { printf("3: %.*s, size: %i\n", test2.size, test2.data, test2.size); }
    return 0;
}

Resources