Strings
In programming, a string describes the data type used to store sequences of characters, i.e. human-readable text.
String views (String slices)
Conventional strings in C are null-terminated sequences of bytes. Several problems arise from this, such as buffer-overflow vulnerabilities and performance concerns, since the length of a string can only be determined by traversing the entire sequence.
The approach of string views (or string slices) addresses these issues. Here, a string is defined by a pointer and a size.
String API
#include <stddef.h> /* size_t, NULL */

/* Non-owning string view: a pointer to the first character plus an explicit length. */
typedef struct string_t {
    char* data;
    int   size; // NOTE needs to be signed, so no size_t
} string_t;

/*
 * Builds a view over a string literal.
 * sizeof counts the terminating '\0', so one is subtracted: c_string("") has
 * size 0 and c_string("abc") has size 3.
 * Only meaningful for literals/arrays; for a char* argument sizeof would
 * yield the size of the pointer, not the length of the text.
 */
#define c_string(str) (string_t) { (str), (int)(sizeof(str) - 1) }

int string_valid   (string_t string);                 // is size > 0?
int string_match   (string_t a, string_t b, int flags); // flags == case in-/sensitive
int string_contains(string_t haystack, string_t needle);

string_t string_substring      (string_t src, size_t from, size_t to);
string_t string_find_first     (string_t haystack, string_t needle);
string_t string_find_last      (string_t haystack, string_t needle);
string_t string_remove_prefix  (string_t src, string_t prefix);
string_t string_remove_suffix  (string_t src, string_t suffix);
string_t string_pop_first_split(string_t src, string_t split_delimiter); // returns e.g. 2020 for ("2020/01", "/")

// if C11
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L

/* Helper: wraps a NUL-terminated C string in a view; a NULL pointer gives an empty view. */
static inline string_t string_from_cstr_(const char* cstr)
{
    int length = 0;
    while (cstr != NULL && cstr[length] != '\0') {
        length++;
    }
    /* Cast needed because string_t.data is not const-qualified. */
    return (string_t) { (char*)cstr, length };
}

/* Helper: identity conversion, so a string_t delimiter is passed through unchanged. */
static inline string_t string_identity_(string_t string)
{
    return string;
}

/*
 * Type-generic front end for string_pop_first_split: the delimiter may be a
 * C string or a string_t. _Generic selects a converter *function* rather than
 * an expression, because every branch of a generic selection must type-check
 * even when it is not the one chosen. A string literal decays to char*, hence
 * the separate char* association.
 */
#define string_pop_first_split_(src, split_delimiter) \
    string_pop_first_split((src), \
        _Generic((split_delimiter), \
            char*:       string_from_cstr_, \
            const char*: string_from_cstr_, \
            string_t:    string_identity_)(split_delimiter))

#endif

/*
 * Intended to loop over the pieces of src separated by split_by, binding each
 * piece to iter.
 * NOTE(review): relies on `str`, `str_valid`, `str_pop_first_split` (called
 * with a pointer to the remaining source) and `macro_var`, none of which are
 * declared in this snippet - presumably they come from the author's library;
 * confirm before use.
 */
#define for_str_split(iter, src, split_by) \
    for (str macro_var(src_) = src, \
         iter = str_pop_first_split(&macro_var(src_), split_by), \
         macro_var(split_by_) = split_by; \
         str_valid(macro_var(src_)); \
         iter = str_pop_first_split(&macro_var(src_), macro_var(split_by_)))
String builder
A string builder is used to construct a string. To do this, it is equipped with an allocator (e.g. an arena) and an API that allows operations on the string.
#include <stddef.h> /* size_t, NULL */

/* Non-owning string view: a pointer to the first character plus an explicit length. */
typedef struct string_t {
    char* data;
    int   size; // NOTE needs to be signed, so no size_t
} string_t;

/*
 * Builds a view over a string literal.
 * sizeof counts the terminating '\0', so one is subtracted: c_string("") has
 * size 0 and c_string("abc") has size 3.
 * Only meaningful for literals/arrays; for a char* argument sizeof would
 * yield the size of the pointer, not the length of the text.
 */
#define c_string(str) (string_t) { (str), (int)(sizeof(str) - 1) }

int string_valid   (string_t string);                 // is size > 0?
int string_match   (string_t a, string_t b, int flags); // flags == case in-/sensitive
int string_contains(string_t haystack, string_t needle);

string_t string_substring      (string_t src, size_t from, size_t to);
string_t string_find_first     (string_t haystack, string_t needle);
string_t string_find_last      (string_t haystack, string_t needle);
string_t string_remove_prefix  (string_t src, string_t prefix);
string_t string_remove_suffix  (string_t src, string_t suffix);
string_t string_pop_first_split(string_t src, string_t split_delimiter); // returns e.g. 2020 for ("2020/01", "/")

// if C11
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L

/* Helper: wraps a NUL-terminated C string in a view; a NULL pointer gives an empty view. */
static inline string_t string_from_cstr_(const char* cstr)
{
    int length = 0;
    while (cstr != NULL && cstr[length] != '\0') {
        length++;
    }
    /* Cast needed because string_t.data is not const-qualified. */
    return (string_t) { (char*)cstr, length };
}

/* Helper: identity conversion, so a string_t delimiter is passed through unchanged. */
static inline string_t string_identity_(string_t string)
{
    return string;
}

/*
 * Type-generic front end for string_pop_first_split: the delimiter may be a
 * C string or a string_t. _Generic selects a converter *function* rather than
 * an expression, because every branch of a generic selection must type-check
 * even when it is not the one chosen. A string literal decays to char*, hence
 * the separate char* association.
 */
#define string_pop_first_split_(src, split_delimiter) \
    string_pop_first_split((src), \
        _Generic((split_delimiter), \
            char*:       string_from_cstr_, \
            const char*: string_from_cstr_, \
            string_t:    string_identity_)(split_delimiter))

#endif

/*
 * Intended to loop over the pieces of src separated by split_by, binding each
 * piece to iter.
 * NOTE(review): relies on `str`, `str_valid`, `str_pop_first_split` (called
 * with a pointer to the remaining source) and `macro_var`, none of which are
 * declared in this snippet - presumably they come from the author's library;
 * confirm before use.
 */
#define for_str_split(iter, src, split_by) \
    for (str macro_var(src_) = src, \
         iter = str_pop_first_split(&macro_var(src_), split_by), \
         macro_var(split_by_) = split_by; \
         str_valid(macro_var(src_)); \
         iter = str_pop_first_split(&macro_var(src_), macro_var(split_by_)))

/* Opaque allocator handle; only ever used through a pointer here. */
typedef struct mem_arena_t mem_arena_t;

/*
 * State of a string under construction.
 * NOTE(review): presumably `size` is the number of bytes in use and
 * `capacity` the number of bytes reserved from `arena` - confirm against the
 * implementation, which is not part of this snippet.
 */
typedef struct string_builder_t {
    char*        data;
    size_t       size;
    size_t       capacity;
    mem_arena_t* arena;
} string_builder_t;

/* Creates a builder bound to the given arena; `size` is the requested size. */
string_builder_t string_builder_create(size_t size, mem_arena_t* arena);

/* Editing operations on the string being built. */
void string_builder_append(string_builder_t* builder, string_t string);
void string_builder_insert(string_builder_t* builder, string_t string, size_t at);
void string_builder_remove(string_builder_t* builder, size_t from, size_t to);

/* Ends construction and hands back the result as a string view. */
string_t string_builder_finish(string_builder_t* builder);
String API Usage Code
#include <stddef.h> /* size_t, NULL */
#include <stdio.h>  /* printf */

/* Non-owning string view: a pointer to the first character plus an explicit length. */
typedef struct string_t {
    char* data;
    int   size; // NOTE needs to be signed, so no size_t
} string_t;

/*
 * Builds a view over a string literal.
 * sizeof counts the terminating '\0', so one is subtracted: c_string("") has
 * size 0 and c_string("abc") has size 3.
 * Only meaningful for literals/arrays; for a char* argument sizeof would
 * yield the size of the pointer, not the length of the text.
 */
#define c_string(str) (string_t) { (str), (int)(sizeof(str) - 1) }

int string_valid   (string_t string);                 // is size > 0?
int string_match   (string_t a, string_t b, int flags); // flags == case in-/sensitive
int string_contains(string_t haystack, string_t needle);

string_t string_substring      (string_t src, size_t from, size_t to);
string_t string_find_first     (string_t haystack, string_t needle);
string_t string_find_last      (string_t haystack, string_t needle);
string_t string_remove_prefix  (string_t src, string_t prefix);
string_t string_remove_suffix  (string_t src, string_t suffix);
string_t string_pop_first_split(string_t src, string_t split_delimiter); // returns e.g. 2020 for ("2020/01", "/")

// if C11
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L

/* Helper: wraps a NUL-terminated C string in a view; a NULL pointer gives an empty view. */
static inline string_t string_from_cstr_(const char* cstr)
{
    int length = 0;
    while (cstr != NULL && cstr[length] != '\0') {
        length++;
    }
    /* Cast needed because string_t.data is not const-qualified. */
    return (string_t) { (char*)cstr, length };
}

/* Helper: identity conversion, so a string_t delimiter is passed through unchanged. */
static inline string_t string_identity_(string_t string)
{
    return string;
}

/*
 * Type-generic front end for string_pop_first_split: the delimiter may be a
 * C string or a string_t. _Generic selects a converter *function* rather than
 * an expression, because every branch of a generic selection must type-check
 * even when it is not the one chosen. A string literal decays to char*, hence
 * the separate char* association.
 */
#define string_pop_first_split_(src, split_delimiter) \
    string_pop_first_split((src), \
        _Generic((split_delimiter), \
            char*:       string_from_cstr_, \
            const char*: string_from_cstr_, \
            string_t:    string_identity_)(split_delimiter))

#endif

/*
 * Intended to loop over the pieces of src separated by split_by, binding each
 * piece to iter.
 * NOTE(review): relies on `str`, `str_valid`, `str_pop_first_split` (called
 * with a pointer to the remaining source) and `macro_var`, none of which are
 * declared in this snippet - presumably they come from the author's library;
 * confirm before use.
 */
#define for_str_split(iter, src, split_by) \
    for (str macro_var(src_) = src, \
         iter = str_pop_first_split(&macro_var(src_), split_by), \
         macro_var(split_by_) = split_by; \
         str_valid(macro_var(src_)); \
         iter = str_pop_first_split(&macro_var(src_), macro_var(split_by_)))

/* Opaque allocator handle; only ever used through a pointer here. */
typedef struct mem_arena_t mem_arena_t;

/*
 * State of a string under construction.
 * NOTE(review): presumably `size` is the number of bytes in use and
 * `capacity` the number of bytes reserved from `arena` - confirm against the
 * implementation, which is not part of this snippet.
 */
typedef struct string_builder_t {
    char*        data;
    size_t       size;
    size_t       capacity;
    mem_arena_t* arena;
} string_builder_t;

string_builder_t string_builder_create(size_t size, mem_arena_t* arena);
void             string_builder_append(string_builder_t* builder, string_t string);
void             string_builder_insert(string_builder_t* builder, string_t string, size_t at);
void             string_builder_remove(string_builder_t* builder, size_t from, size_t to);
string_t         string_builder_finish(string_builder_t* builder);

/* Returns 1 when the view holds at least one character, 0 otherwise. */
int string_valid(string_t string)
{
    return string.size > 0;
}

/*
 * Demo: only the non-empty view is reported.
 * Prints "1: Hello World, size: 11"; the empty literal and the NULL view are
 * both rejected by string_valid and print nothing.
 */
int main(void)
{
    string_t hello = c_string("Hello World");
    string_t test  = c_string("");
    string_t test2 = {NULL, 0};

    /* %.*s prints exactly `size` characters, so the view does not have to be
     * NUL-terminated. */
    if (string_valid(hello)) {
        printf("1: %.*s, size: %i\n", hello.size, hello.data, hello.size);
    }
    if (string_valid(test)) {
        printf("2: %.*s, size: %i\n", test.size, test.data, test.size);
    }
    if (string_valid(test2)) {
        printf("3: %.*s, size: %i\n", test2.size, test2.data, test2.size);
    }

    return 0;
}