andersch.dev

<2023-04-06 Thu>

Coroutine

Coroutines are "functions whose execution you can pause".

They can be symmetric or asymmetric. Symmetric coroutines have bidirectional control flow: both caller and callee (i.e. the coroutine) can yield control to each other. Asymmetric coroutines are the more common form: the caller invokes the coroutine and it runs until it yields control back to the caller.

Coroutines can also be differentiated by their management of the call stack. Stackful coroutines maintain their own call stack (meaning they can have local variables), while stackless coroutines share the call stack of the caller.

They can be used to implement linear state machines in a straight-forward manner ("Co-routines are to state machines what recursion is to stacks"1).

Stackless Coroutines in C using Macros

A form of Coroutines (more specifically, Protothreads) can be implemented in C using Duff's Device. This technique essentially works by saving the state of where a Coroutine was previously yielded and using a switch statement to jump back to that code (i.e. resuming the function).

This means the state for the coroutine is just an integer that keeps track of its last line of code (which corresponds to a case label on the same line). This integer is often called index, line, or sequence point.

Minimal Implementation

Minimal macro implementation

typedef struct coroutine_t {
    int   index;
    float elapsed; /* optional */
} coroutine_t;

#define COROUTINE_START(co)          do { switch (co->index)  { default:
#define COROUTINE_EXIT(co)           do { goto __co_end; } while (0)
#define COROUTINE_YIELD(co)          do { co->index = __LINE__; COROUTINE_EXIT(co); case __LINE__:; } while (0)
#define COROUTINE_END(co)            } co->index = 0; __co_end:; } while (0)

/* useful macros */
#define COROUTINE_WAIT(co, time, dt) do { case __LINE__: co->index = __LINE__; co->elapsed += dt; do { if (co->elapsed < time) { goto __co_end; } else { co->elapsed = 0; } } while (0); } while (0)
#define COROUTINE_CASE(co, name)     case __LINE__: name: co->index = __LINE__;

Usage code after macro expansion would look like this:

typedef struct coroutine_t {
    int   index;
    float elapsed; /* optional */
} coroutine_t;

#define COROUTINE_START(co)          do { switch (co->index)  { default:
#define COROUTINE_EXIT(co)           do { goto __co_end; } while (0)
#define COROUTINE_YIELD(co)          do { co->index = __LINE__; COROUTINE_EXIT(co); case __LINE__:; } while (0)
#define COROUTINE_END(co)            } co->index = 0; __co_end:; } while (0)

/* useful macros */
#define COROUTINE_WAIT(co, time, dt) do { case __LINE__: co->index = __LINE__; co->elapsed += dt; do { if (co->elapsed < time) { goto __co_end; } else { co->elapsed = 0; } } while (0); } while (0)
#define COROUTINE_CASE(co, name)     case __LINE__: name: co->index = __LINE__;
void some_function(coroutine_t* co)
{
  switch (co->index)  { default:                               // START
    printf("First\n");
    co->index = __LINE__; COROUTINE_EXIT(co); case __LINE__:;  // YIELD
    printf("Third\n");
  } co->index = 0; __co_end:;                                  // END
}

static coroutine_t co;
some_function(&co);
printf("Second\n");
some_function(&co);

TODO Macro Implementation without COROUTINEEND

Implementation without having to specify COROUTINEEND.

#include <stdio.h>

typedef struct coroutine_t
{
    float elapsed;
    int   index;
} coroutine_t;

#define coroutine(co)  coroutine_t* _co = co; switch (_co->index) default: if(0) _body: break; else for(int _i_##__LINE__ = (0); !_i_##__LINE__; (_i_##__LINE__ += 1), _co->index = 0)
#define yield          _co->index = __LINE__; goto _body; case __LINE__:;
#define case(name)     case __LINE__: name: _co->index = __LINE__;

/* helper */
#define wait(time, dt) case __LINE__: _co->index = __LINE__; _co->elapsed += dt; if (_co->elapsed < time) { goto _body; } else { _co->elapsed = 0; }

void coro_func(coroutine_t* co) {
    static int i = 2;

    coroutine(co)
    {
        printf("first\n");
        yield;

        printf("second\n");
        yield;

        if (i == 2) { goto test; }

        while(1) {
            i--;
            printf("in while\n");
            if (!i) break; // breaking works now
            yield;
        }

        case(test);
        {
            printf("test case\n");
            yield;
        }

        wait(1.0f, 0.3f);
        printf("third\n");
        goto test;
    }
}

int main() {
    coroutine_t co = {0};

    coro_func(&co);
    coro_func(&co);
    coro_func(&co); // 0.3f
    coro_func(&co); // 0.6f
    coro_func(&co); // 0.9f
    coro_func(&co); // 1.2f
    coro_func(&co); // 1.2f
    coro_func(&co); // 1.2f
}

Alternative Macro Implementation

// dependency: sokol_time.h
typedef struct Coroutine Coroutine;
struct Coroutine
{
    S32 line;
    U64 start_time; // used for CoroutineWait
    void* data;
};

#define CoroutineBegin(coro)                 switch (coro->line) {case 0: coro->line = 0;
#define CoroutineYield(coro)                 do { coro->line = __LINE__; return; case __LINE__:;} while(0)
#define CoroutineYieldUntil(coro, condition) while (!(condition)) { CoroutineYield(coro); }
#define CoroutineWait(coro, duration)        do {if (coro->start_time == 0.0f) { coro->start_time = stm_now(); } \
                                             CoroutineYieldUntil(coro, stm_sec(stm_now()) > stm_sec(coro->start_time) + (F64)duration); coro->start_time = 0; } while (0)
#define CoroutineEnd(coro)                   do { coro->line = __LINE__; } while (0); }
#define CoroutineReset(coro)                 do { coro->line = 0; } while (0); }
#define CoroutineIsFinished(coro)            (coro->line == -1)
switch (coro->line) {case 0: coro->line = 0;        // BEGIN
  coro->line = __LINE__; return; case __LINE__:;    // YIELD

  while (!(condition)) { CoroutineYield(coro); }    // YIELD UNTIL

  if (coro->start_time == 0.0f) { coro->start_time = stm_now(); }  // WAIT
  CoroutineYieldUntil(coro, stm_sec(stm_now()) > stm_sec(coro->start_time) + (F64)duration);
  coro->start_time = 0;

coro->line = __LINE__; }                            // END or..
coro->line = 0; }                                   // ...RESET

More Elaborate Macro Implementation

This implementation allows for every coroutine to have its own stack and allocate local variables from it and can allow nested coroutine (i.e a function using a coroutine passes that coroutine to another function using coroutines).

typedef struct
{
    float elapsed;                     // timer
    int flag;                          // ?
    int index;                         // code-line & case-label
    #define COROUTINE_MAX_DEPTH 8      // how deep into nested coroutines we go
    int line[COROUTINE_MAX_DEPTH];     // keeps track of the sequence point of nested coroutines
    int stack_pointer;                 // used for pushing on to the stack
    #define COROUTINE_STACK_SIZE (512) // static stack size for every coroutine
    char stack[COROUTINE_STACK_SIZE];  // the stack, usage optional
} coroutine_t;

#define COROUTINE_CASE_OFFSET (1024 * 1024)

static inline void coroutine_init(coroutine_t* co) /* Just zero out the coroutine */
{
    co->elapsed = 0; co->flag = 0; co->index = 0;
    for (int i = 0; i < COROUTINE_MAX_DEPTH; ++i) co->line[i] = 0;
    co->stack_pointer = 0;
}

/**
 * Pops off `size` bytes from `co`'s fixed-size buffer for use as a local variable.
 *
 * void do_work(coroutine_t* co)
 * {
 *     int* a = (int*)coroutine_local_var(co, sizeof(int));
 *     int* b = (int*)coroutine_local_var(co, sizeof(int));
 *
 *     COROUTINE_START(co); // Setup initial values here. This code runs just once.
 *     *a = 3; *b = 7;
 *     // ...
 *     COROUTINE_END(co);
 * }
 */
static inline void* coroutine_local_var(coroutine_t* co, int size)
{
    COROUTINE_ASSERT(co->stack_pointer + size < COROUTINE_STACK_SIZE);
    void* ptr = co->stack + co->stack_pointer;
    co->stack_pointer += (int)size;
    return ptr;
}

#define COROUTINE_START(co)          do { co->flag = 0; switch (co->line[co->index]) { default: /* default is 0, and will run just once. */
#define COROUTINE_CASE(co, name)     case __LINE__: name: co->line[co->index] = __LINE__;
#define COROUTINE_WAIT(co, time, dt) do { case __LINE__: co->line[co->index] = __LINE__; co->elapsed += dt; do { if (co->elapsed < time) { co->flag = 1; goto __co_end; } else { co->elapsed = 0; } } while (0); } while (0)
#define COROUTINE_EXIT(co)           do { co->flag = 1; goto __co_end; } while (0)
#define COROUTINE_YIELD(co)          do { co->line[co->index] = __LINE__; COROUTINE_EXIT(co); case __LINE__:; } while (0)
/* COROUTINE_CALL(co, call_some_other_func(co, my_params)); */
#define COROUTINE_CALL(co, ...)      co->flag = 0; case __LINE__: COROUTINE_ASSERT(co->index < COROUTINE_MAX_DEPTH); co->line[co->index++] = __LINE__; __VA_ARGS__; co->index--; do { if (co->flag) { goto __co_end; } else { case __LINE__ + COROUTINE_CASE_OFFSET: co->line[co->index] = __LINE__ + COROUTINE_CASE_OFFSET; } } while (0)
#define COROUTINE_END(co)            } co->line[co->index] = 0; __co_end:; co->stack_pointer = 0; } while (0)

Coroutines in C using setjmp

https://250bpm.com/blog:48

#include <setjmp.h>

#define STACK_SIZE 16384

volatile int unoptimisable_ = 1;

struct cr_ {
    struct cr_ *next;
    jmp_buf ctx;
};

struct cr_ main_cr_ = {NULL};

struct cr_ *first_cr_ = &main_cr_;
struct cr_ *last_cr_ = &main_cr_;

#define go(fn) \
    do {\
        if(!setjmp(first_cr_->ctx)) {\
            char *stack = malloc(STACK_SIZE);\
            int anchor_[unoptimisable_];\
            char filler_[(char*)&anchor_ - (char*)(stack + STACK_SIZE)];\
            struct cr_ cr[unoptimisable_];\
            cr->next = first_cr_;\
            first_cr_ = cr;\
            char *stack_[unoptimisable_];\
            stack_[0] = stack;\
            fn;\
            free(stack_[0]);\
            first_cr_ = first_cr_->next;\
            longjmp(first_cr_->ctx, 1);\
        }\
    } while(0)

void yield(void) {
    if(first_cr_ == last_cr_)
        return;
    if(setjmp(first_cr_->ctx))
        return;
    struct cr_ *cr = first_cr_;
    first_cr_ = cr->next;
    cr->next = NULL;
    last_cr_->next = cr;
    last_cr_ = cr;
    longjmp(first_cr_->ctx, 1);
}
#include <setjmp.h>

#define STACK_SIZE 16384

volatile int unoptimisable_ = 1;

struct cr_ {
    struct cr_ *next;
    jmp_buf ctx;
};

struct cr_ main_cr_ = {NULL};

struct cr_ *first_cr_ = &main_cr_;
struct cr_ *last_cr_ = &main_cr_;

#define go(fn) \
    do {\
        if(!setjmp(first_cr_->ctx)) {\
            char *stack = malloc(STACK_SIZE);\
            int anchor_[unoptimisable_];\
            char filler_[(char*)&anchor_ - (char*)(stack + STACK_SIZE)];\
            struct cr_ cr[unoptimisable_];\
            cr->next = first_cr_;\
            first_cr_ = cr;\
            char *stack_[unoptimisable_];\
            stack_[0] = stack;\
            fn;\
            free(stack_[0]);\
            first_cr_ = first_cr_->next;\
            longjmp(first_cr_->ctx, 1);\
        }\
    } while(0)

void yield(void) {
    if(first_cr_ == last_cr_)
        return;
    if(setjmp(first_cr_->ctx))
        return;
    struct cr_ *cr = first_cr_;
    first_cr_ = cr->next;
    cr->next = NULL;
    last_cr_->next = cr;
    last_cr_ = cr;
    longjmp(first_cr_->ctx, 1);
}

void foo(int count, const char *text) {
    int i;
    for(i = 0; i != count; ++i) {
        printf("%s\n", text);
        yield();
    }
}

int main() {
    go(foo(3, "a"));
    go(printf("Hello, %s!\n", "world"));
    go(foo(2, "b"));
    foo(5, "c");
    return 0;
}

Coroutines using Memory Arenas

Coroutine stack in back-buffered frame arena

  • probably need to pass in curr and prev frame arena
  • need to juggle stack between the two after each yield
  • maybe could be integrated into the coroutine macros
  • actually, do the juggle before coroutinebegin in a macro

every coroutine has its own subarena

  • needs to clear arena after return
  • means every coroutine needs an init where arena is alloc'ed

Resources

Footnotes