Coroutine
Coroutines are "functions whose execution you can pause".
They can be symmetric or asymmetric. Symmetric coroutines have bidirectional control flow: both caller and callee (i.e. the coroutine) can yield control to each other. Asymmetric coroutines are the more common form: the caller invokes the coroutine and it runs until it yields control back to the caller.
Coroutines can also be differentiated by their management of the call stack. Stackful coroutines maintain their own call stack (meaning they can have local variables), while stackless coroutines share the call stack of the caller.
They can be used to implement linear state machines in a straight-forward manner ("Co-routines are to state machines what recursion is to stacks"1).
Stackless Coroutines in C using Macros
A form of Coroutines (more specifically, Protothreads) can be implemented in C using Duff's Device. This technique essentially works by saving the state of where a Coroutine was previously yielded and using a switch statement to jump back to that code (i.e. resuming the function).
This means the state for the coroutine is just an integer that keeps track of
its last line of code (which corresponds to a case label on the same line). This
integer is often called index
, line
, or sequence point
.
Minimal Implementation
typedef struct coroutine_t { int index; float elapsed; /* optional */ } coroutine_t; #define COROUTINE_START(co) do { switch (co->index) { default: #define COROUTINE_EXIT(co) do { goto __co_end; } while (0) #define COROUTINE_YIELD(co) do { co->index = __LINE__; COROUTINE_EXIT(co); case __LINE__:; } while (0) #define COROUTINE_END(co) } co->index = 0; __co_end:; } while (0) /* useful macros */ #define COROUTINE_WAIT(co, time, dt) do { case __LINE__: co->index = __LINE__; co->elapsed += dt; do { if (co->elapsed < time) { goto __co_end; } else { co->elapsed = 0; } } while (0); } while (0) #define COROUTINE_CASE(co, name) case __LINE__: name: co->index = __LINE__;
Usage code after macro expansion would look like this:
typedef struct coroutine_t { int index; float elapsed; /* optional */ } coroutine_t; #define COROUTINE_START(co) do { switch (co->index) { default: #define COROUTINE_EXIT(co) do { goto __co_end; } while (0) #define COROUTINE_YIELD(co) do { co->index = __LINE__; COROUTINE_EXIT(co); case __LINE__:; } while (0) #define COROUTINE_END(co) } co->index = 0; __co_end:; } while (0) /* useful macros */ #define COROUTINE_WAIT(co, time, dt) do { case __LINE__: co->index = __LINE__; co->elapsed += dt; do { if (co->elapsed < time) { goto __co_end; } else { co->elapsed = 0; } } while (0); } while (0) #define COROUTINE_CASE(co, name) case __LINE__: name: co->index = __LINE__; void some_function(coroutine_t* co) { switch (co->index) { default: // START printf("First\n"); co->index = __LINE__; COROUTINE_EXIT(co); case __LINE__:; // YIELD printf("Third\n"); } co->index = 0; __co_end:; // END } static coroutine_t co; some_function(&co); printf("Second\n"); some_function(&co);
TODO Macro Implementation without COROUTINEEND
Implementation without having to specify COROUTINEEND.
#include <stdio.h> typedef struct coroutine_t { float elapsed; int index; } coroutine_t; #define coroutine(co) coroutine_t* _co = co; switch (_co->index) default: if(0) _body: break; else for(int _i_##__LINE__ = (0); !_i_##__LINE__; (_i_##__LINE__ += 1), _co->index = 0) #define yield _co->index = __LINE__; goto _body; case __LINE__:; #define case(name) case __LINE__: name: _co->index = __LINE__; /* helper */ #define wait(time, dt) case __LINE__: _co->index = __LINE__; _co->elapsed += dt; if (_co->elapsed < time) { goto _body; } else { _co->elapsed = 0; } void coro_func(coroutine_t* co) { static int i = 2; coroutine(co) { printf("first\n"); yield; printf("second\n"); yield; if (i == 2) { goto test; } while(1) { i--; printf("in while\n"); if (!i) break; // breaking works now yield; } case(test); { printf("test case\n"); yield; } wait(1.0f, 0.3f); printf("third\n"); goto test; } } int main() { coroutine_t co = {0}; coro_func(&co); coro_func(&co); coro_func(&co); // 0.3f coro_func(&co); // 0.6f coro_func(&co); // 0.9f coro_func(&co); // 1.2f coro_func(&co); // 1.2f coro_func(&co); // 1.2f }
Alternative Macro Implementation
// dependency: sokol_time.h typedef struct Coroutine Coroutine; struct Coroutine { S32 line; U64 start_time; // used for CoroutineWait void* data; }; #define CoroutineBegin(coro) switch (coro->line) {case 0: coro->line = 0; #define CoroutineYield(coro) do { coro->line = __LINE__; return; case __LINE__:;} while(0) #define CoroutineYieldUntil(coro, condition) while (!(condition)) { CoroutineYield(coro); } #define CoroutineWait(coro, duration) do {if (coro->start_time == 0.0f) { coro->start_time = stm_now(); } \ CoroutineYieldUntil(coro, stm_sec(stm_now()) > stm_sec(coro->start_time) + (F64)duration); coro->start_time = 0; } while (0) #define CoroutineEnd(coro) do { coro->line = __LINE__; } while (0); } #define CoroutineReset(coro) do { coro->line = 0; } while (0); } #define CoroutineIsFinished(coro) (coro->line == -1)
switch (coro->line) {case 0: coro->line = 0; // BEGIN coro->line = __LINE__; return; case __LINE__:; // YIELD while (!(condition)) { CoroutineYield(coro); } // YIELD UNTIL if (coro->start_time == 0.0f) { coro->start_time = stm_now(); } // WAIT CoroutineYieldUntil(coro, stm_sec(stm_now()) > stm_sec(coro->start_time) + (F64)duration); coro->start_time = 0; coro->line = __LINE__; } // END or.. coro->line = 0; } // ...RESET
More Elaborate Macro Implementation
This implementation allows for every coroutine to have its own stack and allocate local variables from it and can allow nested coroutine (i.e a function using a coroutine passes that coroutine to another function using coroutines).
typedef struct { float elapsed; // timer int flag; // ? int index; // code-line & case-label #define COROUTINE_MAX_DEPTH 8 // how deep into nested coroutines we go int line[COROUTINE_MAX_DEPTH]; // keeps track of the sequence point of nested coroutines int stack_pointer; // used for pushing on to the stack #define COROUTINE_STACK_SIZE (512) // static stack size for every coroutine char stack[COROUTINE_STACK_SIZE]; // the stack, usage optional } coroutine_t; #define COROUTINE_CASE_OFFSET (1024 * 1024) static inline void coroutine_init(coroutine_t* co) /* Just zero out the coroutine */ { co->elapsed = 0; co->flag = 0; co->index = 0; for (int i = 0; i < COROUTINE_MAX_DEPTH; ++i) co->line[i] = 0; co->stack_pointer = 0; } /** * Pops off `size` bytes from `co`'s fixed-size buffer for use as a local variable. * * void do_work(coroutine_t* co) * { * int* a = (int*)coroutine_local_var(co, sizeof(int)); * int* b = (int*)coroutine_local_var(co, sizeof(int)); * * COROUTINE_START(co); // Setup initial values here. This code runs just once. * *a = 3; *b = 7; * // ... * COROUTINE_END(co); * } */ static inline void* coroutine_local_var(coroutine_t* co, int size) { COROUTINE_ASSERT(co->stack_pointer + size < COROUTINE_STACK_SIZE); void* ptr = co->stack + co->stack_pointer; co->stack_pointer += (int)size; return ptr; } #define COROUTINE_START(co) do { co->flag = 0; switch (co->line[co->index]) { default: /* default is 0, and will run just once. */ #define COROUTINE_CASE(co, name) case __LINE__: name: co->line[co->index] = __LINE__; #define COROUTINE_WAIT(co, time, dt) do { case __LINE__: co->line[co->index] = __LINE__; co->elapsed += dt; do { if (co->elapsed < time) { co->flag = 1; goto __co_end; } else { co->elapsed = 0; } } while (0); } while (0) #define COROUTINE_EXIT(co) do { co->flag = 1; goto __co_end; } while (0) #define COROUTINE_YIELD(co) do { co->line[co->index] = __LINE__; COROUTINE_EXIT(co); case __LINE__:; } while (0) /* COROUTINE_CALL(co, call_some_other_func(co, my_params)); */ #define COROUTINE_CALL(co, ...) co->flag = 0; case __LINE__: COROUTINE_ASSERT(co->index < COROUTINE_MAX_DEPTH); co->line[co->index++] = __LINE__; __VA_ARGS__; co->index--; do { if (co->flag) { goto __co_end; } else { case __LINE__ + COROUTINE_CASE_OFFSET: co->line[co->index] = __LINE__ + COROUTINE_CASE_OFFSET; } } while (0) #define COROUTINE_END(co) } co->line[co->index] = 0; __co_end:; co->stack_pointer = 0; } while (0)
Coroutines in C using setjmp
#include <setjmp.h> #define STACK_SIZE 16384 volatile int unoptimisable_ = 1; struct cr_ { struct cr_ *next; jmp_buf ctx; }; struct cr_ main_cr_ = {NULL}; struct cr_ *first_cr_ = &main_cr_; struct cr_ *last_cr_ = &main_cr_; #define go(fn) \ do {\ if(!setjmp(first_cr_->ctx)) {\ char *stack = malloc(STACK_SIZE);\ int anchor_[unoptimisable_];\ char filler_[(char*)&anchor_ - (char*)(stack + STACK_SIZE)];\ struct cr_ cr[unoptimisable_];\ cr->next = first_cr_;\ first_cr_ = cr;\ char *stack_[unoptimisable_];\ stack_[0] = stack;\ fn;\ free(stack_[0]);\ first_cr_ = first_cr_->next;\ longjmp(first_cr_->ctx, 1);\ }\ } while(0) void yield(void) { if(first_cr_ == last_cr_) return; if(setjmp(first_cr_->ctx)) return; struct cr_ *cr = first_cr_; first_cr_ = cr->next; cr->next = NULL; last_cr_->next = cr; last_cr_ = cr; longjmp(first_cr_->ctx, 1); }
#include <setjmp.h> #define STACK_SIZE 16384 volatile int unoptimisable_ = 1; struct cr_ { struct cr_ *next; jmp_buf ctx; }; struct cr_ main_cr_ = {NULL}; struct cr_ *first_cr_ = &main_cr_; struct cr_ *last_cr_ = &main_cr_; #define go(fn) \ do {\ if(!setjmp(first_cr_->ctx)) {\ char *stack = malloc(STACK_SIZE);\ int anchor_[unoptimisable_];\ char filler_[(char*)&anchor_ - (char*)(stack + STACK_SIZE)];\ struct cr_ cr[unoptimisable_];\ cr->next = first_cr_;\ first_cr_ = cr;\ char *stack_[unoptimisable_];\ stack_[0] = stack;\ fn;\ free(stack_[0]);\ first_cr_ = first_cr_->next;\ longjmp(first_cr_->ctx, 1);\ }\ } while(0) void yield(void) { if(first_cr_ == last_cr_) return; if(setjmp(first_cr_->ctx)) return; struct cr_ *cr = first_cr_; first_cr_ = cr->next; cr->next = NULL; last_cr_->next = cr; last_cr_ = cr; longjmp(first_cr_->ctx, 1); } void foo(int count, const char *text) { int i; for(i = 0; i != count; ++i) { printf("%s\n", text); yield(); } } int main() { go(foo(3, "a")); go(printf("Hello, %s!\n", "world")); go(foo(2, "b")); foo(5, "c"); return 0; }
Coroutines using Memory Arenas
Coroutine stack in back-buffered frame arena
- probably need to pass in curr and prev frame arena
- need to juggle stack between the two after each yield
- maybe could be integrated into the coroutine macros
- actually, do the juggle before coroutinebegin in a macro
every coroutine has its own subarena
- needs to clear arena after return
- means every coroutine needs an init where arena is alloc'ed
Resources
- Macro-based C Implementations
- Coroutines in C using Duff's Device
- Minimal Macro-based Implementation of Coroutines in C
- https://github.com/bigrando420/thomas/blob/main/sauce/th_coroutine.h
- Protothreads with a twist
- cuteroutine.h - C++ Coroutines
- routine.h
- Lightweight Concurrent Tasks in C
- Stackless Coroutine in C That Maintains Local Variables
- Video: Juggling Coroutine State between two Frame Arenas
- Cute Framework Coroutines
- Libaco: Records entire stack frames (real local variables)
- minicoro: Stackful asymmetric coroutines via assembly, ucontext or fibers
- cutecoroutine.h: Coroutine implementation using minicoro
- Lua Coroutine Basics
- Implementing Action Sequences and Cutscenes with Lua Coroutines
- Philosophy of coroutines