/*
 * This is a showcase for my C dialect concept, nicknamed "Type-C".
 * I start from ISO C11 and make subsequent modifications.  All of this is based
 * on my experience with low level systems programming which, even though I would
 * not necessarily consider myself an expert in it, I have spent quite some time
 * with over the last few years.  During that time, several "features" of C
 * (or lack thereof) have proven to be rather annoying, which is why I would
 * like to address them here.
 *
 * Note that certain syntaxes are debatable (especially the range notation, as
 * you will see), but I am primarily concerned about the ideas in themselves
 * rather than the particular way they are implemented.
 *
 * The main theme throughout this entire concept is "let the compiler handle it".
 * A lot of sanity checks can be eliminated with a more powerful type system.
 * Most of the extensions here only concern the type system and don't emit any
 * extra code in the binary, which should make writing vulnerable code somewhat
 * more difficult without having any significant impact on the performance and
 * transparency that C is known and loved for.
 *
 * As a first preface, the new keywords introduced in C99 and later (_Alignof,
 * _Atomic, _Bool, et al.) are written without the leading underscore and all
 * lowercase (so alignof, atomic, bool, etc).
 * Furthermore, the following additional keywords are introduced:
 * - namespace
 * - using
 * - typeof
 */

/*
 * You can use GNU style statement expressions in macros.
 * And the typeof operator is a thing.
 *
 * abs(x) copies its argument into a local of the same type first, so the
 * argument expression is evaluated only once, and then yields the negated
 * copy if it is below zero and the copy itself otherwise.
 */
#define abs(x) ({ \
	typeof(x) __x = (x); \
	__x < 0 ? -__x : __x; \
})

/* there are namespaces! */
namespace my_ns {

/*
 * No K&R style functions anymore, so you don't need void in the parameter list.
 *
 * Since this symbol is namespaced, its name will need to be mangled.
 * You could transform it to
 *     @my_ns@my_fn
 * which is the prettier variant but incompatible with ISO C.
 * Another option would be
 *     _$my_ns$my_fn
 * which is technically still incompatible, but at least gcc and clang eat it.
 * Furthermore, you can also access it from GNU style assembly (and probably
 * nasm as well, even though I haven't checked).  One last option would be
 *     _N__my_ns__my_fn
 * which would require reserving all identifiers that contain two underscores
 * anywhere in their name (just like C++ does), but is fully compatible with
 * ISO C as long as you are willing to overlook the rule that reserves
 * identifiers starting with an underscore followed by a capital letter.
 * At the end of the day, the exact implementation of name mangling doesn't
 * matter as long as it is predictable enough that you can refer to namespaced
 * symbols from other languages without thinking too much about it.
 */
void my_fn(signed char s, unsigned char u)
{
	/* signed integrals are always represented in two's complement */
	assert(~INT_MAX == INT_MIN);
	assert(INT_MAX + 1 == INT_MIN);

	/* integer promotions for bitwise operators are always unsigned */
	assert((unsigned)(s >> 1) == (unsigned)s >> 1);

	/* comparisons between integrals of different signedness require an explicit cast ... */
	assert(s < u); /* compile error, different signedness */
	assert(s < (signed char)u); /* compiles fine */

	/*
	 * ... except when it is known at compile time that the value cannot be negative
	 * (please ignore the edge case if s is SCHAR_MIN here, that's not the point)
	 */
	assert(abs(s) == u); /* compiles fine */
}

/*
 * Enum types must have a width that is sufficient to represent any of its
 * members, and at least the width of int.  If a member cannot be stored in
 * the largest integral type supported natively by the architecture or a
 * compiler extension (like long long on i386), the program cannot be compiled.
 *
 * NOTE(review): there is no ';' after the closing brace of this enum --
 * confirm whether the dialect intends to drop it or whether it is an oversight.
 */
enum my_flags {
	MY_FLAG_1 = 1 << 8,
	MY_FLAG_2 = 1 << 16,
	MY_FLAG_3 = 1 << 24,
	/* this gives a compile error on x86 */
	MY_FLAG_4 = 1 << 128,
}

} /* namespace my_ns */

/*
 * Types can be made incompatible with their original by writing a tilde before
 * the type name in a typedef.  As a result, variables of type mytype_t can't be
 * assigned values of type int, unless it is a literal.  It *does* work the other
 * way around, though: Variables of type int can take values of type mytype_t.
 */
typedef int ~mytype_t;

/*
 * All integral types have a range of allowed values, which has a lower and
 * upper limit.  If omitted, it implicitly defaults to the entire range of
 * values that type can represent.  This range is determined entirely at
 * compile time, and changes depending on the current scope (like branches)
 * unless it is declared volatile.
 *
 * The range may also be specified explicitly, like here.
 * direction_t is an int that may only be -1, 0, or 1 (not 2), i.e. the lower
 * limit is inclusive and the upper limit is exclusive.
 */
typedef int<-1,2> direction_t;

/*
 * I am unsure whether this should also be possible for pointers.
It might be useful in memory management systems and maybe for separating
 * user/kernel addresses, but other than that I can only think of one other
 * use case:
 */
#define non_null(type) type *<1,>

/*
 * Entry point: exercises the namespaced function and the range-checked
 * change_direction(), including one call that is meant to be rejected
 * at compile time.
 */
int main()
{
	/* accessing namespaced identifiers works just like in C++, btw */
	my_ns::my_fn(-1, 1);

	/*
	 * non-local identifiers don't require a forward declaration if they are defined
	 * in the same compilation unit; there are no implicit function declarations
	 */
	change_direction(1);

	/* this gives a compile error because 2 is outside the type's specified range */
	change_direction(2);
}

/*
 * Print a message describing the direction.  Because direction_t only admits
 * -1, 0 and 1, the compiler can tell that the last case is unreachable.
 */
void change_direction(direction_t dir)
{
	switch (dir) {
	case -1:
		puts("going backward");
		break;
	case 0:
		puts("not moving");
		break;
	case 1:
		puts("going forward");
		break;
	case 2: /* this is dead code */
		break;
	}
}

/*
 * Shows how the implicit range of a plain int is narrowed by a branch
 * condition, and how an explicit cast sidesteps the check altogether.
 */
void change_direction_wrapper(int dir)
{
	/* "dir" currently has an implicit type of int<,> */

	/*
	 * this gives a compile error because change_direction
	 * wants a parameter of type direction_t (aka int<-1,2>),
	 * which the type of "dir" (int<,>) does not satisfy
	 */
	change_direction(dir);

	/* this compiles fine */
	if (dir >= -1 && dir < 2) {
		/* "dir" has implicit type int<-1,2> in this scope */
		change_direction(dir);
	}
	/* after the if block, "dir" is of type int<,> again */

	/* this also compiles fine, but it bypasses the range check */
	change_direction((direction_t)dir);
}

/*
 * This exemplary ring buffer implementation demonstrates the power of ranges
 *
 * NOTE(review): there is no ';' after the closing brace of this struct --
 * confirm whether the dialect intends to drop it or whether it is an oversight.
 */
struct ring {
	size_t capacity;
	/* rpos and wpos must be less than capacity */
	size_t<,.capacity> rpos;
	size_t<,.capacity> wpos;
	/*
	 * The last member in a struct may have a size that depends on another
	 * struct member.  This is *only* for type checking though, and sizeof
	 * treats it as size 0.
	 */
	char data[.capacity];
}

/*
 * Allocate a ring with room for `capacity` data bytes behind the header,
 * reset both positions to 0 and zero the data area.  Returns whatever
 * malloc() returned, so the result is a null pointer if allocation failed.
 *
 * The capacity parameter needs this range constraint, because otherwise the range
 * for rpos and wpos would be empty (also, having a ring buffer that can't store
 * anything does not make sense)
 */
struct ring *ring_create(size_t<1,> capacity)
{
	/*
	 * sizeof(*ring) does not include the data array (see struct ring), so
	 * the payload size is added by hand.
	 * NOTE(review): the addition is not checked for wrap-around when
	 * capacity is close to SIZE_MAX.
	 */
	struct ring *ring = malloc(sizeof(*ring) + capacity);
	if (ring) {
		/*
		 * Initializer lists may also be used on pointers to structs.
		 * If the name of the struct member matches the variable name it
		 * is being assigned from, it may be omitted ("capacity" in this
		 * example).  In all other cases, the member name must always be
		 * specified explicitly to avoid ambiguity.  This applies to any
		 * struct initializer list; ANSI C style ones (i.e. without the
		 * member name, where the order of values matters) are prohibited.
		 */
		*ring = {
			capacity,
			.rpos = 0,
			.wpos = 0,
		};

		/* equivalent code in ISO C */
		ring->capacity = capacity;
		ring->rpos = 0;
		ring->wpos = 0;

		/*
		 * Technically, the above expression sort of implies that the
		 * "data" member (which does not have an explicit initializer)
		 * will be zero-initialized, to be consistent with other types of
		 * initialization.  I'm not entirely sure whether this is a good
		 * idea though, especially in freestanding environments where the
		 * compiler would have to inject a custom loop for this.
		 * The data array is also a special case, because it has a dynamic
		 * size.  Maybe only non-dynamic members should be implicitly
		 * zero-initialized, but that in turn has serious bug potential.
		 */
		memset(ring->data, 0, capacity);
	}

	return ring;
}

/*
 * Ranges may also depend on a function parameter.
 * ring_write() cannot return any value greater than the length of the buffer.
*/
/*
 * Copy bytes from buf into the ring until either len bytes have been stored
 * or the ring is full, and return the number of bytes actually stored.
 *
 * The ring is empty when rpos == wpos, so it has to be treated as full one
 * step earlier: when advancing wpos would make it equal to rpos.  (Looping
 * while wpos != rpos instead would never run on an empty ring, and since
 * the reader only advances while rpos != wpos, nothing could ever be
 * written or read.)  The price is that one slot always stays unused, i.e. at
 * most capacity - 1 bytes are buffered at any time.
 *
 * NOTE(review): by the half-open convention used for direction_t, the return
 * range <,len> would exclude len itself even though a complete write returns
 * len -- confirm whether the upper bound is meant to be inclusive here.
 */
size_t<,len> ring_write(struct ring *ring, const void *buf, size_t len)
{
	const char *pos = buf;
	const char *end = buf + len; /* pointer arithmetic on void is allowed */

	/* wpos < capacity, so wpos + 1 cannot wrap around size_t */
	while (pos != end && (ring->wpos + 1) % ring->capacity != ring->rpos) {
		ring->data[ring->wpos++] = *pos++;
		/*
		 * If this line was removed, the one above would result in a compile error.
		 * This is because the type of wpos requires it to be less than capacity,
		 * but it is mutated.  This guarantees that the data array is never accessed
		 * out of bounds, because its length is annotated to be equal to capacity.
		 *
		 * If wpos was declared volatile, it would have to be copied to a local
		 * variable first.  Since it's not, the compiler does that automatically
		 * (as it would do in regular C with optimization turned on).
		 */
		ring->wpos %= ring->capacity;
	}

	return (size_t)(pos - (const char *)buf);
}

/*
 * Copy bytes out of the ring into buf until either len bytes have been
 * copied or the ring is empty (rpos == wpos), and return the number of
 * bytes copied.
 *
 * Array parameters may have an optional length specified through another parameter
 * attached to them.  This is just for compile time checks to determine whether the
 * array is possibly accessed out of bounds and emits no actual code in the binary.
 * It might be a good idea to look into a syntactic way of specifying the byte size
 * of void pointers, as this specific example would always require casting to a
 * char pointer before passing a buffer to the function.
 */
size_t<,len> ring_read(char buf[len], struct ring *ring, size_t len)
{
	char *pos = buf;
	/*
	 * defining a pointer beyond the buffer's range does not emit
	 * any warnings as long as that pointer is never dereferenced
	 */
	char *end = &buf[len];

	while (pos != end && ring->rpos != ring->wpos) {
		*pos++ = ring->data[ring->rpos++];
		/* wrap around so that rpos stays below capacity, as its type requires */
		ring->rpos %= ring->capacity;
	}

	return (size_t)(pos - buf);
}