Type Tagging #1

Merged
cosmonaut merged 15 commits from venko/wraith-lang:identifiers into main 2021-05-14 18:54:11 +00:00
12 changed files with 594 additions and 17 deletions

View File

@ -23,7 +23,7 @@ find_package(LLVM)
include_directories(${CMAKE_SOURCE_DIR})
BISON_TARGET(Parser generators/wraith.y ${CMAKE_CURRENT_BINARY_DIR}/y.tab.c COMPILE_FLAGS "-d -v -t -Wcounterexamples")
BISON_TARGET(Parser generators/wraith.y ${CMAKE_CURRENT_BINARY_DIR}/y.tab.c COMPILE_FLAGS "-d -v -t")
FLEX_TARGET(Scanner generators/wraith.lex ${CMAKE_CURRENT_BINARY_DIR}/lex.yy.c)
ADD_FLEX_BISON_DEPENDENCY(Scanner Parser)
@ -41,9 +41,11 @@ add_executable(
# Source
src/ast.h
src/codegen.h
src/identcheck.h
src/parser.h
src/ast.c
src/codegen.c
src/identcheck.c
src/parser.c
src/main.c
# Generated code

6
commenttest.w Normal file
View File

@ -0,0 +1,6 @@
struct Program {
// This triggers a parse error
static Main(): int {
return 0;
}
}

44
iftest.w Normal file
View File

@ -0,0 +1,44 @@
struct MyStruct {
static MyFunc(): int {
myStructInt: int = 595959959;
return myStructInt;
}
}
struct Program {
static Foo(): int {
return 0;
}
static Main(): int {
myInt: int = 54;
if (myInt < 0) {
signTag: int = 0 - 1;
} else if (myInt == 0) {
signTag: int = 0;
} else {
signTag: int = 1;
}
myBool: bool;
if (myBool) {
if (myBool) {
if (myBool) {
if (myBool) {
if (myBool) {
lol: int = 69;
}
}
}
}
}
someInt: int = 9585858;
return 0;
}
static Bar(): int {
return 0;
}
}

11
reftest.w Normal file
View File

@ -0,0 +1,11 @@
struct MyStruct {
foo: int;
}
struct Program {
static Main(): int {
myStruct: Reference<MyStruct>;
myStruct = alloc MyStruct;
return 0;
}
}

View File

@ -2,21 +2,8 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* Duplicates the NUL-terminated string s into freshly malloc'd storage.
 * Returns the copy, or NULL when the allocation fails. The caller owns
 * (and must free) the returned buffer. */
char* strdup (const char* s)
{
    const size_t bytes = strlen(s) + 1; /* payload plus terminator */
    char* copy = malloc(bytes);

    if (copy != NULL)
    {
        memcpy(copy, s, bytes);
    }

    return copy;
}
#include "util.h"
const char* SyntaxKindString(SyntaxKind syntaxKind)
{
@ -29,6 +16,8 @@ const char* SyntaxKindString(SyntaxKind syntaxKind)
case Comment: return "Comment";
case CustomTypeNode: return "CustomTypeNode";
case Declaration: return "Declaration";
case Expression: return "Expression";
case ForLoop: return "ForLoop";
case DeclarationSequence: return "DeclarationSequence";
case FunctionArgumentSequence: return "FunctionArgumentSequence";
case FunctionCallExpression: return "FunctionCallExpression";
@ -108,6 +97,7 @@ Node* MakeIdentifierNode(
node->syntaxKind = Identifier;
node->value.string = strdup(id);
node->childCount = 0;
node->typeTag = NULL;
return node;
}
@ -517,7 +507,12 @@ static void PrintNode(Node *node, int tabCount)
break;
case Identifier:
printf("%s", node->value.string);
if (node->typeTag == NULL) {
printf("%s", node->value.string);
} else {
char *type = TypeTagToString(node->typeTag);
printf("%s<%s>", node->value.string, type);
}
break;
case Number:
@ -537,3 +532,77 @@ void PrintTree(Node *node, uint32_t tabCount)
PrintTree(node->children[i], tabCount + 1);
}
}
/*
 * Builds a heap-allocated TypeTag describing the type denoted by an AST node.
 *
 * Type and Declaration nodes delegate to their first child, and
 * FunctionDeclaration nodes delegate to the second child of their signature
 * node. PrimitiveTypeNode, ReferenceTypeNode, CustomTypeNode and
 * StructDeclaration each produce a new tag.
 *
 * Returns NULL (after printing a diagnostic to stderr) when node is NULL, when
 * its SyntaxKind is unsupported, or when memory cannot be allocated.
 * The caller owns the returned tag.
 */
TypeTag* MakeTypeTag(Node *node) {
    if (node == NULL) {
        fprintf(stderr, "wraith: Attempted to call MakeTypeTag on null value.\n");
        return NULL;
    }

    /* Resolve every path that does not need its own allocation first, so that
     * no tag is allocated only to be overwritten or abandoned. */
    switch (node->syntaxKind) {
        case Type:
        case Declaration:
            return MakeTypeTag(node->children[0]);

        case FunctionDeclaration:
            return MakeTypeTag(node->children[0]->children[1]);

        case PrimitiveTypeNode:
        case ReferenceTypeNode:
        case CustomTypeNode:
        case StructDeclaration:
            break;

        default:
            fprintf(stderr,
                "wraith: Attempted to call MakeTypeTag on"
                " node with unsupported SyntaxKind: %s\n",
                SyntaxKindString(node->syntaxKind));
            return NULL;
    }

    TypeTag *tag = malloc(sizeof *tag);
    if (tag == NULL) {
        fprintf(stderr, "wraith: Out of memory in MakeTypeTag.\n");
        return NULL;
    }

    switch (node->syntaxKind) {
        case PrimitiveTypeNode:
            tag->type = Primitive;
            tag->value.primitiveType = node->primitiveType;
            break;

        case ReferenceTypeNode:
            tag->type = Reference;
            /* May be NULL if the referenced type could not be tagged. */
            tag->value.referenceType = MakeTypeTag(node->children[0]);
            break;

        case CustomTypeNode:
            tag->type = Custom;
            tag->value.customType = strdup(node->value.string);
            break;

        case StructDeclaration:
            /* A struct is tagged with its own name (first child). */
            tag->type = Custom;
            tag->value.customType = strdup(node->children[0]->value.string);
            printf("Struct tag: %s\n", TypeTagToString(tag));
            break;

        default:
            /* Unreachable: filtered by the first switch. */
            break;
    }

    return tag;
}
/*
 * Renders a TypeTag as text.
 *
 * Ownership of the result depends on the tag kind:
 *   - Unknown:   a string literal (do not free or modify).
 *   - Primitive: whatever PrimitiveTypeToString returns.
 *   - Custom:    the tag's own customType string (borrowed, do not free).
 *   - Reference: a newly malloc'd "Ref<...>" string that the caller must free.
 *
 * Returns NULL when tag is NULL (with a diagnostic on stderr), when the inner
 * type of a reference cannot be rendered, when allocation fails, or when
 * tag->type holds a value outside the enum.
 */
char* TypeTagToString(TypeTag *tag) {
    if (tag == NULL) {
        fprintf(stderr, "wraith: Attempted to call TypeTagToString with null value\n");
        return NULL;
    }

    switch (tag->type) {
        case Unknown:
            return "Unknown";

        case Primitive:
            return PrimitiveTypeToString(tag->value.primitiveType);

        case Reference: {
            TypeTag *innerTag = tag->value.referenceType;
            char *inner = TypeTagToString(innerTag);
            if (inner == NULL) {
                /* Missing or unrenderable inner type; strlen(NULL) is undefined. */
                return NULL;
            }

            /* sizeof "Ref<>" covers the four-byte prefix, the closing '>' and
             * the terminating NUL. */
            size_t size = strlen(inner) + sizeof "Ref<>";
            char *result = malloc(size);
            if (result != NULL) {
                snprintf(result, size, "Ref<%s>", inner);
            }

            /* Only nested references hand back heap memory that we own. */
            if (innerTag->type == Reference) {
                free(inner);
            }
            return result;
        }

        case Custom:
            return tag->value.customType;
    }

    /* tag->type was not a valid enumerator. */
    return NULL;
}

View File

@ -2,6 +2,7 @@
#define WRAITH_AST_H
#include <stdint.h>
#include "identcheck.h"
typedef enum
{
@ -71,6 +72,26 @@ typedef union
BinaryOperator binaryOperator;
} Operator;
/* Describes the type attached to an identifier: a kind discriminator (`type`)
 * plus the kind-specific payload held in `value`. */
typedef struct TypeTag
{
/* Selects which member of `value` is meaningful. */
enum Type
{
/* No type could be determined; `value` carries nothing. */
Unknown,
Primitive,
Reference,
Custom
} type;
union
{
/* Valid when type = Primitive. */
PrimitiveType primitiveType;
/* Valid when type = Reference. */
/* Tag of the referenced type; MakeTypeTag may leave this NULL. */
struct TypeTag *referenceType;
/* Valid when type = Custom. */
/* Name of the custom type, stored as a strdup'd copy by MakeTypeTag. */
char *customType;
} value;
} TypeTag;
typedef struct Node
{
SyntaxKind syntaxKind;
@ -87,9 +108,10 @@ typedef struct Node
uint64_t number;
} value;
PrimitiveType primitiveType;
TypeTag *typeTag;
IdNode *idLink;
} Node;
char* strdup (const char* s);
const char* SyntaxKindString(SyntaxKind syntaxKind);
uint8_t IsPrimitiveType(Node *typeNode);
@ -211,5 +233,9 @@ Node* MakeForLoopNode(
);
void PrintTree(Node *node, uint32_t tabCount);
const char* SyntaxKindString(SyntaxKind syntaxKind);
TypeTag* MakeTypeTag(Node *node);
char* TypeTagToString(TypeTag *tag);
#endif /* WRAITH_AST_H */

328
src/identcheck.c Normal file
View File

@ -0,0 +1,328 @@
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "ast.h"
#include "identcheck.h"
/*
 * Allocates a childless IdNode of the given kind.
 *
 * type   - kind of identifier-tree node to create.
 * name   - identifier text; copied, so the caller keeps ownership of its
 *          buffer. A NULL name is stored as the empty string so that later
 *          strcmp-based lookups stay well defined.
 * parent - enclosing node, or NULL for a root. The new node is NOT added to
 *          parent's child list; use AddChildToNode for that.
 *
 * Returns the new node with no type tag, or NULL (with a diagnostic on
 * stderr) when memory cannot be allocated.
 */
IdNode* MakeIdNode(NodeType type, char *name, IdNode *parent) {
    IdNode *node = malloc(sizeof *node);
    if (node == NULL) {
        fprintf(stderr, "wraith: Out of memory in MakeIdNode.\n");
        return NULL;
    }

    node->name = strdup(name != NULL ? name : "");
    if (node->name == NULL) {
        fprintf(stderr, "wraith: Out of memory in MakeIdNode.\n");
        free(node);
        return NULL;
    }

    node->type = type;
    node->parent = parent;
    node->childCount = 0;
    node->childCapacity = 0;
    node->children = NULL;
    node->typeTag = NULL;
    return node;
}
/*
 * Appends child to node's child array, growing the array as needed
 * (initial capacity 2, doubling afterwards).
 *
 * Does nothing when either pointer is NULL. A NULL child is how callers
 * signal "nothing to add"; a NULL node happens when a subtree is built
 * without an enclosing scope. If the array cannot be grown, a diagnostic is
 * printed to stderr and node is left unchanged.
 */
void AddChildToNode(IdNode *node, IdNode *child) {
    if (node == NULL || child == NULL) return;

    if (node->children == NULL || node->childCount == node->childCapacity) {
        uint32_t newCapacity = (node->children == NULL) ? 2 : node->childCapacity * 2;

        /* realloc(NULL, n) behaves like malloc(n). Keep the old pointer until
         * the call is known to have succeeded so it is never lost. */
        IdNode **grown = realloc(node->children, sizeof *grown * newCapacity);
        if (grown == NULL) {
            fprintf(stderr, "wraith: Out of memory in AddChildToNode.\n");
            return;
        }

        node->children = grown;
        node->childCapacity = newCapacity;
    }

    node->children[node->childCount] = child;
    node->childCount += 1;
}
/*
 * Recursively builds the identifier tree for the AST rooted at astNode.
 *
 * astNode - AST node to process; dereferenced without a NULL check.
 * parent  - IdNode of the enclosing scope. It is recorded as the parent of
 *           any node created here, and the Assignment/default paths attach
 *           results directly to it.
 *
 * Returns the IdNode created for astNode (also stored in astNode->idLink).
 * The caller is expected to attach that node to `parent`. Returns NULL when
 * astNode itself produces no node (non-declaration assignments and all
 * otherwise unhandled syntax kinds); in that case any nodes produced by its
 * children have already been attached to `parent`.
 *
 * NOTE(review): main calls this with parent == NULL. The Assignment and
 * default paths pass `parent` to AddChildToNode, which dereferences it when
 * the child is non-NULL -- confirm the root kind never reaches those paths.
 */
IdNode* MakeIdTree(Node *astNode, IdNode *parent) {
uint32_t i;
IdNode *mainNode;
switch (astNode->syntaxKind) {
case Assignment: {
/* "x: T = expr" -- only the declaration on the left is processed here;
 * the remaining children are not visited on this path. */
if (astNode->children[0]->syntaxKind == Declaration) {
return MakeIdTree(astNode->children[0], parent);
} else {
/* Plain assignment: visit every child and hang the results off parent. */
for (i = 0; i < astNode->childCount; i++) {
AddChildToNode(parent, MakeIdTree(astNode->children[i], parent));
}
return NULL;
}
}
case IfStatement: {
/* One ordered scope named "if" holds nodes from both the condition
 * (children[0]) and the statement sequence (children[1]). */
mainNode = MakeIdNode(OrderedScope, "if", parent);
Node *clause = astNode->children[0];
Node *stmtSeq = astNode->children[1];
for (i = 0; i < clause->childCount; i++) {
AddChildToNode(mainNode, MakeIdTree(clause->children[i], mainNode));
}
for (i = 0; i < stmtSeq->childCount; i++) {
AddChildToNode(mainNode, MakeIdTree(stmtSeq->children[i], mainNode));
}
break;
}
case IfElseStatement: {
/* "if-else" scope with two children: the node built for the if part
 * (children[0]) and a separate "else" scope for children[1]. */
Node *ifNode = astNode->children[0];
Node *elseStmts = astNode->children[1];
mainNode = MakeIdNode(OrderedScope, "if-else", parent);
IdNode *ifBranch = MakeIdTree(ifNode, mainNode);
IdNode *elseBranch = MakeIdNode(OrderedScope, "else", mainNode);
AddChildToNode(mainNode, ifBranch);
for (i = 0; i < elseStmts->childCount; i++) {
AddChildToNode(elseBranch, MakeIdTree(elseStmts->children[i], elseBranch));
}
AddChildToNode(mainNode, elseBranch);
break;
}
case ForLoop: {
/* Only children[0] (the loop declaration) and children[3] (the body) are
 * visited; children[1] and children[2] are not processed here. */
Node *loopDecl = astNode->children[0];
Node *loopBody = astNode->children[3];
mainNode = MakeIdNode(OrderedScope, "for-loop", parent);
AddChildToNode(mainNode, MakeIdTree(loopDecl, mainNode));
for (i = 0; i < loopBody->childCount; i++) {
AddChildToNode(mainNode, MakeIdTree(loopBody->children[i], mainNode));
}
break;
}
case Declaration: {
/* children[1] is the declared identifier. The same TypeTag pointer is
 * shared between the IdNode and that identifier's AST node. */
mainNode = MakeIdNode(Variable, astNode->children[1]->value.string, parent);
mainNode->typeTag = MakeTypeTag(astNode);
astNode->children[1]->typeTag = mainNode->typeTag;
break;
}
case StructDeclaration: {
/* children[0] is the struct name, children[1] its member declarations. */
Node *idNode = astNode->children[0];
Node *declsNode = astNode->children[1];
mainNode = MakeIdNode(Struct, idNode->value.string, parent);
mainNode->typeTag = MakeTypeTag(astNode);
for (i = 0; i < declsNode->childCount; i++) {
AddChildToNode(mainNode, MakeIdTree(declsNode->children[i], mainNode));
}
break;
}
case FunctionDeclaration: {
/* children[0] is the signature (name at [0], arguments at [2]) and
 * children[1] is the body. Arguments are added before body statements. */
Node *sigNode = astNode->children[0];
Node *funcNameNode = sigNode->children[0];
Node *funcArgsNode = sigNode->children[2];
Node *bodyStatementsNode = astNode->children[1];
mainNode = MakeIdNode(Function, funcNameNode->value.string, parent);
mainNode->typeTag = MakeTypeTag(astNode);
/* Share the tag with the function-name identifier in the AST. */
astNode->children[0]->children[0]->typeTag = mainNode->typeTag;
for (i = 0; i < funcArgsNode->childCount; i++) {
AddChildToNode(mainNode, MakeIdTree(funcArgsNode->children[i], mainNode));
}
for (i = 0; i < bodyStatementsNode->childCount; i++) {
AddChildToNode(mainNode, MakeIdTree(bodyStatementsNode->children[i], mainNode));
}
break;
}
case DeclarationSequence: {
/* Anonymous (empty-named) unordered scope grouping the declarations. */
mainNode = MakeIdNode(UnorderedScope, "", parent);
for (i = 0; i < astNode->childCount; i++) {
AddChildToNode(mainNode, MakeIdTree(astNode->children[i], mainNode));
}
break;
}
case Identifier: {
/* An identifier use becomes a Placeholder, and its type is resolved by
 * searching upward from that placeholder with LookupId. */
mainNode = MakeIdNode(Placeholder, astNode->value.string, parent);
IdNode *lookupNode = LookupId(mainNode, NULL, astNode->value.string);
if (lookupNode == NULL) {
fprintf(stderr, "wraith: Could not find IdNode for id %s\n", astNode->value.string);
/* Unresolved: give the AST node a fresh tag of kind Unknown (only the
 * `type` field is initialized). */
TypeTag *tag = (TypeTag*)malloc(sizeof(TypeTag));
tag->type = Unknown;
astNode->typeTag = tag;
} else {
astNode->typeTag = lookupNode->typeTag;
}
break;
}
default: {
/* Any other syntax kind creates no node of its own; nodes produced by its
 * children are attached straight to the enclosing parent. */
for (i = 0; i < astNode->childCount; i++) {
AddChildToNode(parent, MakeIdTree(astNode->children[i], parent));
}
return NULL;
}
}
/* Reached only by the cases that created mainNode: link AST -> IdNode. */
astNode->idLink = mainNode;
return mainNode;
}
/*
 * Prints a one-line description of an identifier-tree node to stdout.
 *
 * Placeholder and scope nodes are printed as "<Kind> (<name>)"; struct,
 * function and variable nodes are printed as "<name> : <type>", with the
 * function type wrapped in "Function<...>".
 *
 * A NULL node produces a diagnostic on stderr and nothing on stdout. When a
 * node's type cannot be rendered (TypeTagToString returns NULL), "(null)" is
 * printed in its place rather than handing a NULL pointer to printf's %s,
 * which is undefined behaviour.
 *
 * NOTE: TypeTagToString returns heap memory for Reference tags; that string
 * is not released here.
 */
void PrintIdNode(IdNode *node) {
    if (node == NULL) {
        fprintf(stderr, "wraith: Attempted to call PrintIdNode with null value.\n");
        return;
    }

    switch(node->type) {
        case Placeholder:
            printf("Placeholder (%s)\n", node->name);
            break;
        case OrderedScope:
            printf("OrderedScope (%s)\n", node->name);
            break;
        case UnorderedScope:
            printf("UnorderedScope (%s)\n", node->name);
            break;
        case Struct:
        case Variable: {
            const char *typeName = TypeTagToString(node->typeTag);
            printf("%s : %s\n", node->name, typeName != NULL ? typeName : "(null)");
            break;
        }
        case Function: {
            const char *typeName = TypeTagToString(node->typeTag);
            printf("%s : Function<%s>\n", node->name, typeName != NULL ? typeName : "(null)");
            break;
        }
    }
}
/*
 * Prints the identifier tree rooted at `tree` to stdout, one node per line
 * in pre-order. Each line is prefixed with tabCount copies of "| " so the
 * nesting depth is visible. A NULL tree only produces a stderr diagnostic.
 */
void PrintIdTree(IdNode *tree, uint32_t tabCount) {
    if (tree == NULL) {
        fprintf(stderr, "wraith: Attempted to call PrintIdTree on a null value.\n");
        return;
    }

    /* Indentation guide for the current depth. */
    uint32_t remaining = tabCount;
    while (remaining > 0) {
        printf("| ");
        remaining -= 1;
    }

    PrintIdNode(tree);

    /* Children are printed one level deeper. */
    const uint32_t childDepth = tabCount + 1;
    for (uint32_t child = 0; child < tree->childCount; child++) {
        PrintIdTree(tree->children[child], childDepth);
    }
}
/*
 * Prints the chain of nodes from the root down to `node`, one per line,
 * each indented by one space per level below the root.
 *
 * Returns the depth of `node` (0 for a root), or -1 when node is NULL.
 */
int PrintAncestors(IdNode *node) {
    if (node == NULL) {
        return -1;
    }

    /* Ancestors are printed first; this node sits one level below its parent. */
    const int depth = PrintAncestors(node->parent) + 1;

    for (int column = 0; column < depth; column++) {
        printf(" ");
    }
    PrintIdNode(node);

    return depth;
}
/*
 * Breadth-first search of the identifier tree rooted at `root` for the first
 * node whose kind is targetType and whose name equals targetName.
 *
 * Returns the matching node, or NULL when there is no match, when root is
 * NULL, or when the work queue cannot be allocated (each failure prints a
 * diagnostic to stderr).
 */
IdNode* LookdownId(IdNode *root, NodeType targetType, char *targetName) {
    if (root == NULL) {
        fprintf(stderr, "wraith: Attempted to call LookdownId on a null value.\n");
        return NULL;
    }

    /* FIFO queue of nodes still to visit. `head` advances instead of shifting
     * the array on every pop, and the buffer only ever grows. */
    size_t capacity = 16;
    IdNode **queue = malloc(capacity * sizeof *queue);
    if (queue == NULL) {
        fprintf(stderr, "wraith: Out of memory in LookdownId.\n");
        return NULL;
    }

    size_t head = 0;
    size_t tail = 0;
    queue[tail++] = root;

    IdNode *result = NULL;
    while (head < tail) {
        IdNode *current = queue[head++];
        if (current->type == targetType && strcmp(current->name, targetName) == 0) {
            result = current;
            break;
        }

        size_t needed = tail + current->childCount;
        if (needed > capacity) {
            size_t newCapacity = capacity;
            while (newCapacity < needed) {
                newCapacity *= 2;
            }

            /* Keep the old buffer reachable until realloc has succeeded. */
            IdNode **grown = realloc(queue, newCapacity * sizeof *grown);
            if (grown == NULL) {
                fprintf(stderr, "wraith: Out of memory in LookdownId.\n");
                break;
            }
            queue = grown;
            capacity = newCapacity;
        }

        for (uint32_t i = 0; i < current->childCount; i++) {
            queue[tail++] = current->children[i];
        }
    }

    free(queue);
    return result;
}
/*
 * Reports whether declaration order matters among the children of `node`,
 * i.e. whether a child may only see siblings that precede it.
 * True for ordered scopes, functions and variables; false otherwise.
 */
bool ScopeHasOrdering(IdNode *node) {
    const NodeType kind = node->type;

    /* Variable is included, though this is only technically true. */
    return kind == OrderedScope
        || kind == Function
        || kind == Variable;
}
/*
 * Resolves the identifier `target` by walking up the scope tree from `node`.
 *
 * node   - where the search starts (may be NULL, which yields NULL).
 * prev   - the child the search just came up from, or NULL when `node` is
 *          the starting point.
 * target - identifier text to find.
 *
 * Returns the first non-placeholder node on the path whose name matches, or
 * a matching child of a scope on the path, or a matching direct member of a
 * Struct child of such a scope. Returns NULL when the root is passed without
 * a match.
 */
IdNode* LookupId(IdNode *node, IdNode *prev, char *target) {
    IdNode *scope = node;
    IdNode *cameFrom = prev;

    /* Each pass handles one level, then climbs to the parent. */
    for (; scope != NULL; cameFrom = scope, scope = scope->parent) {
        if (strcmp(scope->name, target) == 0 && scope->type != Placeholder) {
            return scope;
        }

        /* At the very start of a search the children are never inspected;
         * only looking up the scope tree is valid at that point.
         *
         * A notable consequence: searching for a struct's internals starting
         * from the struct's own node finds nothing. An IdNode stands for the
         * place *where an identifier is first declared*, so it has no
         * knowledge of identifiers declared "inside" of it.
         */
        if (cameFrom == NULL) {
            continue;
        }

        /* In an ordered scope, identifiers declared after the child we came
         * from must stay invisible, so scanning stops at that child. */
        uint32_t limit = scope->childCount;
        if (ScopeHasOrdering(scope)) {
            limit = 0;
            while (limit < scope->childCount && scope->children[limit] != cameFrom) {
                limit++;
            }
        }

        for (uint32_t idx = 0; idx < limit; idx++) {
            IdNode *candidate = scope->children[idx];

            /* Never inspect the node we just came from, nor placeholders. */
            if (candidate == cameFrom || candidate->type == Placeholder) {
                continue;
            }

            if (strcmp(candidate->name, target) == 0) {
                return candidate;
            }

            if (candidate->type != Struct) {
                continue;
            }

            /* Direct members of a sibling struct are visible as well. */
            for (uint32_t m = 0; m < candidate->childCount; m++) {
                IdNode *member = candidate->children[m];
                if (strcmp(member->name, target) == 0) {
                    return member;
                }
            }
        }
    }

    return NULL;
}

46
src/identcheck.h Normal file
View File

@ -0,0 +1,46 @@
/* Validates identifier usage in an AST. */
#ifndef WRAITH_IDENTCHECK_H
#define WRAITH_IDENTCHECK_H
#include <stdint.h>
#include "ast.h"
struct TypeTag;
struct Node;
/* Kind of an identifier-tree node (IdNode). */
typedef enum NodeType {
/* A use of an identifier rather than a declaration; LookupId never returns
 * a placeholder from its child scans. */
Placeholder,
/* Scope whose children are all visible to each other regardless of order. */
UnorderedScope,
/* Scope in which a child only sees siblings that precede it. */
OrderedScope,
/* Declarations. ScopeHasOrdering treats Function and Variable as ordered. */
Struct,
Function,
Variable
} NodeType;
/* One node of the identifier tree: a scope, a declaration, or a placeholder
 * for an identifier use. */
typedef struct IdNode {
/* What this node represents. */
NodeType type;
/* Identifier or scope label; a private copy made by MakeIdNode. */
char *name;
/* Type of the declared entity, or NULL when none has been assigned. */
struct TypeTag *typeTag;
/* Enclosing node, or NULL at the root. */
struct IdNode *parent;
/* Growable array of child nodes; NULL until the first child is added. */
struct IdNode **children;
/* Number of entries in use in `children`. */
uint32_t childCount;
/* Number of entries allocated in `children`. */
uint32_t childCapacity;
} IdNode;
/* Result of an identifier check. Only the Valid status exists so far.
 * NOTE(review): nothing in the code visible here uses this type -- confirm
 * whether it is still needed. */
typedef struct IdStatus {
enum StatusCode {
Valid,
} StatusCode;
} IdStatus;
IdNode* MakeIdTree(struct Node *astNode, IdNode *parent);
void PrintIdNode(IdNode *node);
void PrintIdTree(IdNode *tree, uint32_t tabCount);
int PrintAncestors(IdNode *node);
IdNode* LookdownId(IdNode *root, NodeType targetType, char *targetName);
IdNode* LookupId(IdNode *node, IdNode *prev, char* target);

Unused code

Unused code
#endif /* WRAITH_IDENTCHECK_H */

View File

@ -3,6 +3,7 @@
#include "parser.h"
#include "codegen.h"
#include "identcheck.h"
int main(int argc, char *argv[])
{
@ -64,6 +65,12 @@ int main(int argc, char *argv[])
}
else
{
{
IdNode *idTree = MakeIdTree(rootNode, NULL);
PrintIdTree(idTree, /*tabCount=*/0);
printf("\n");
PrintTree(rootNode, /*tabCount=*/0);
}
exitCode = Codegen(rootNode, optimizationLevel);
}
}

16
src/util.c Normal file
View File

@ -0,0 +1,16 @@
#include "util.h"
#include <stdlib.h>
#include <string.h>
/* Returns a heap-allocated copy of the NUL-terminated string s, or NULL if
 * memory cannot be obtained. The caller is responsible for freeing it. */
char* strdup (const char* s)
{
    size_t total = strlen(s) + 1; /* characters plus the trailing NUL */
    char* duplicate = malloc(total);

    /* memcpy returns its destination, so the copy doubles as the result. */
    return (duplicate == NULL) ? NULL : memcpy(duplicate, s, total);
}

8
src/util.h Normal file
View File

@ -0,0 +1,8 @@
#ifndef WRAITH_UTIL_H
#define WRAITH_UTIL_H
#include <string.h>
char* strdup (const char* s);
#endif /* WRAITH_UTIL_H */

14
types.w Normal file
View File

@ -0,0 +1,14 @@
struct MyStruct {
foo: int;
bar: bool;
MyFunction(): int {
return foo * 420;
}
}
struct Program {
static Main(): int {
decl: Reference<Reference<Reference<Reference<Reference<MyStruct>>>>>;
return 0;
}
}