diff --git a/CMakeLists.txt b/CMakeLists.txt
index 56d5ca6..a3cbf9e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -23,7 +23,7 @@ find_package(LLVM)
 
 include_directories(${CMAKE_SOURCE_DIR})
 
-BISON_TARGET(Parser generators/wraith.y ${CMAKE_CURRENT_BINARY_DIR}/y.tab.c COMPILE_FLAGS "-d -v -t -Wcounterexamples")
+BISON_TARGET(Parser generators/wraith.y ${CMAKE_CURRENT_BINARY_DIR}/y.tab.c COMPILE_FLAGS "-d -v -t")
 FLEX_TARGET(Scanner generators/wraith.lex ${CMAKE_CURRENT_BINARY_DIR}/lex.yy.c)
 ADD_FLEX_BISON_DEPENDENCY(Scanner Parser)
 
@@ -41,9 +41,13 @@ add_executable(
     # Source
     src/ast.h
     src/codegen.h
+    src/identcheck.h
     src/parser.h
+    src/util.h
     src/ast.c
     src/codegen.c
+    src/identcheck.c
     src/parser.c
+    src/util.c
     src/main.c
     # Generated code
diff --git a/commenttest.w b/commenttest.w
new file mode 100644
index 0000000..70cd73e
--- /dev/null
+++ b/commenttest.w
@@ -0,0 +1,6 @@
+struct Program {
+    // This triggers a parse error
+    static Main(): int {
+        return 0;
+    }
+}
\ No newline at end of file
diff --git a/iftest.w b/iftest.w
new file mode 100644
index 0000000..efa5bfa
--- /dev/null
+++ b/iftest.w
@@ -0,0 +1,44 @@
+struct MyStruct {
+    static MyFunc(): int {
+        myStructInt: int = 595959959;
+        return myStructInt;
+    }
+}
+
+struct Program {
+    static Foo(): int {
+        return 0;
+    }
+
+    static Main(): int {
+        myInt: int = 54;
+        if (myInt < 0) {
+            signTag: int = 0 - 1;
+        } else if (myInt == 0) {
+            signTag: int = 0;
+        } else {
+            signTag: int = 1;
+        }
+
+        myBool: bool;
+        if (myBool) {
+            if (myBool) {
+                if (myBool) {
+                    if (myBool) {
+                        if (myBool) {
+                            lol: int = 69;
+                        }
+                    }
+                }
+            }
+        }
+
+        someInt: int = 9585858;
+
+        return 0;
+    }
+
+    static Bar(): int {
+        return 0;
+    }
+}
diff --git a/reftest.w b/reftest.w
new file mode 100644
index 0000000..ece6a0c
--- /dev/null
+++ b/reftest.w
@@ -0,0 +1,11 @@
+struct MyStruct {
+    foo: int;
+}
+
+struct Program {
+    static Main(): int {
+        myStruct: Reference<MyStruct>;
+        myStruct = alloc MyStruct;
+        return 0;
+    }
+}
\ No newline at end of file
diff --git a/src/ast.c b/src/ast.c
index 4f33a63..273739a 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -2,21 +2,8 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include <string.h>
 
-char* strdup (const char* s)
-{
-    size_t slen = strlen(s);
-    char* result = malloc(slen + 1);
-
-    if(result == NULL)
-    {
-        return NULL;
-    }
-
-    memcpy(result, s, slen+1);
-    return result;
-}
+#include "util.h"
 
 const char* SyntaxKindString(SyntaxKind syntaxKind)
 {
@@ -29,6 +16,8 @@ const char* SyntaxKindString(SyntaxKind syntaxKind)
         case Comment: return "Comment";
         case CustomTypeNode: return "CustomTypeNode";
         case Declaration: return "Declaration";
+        case Expression: return "Expression";
+        case ForLoop: return "ForLoop";
         case DeclarationSequence: return "DeclarationSequence";
         case FunctionArgumentSequence: return "FunctionArgumentSequence";
         case FunctionCallExpression: return "FunctionCallExpression";
@@ -108,6 +97,7 @@ Node* MakeIdentifierNode(
     node->syntaxKind = Identifier;
     node->value.string = strdup(id);
     node->childCount = 0;
+    node->typeTag = NULL;
     return node;
 }
 
@@ -517,7 +507,12 @@ static void PrintNode(Node *node, int tabCount)
             break;
 
         case Identifier:
-            printf("%s", node->value.string);
+            if (node->typeTag == NULL) {
+                printf("%s", node->value.string);
+            } else {
+                char *type = TypeTagToString(node->typeTag);
+                printf("%s<%s>", node->value.string, type);
+            }
             break;
 
         case Number:
@@ -537,3 +532,134 @@ void PrintTree(Node *node, uint32_t tabCount)
         PrintTree(node->children[i], tabCount + 1);
     }
 }
+
+/* Allocates a TypeTag whose type is Unknown. Prints a diagnostic and returns
+ * NULL if the allocation fails.
+ */
+static TypeTag* AllocTypeTag(void) {
+    TypeTag *tag = (TypeTag*)malloc(sizeof(TypeTag));
+    if (tag == NULL) {
+        fprintf(stderr, "wraith: Out of memory while allocating a TypeTag.\n");
+        return NULL;
+    }
+    tag->type = Unknown;
+    return tag;
+}
+
+/* Builds a heap-allocated TypeTag for the type that an AST node denotes.
+ *
+ * Type, Declaration and FunctionDeclaration nodes forward to the child that
+ * holds the type, so no tag is allocated for the wrapper itself; allocating
+ * one up front and then overwriting the pointer would leak it.
+ *
+ * Returns NULL when node is NULL, when its SyntaxKind is not supported, or
+ * when allocation fails. The caller owns the returned tag.
+ */
+TypeTag* MakeTypeTag(Node *node) {
+    TypeTag *tag;
+
+    if (node == NULL) {
+        fprintf(stderr, "wraith: Attempted to call MakeTypeTag on null value.\n");
+        return NULL;
+    }
+
+    switch (node->syntaxKind) {
+        case Type:
+        case Declaration:
+            return MakeTypeTag(node->children[0]);
+
+        case FunctionDeclaration:
+            /* children[0] is the signature node; its children[1] is used as
+             * the function's type (presumably the return type). */
+            return MakeTypeTag(node->children[0]->children[1]);
+
+        case PrimitiveTypeNode:
+            tag = AllocTypeTag();
+            if (tag != NULL) {
+                tag->type = Primitive;
+                tag->value.primitiveType = node->primitiveType;
+            }
+            return tag;
+
+        case ReferenceTypeNode:
+            tag = AllocTypeTag();
+            if (tag != NULL) {
+                tag->type = Reference;
+                tag->value.referenceType = MakeTypeTag(node->children[0]);
+            }
+            return tag;
+
+        case CustomTypeNode:
+            tag = AllocTypeTag();
+            if (tag != NULL) {
+                tag->type = Custom;
+                tag->value.customType = strdup(node->value.string);
+            }
+            return tag;
+
+        case StructDeclaration:
+            /* A struct is tagged with its own name (children[0]). */
+            tag = AllocTypeTag();
+            if (tag != NULL) {
+                tag->type = Custom;
+                tag->value.customType = strdup(node->children[0]->value.string);
+                printf("Struct tag: %s\n", TypeTagToString(tag));
+            }
+            return tag;
+
+        default:
+            fprintf(stderr,
+                    "wraith: Attempted to call MakeTypeTag on"
+                    " node with unsupported SyntaxKind: %s\n",
+                    SyntaxKindString(node->syntaxKind));
+            return NULL;
+    }
+}
+
+/* Renders a TypeTag as text, e.g. "Ref<...>" for a reference type.
+ *
+ * Ownership of the result is mixed: Reference tags yield a freshly malloc'd
+ * string, while the other kinds return a string literal, whatever
+ * PrimitiveTypeToString returns, or the tag's own customType pointer. Callers
+ * therefore cannot free the result unconditionally.
+ *
+ * Returns NULL if tag is NULL, if a nested tag cannot be rendered, or if
+ * allocation fails.
+ */
+char* TypeTagToString(TypeTag *tag) {
+    if (tag == NULL) {
+        fprintf(stderr, "wraith: Attempted to call TypeTagToString with null value\n");
+        return NULL;
+    }
+
+    switch (tag->type) {
+        case Unknown:
+            return "Unknown";
+        case Primitive:
+            return PrimitiveTypeToString(tag->value.primitiveType);
+        case Reference: {
+            char *inner = TypeTagToString(tag->value.referenceType);
+            size_t resultSize;
+            char *result;
+
+            if (inner == NULL) {
+                return NULL;
+            }
+
+            /* "Ref<" (4) + inner + ">" (1) + terminating NUL (1). The
+             * previous "+ 5" left no room for the terminator. */
+            resultSize = strlen(inner) + 6;
+            result = malloc(resultSize);
+            if (result == NULL) {
+                return NULL;
+            }
+            snprintf(result, resultSize, "Ref<%s>", inner);
+            return result;
+        }
+        case Custom:
+            return tag->value.customType;
+    }
+
+    /* Only reached if tag->type holds a value outside the enum. */
+    return NULL;
+}
\ No newline at end of file
diff --git a/src/ast.h b/src/ast.h
index 8e49ace..9c1312f 100644
--- a/src/ast.h
+++ b/src/ast.h
@@ -2,6 +2,7 @@
 #define WRAITH_AST_H
 
 #include <stdint.h>
+#include "identcheck.h"
 
 typedef enum
 {
@@ -71,6 +72,26 @@ typedef union
     BinaryOperator binaryOperator;
 } Operator;
 
+typedef struct TypeTag
+{
+    enum Type
+    {
+        Unknown,
+        Primitive,
+        Reference,
+        Custom
+    } type;
+    union
+    {
+        /* Valid when type = Primitive. */
+        PrimitiveType primitiveType;
+        /* Valid when type = Reference. */
+        struct TypeTag *referenceType;
+        /* Valid when type = Custom. */
+        char *customType;
+    } value;
+} TypeTag;
+
 typedef struct Node
 {
     SyntaxKind syntaxKind;
@@ -87,9 +108,10 @@ typedef struct Node
         uint64_t number;
     } value;
     PrimitiveType primitiveType;
+    TypeTag *typeTag;
+    IdNode *idLink;
 } Node;
 
-char* strdup (const char* s);
 const char* SyntaxKindString(SyntaxKind syntaxKind);
 
 uint8_t IsPrimitiveType(Node *typeNode);
@@ -211,5 +233,9 @@ Node* MakeForLoopNode(
 );
 
 void PrintTree(Node *node, uint32_t tabCount);
+const char* SyntaxKindString(SyntaxKind syntaxKind);
+
+TypeTag* MakeTypeTag(Node *node);
+char* TypeTagToString(TypeTag *tag);
 
 #endif /* WRAITH_AST_H */
diff --git a/src/identcheck.c b/src/identcheck.c
new file mode 100644
index 0000000..6fe0ec4
--- /dev/null
+++ b/src/identcheck.c
@@ -0,0 +1,374 @@
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "ast.h"
+#include "identcheck.h"
+#include "util.h"
+
+/* Allocates an IdNode with no children and no type tag.
+ *
+ * The name is copied with strdup, so the caller keeps its own string.
+ */
+IdNode* MakeIdNode(NodeType type, char *name, IdNode *parent) {
+    IdNode *node = (IdNode*)malloc(sizeof(IdNode));
+    node->type = type;
+    node->name = strdup(name);
+    node->parent = parent;
+    node->childCount = 0;
+    node->childCapacity = 0;
+    node->children = NULL;
+    node->typeTag = NULL;
+    return node;
+}
+
+/* Appends child to node's child array, doubling the capacity when full.
+ *
+ * Does nothing if either pointer is NULL. MakeIdTree returns NULL for AST
+ * nodes that introduce no scope or identifier, and main calls it with a NULL
+ * parent, so a NULL node must not be dereferenced here.
+ */
+void AddChildToNode(IdNode *node, IdNode *child) {
+    if (node == NULL || child == NULL) return;
+
+    if (node->children == NULL) {
+        node->childCapacity = 2;
+        node->children = (IdNode**) malloc(sizeof(IdNode*) * node->childCapacity);
+    } else if (node->childCount == node->childCapacity) {
+        node->childCapacity *= 2;
+        node->children = (IdNode**) realloc(node->children, sizeof(IdNode*) * node->childCapacity);
+    }
+
+    node->children[node->childCount] = child;
+    node->childCount += 1;
+}
+
+/* Recursively builds the identifier tree for astNode.
+ *
+ * Scopes and declarations (if/else, for loops, structs, functions,
+ * declaration sequences, variables) get their own IdNode, which is stored in
+ * astNode->idLink and returned so the caller can attach it to parent.
+ * Identifier uses become Placeholder nodes and have their typeTag resolved
+ * through LookupId. Every other kind creates no node: its children are
+ * attached directly to parent and NULL is returned.
+ */
+IdNode* MakeIdTree(Node *astNode, IdNode *parent) {
+    uint32_t i;
+    IdNode *mainNode;
+    switch (astNode->syntaxKind) {
+        case Assignment: {
+            /* An assignment whose target is a declaration contributes only
+             * that declaration; otherwise every operand is scanned. */
+            if (astNode->children[0]->syntaxKind == Declaration) {
+                return MakeIdTree(astNode->children[0], parent);
+            } else {
+                for (i = 0; i < astNode->childCount; i++) {
+                    AddChildToNode(parent, MakeIdTree(astNode->children[i], parent));
+                }
+                return NULL;
+            }
+        }
+
+        case IfStatement: {
+            mainNode = MakeIdNode(OrderedScope, "if", parent);
+            Node *clause = astNode->children[0];
+            Node *stmtSeq = astNode->children[1];
+            for (i = 0; i < clause->childCount; i++) {
+                AddChildToNode(mainNode, MakeIdTree(clause->children[i], mainNode));
+            }
+            for (i = 0; i < stmtSeq->childCount; i++) {
+                AddChildToNode(mainNode, MakeIdTree(stmtSeq->children[i], mainNode));
+            }
+            break;
+        }
+
+        case IfElseStatement: {
+            Node *ifNode = astNode->children[0];
+            Node *elseStmts = astNode->children[1];
+            mainNode = MakeIdNode(OrderedScope, "if-else", parent);
+            IdNode *ifBranch = MakeIdTree(ifNode, mainNode);
+            IdNode *elseBranch = MakeIdNode(OrderedScope, "else", mainNode);
+
+            AddChildToNode(mainNode, ifBranch);
+            for (i = 0; i < elseStmts->childCount; i++) {
+                AddChildToNode(elseBranch, MakeIdTree(elseStmts->children[i], elseBranch));
+            }
+            AddChildToNode(mainNode, elseBranch);
+            break;
+        }
+
+        case ForLoop: {
+            Node *loopDecl = astNode->children[0];
+            Node *loopBody = astNode->children[3];
+            mainNode = MakeIdNode(OrderedScope, "for-loop", parent);
+            AddChildToNode(mainNode, MakeIdTree(loopDecl, mainNode));
+            for (i = 0; i < loopBody->childCount; i++) {
+                AddChildToNode(mainNode, MakeIdTree(loopBody->children[i], mainNode));
+            }
+            break;
+        }
+
+        case Declaration: {
+            mainNode = MakeIdNode(Variable, astNode->children[1]->value.string, parent);
+            mainNode->typeTag = MakeTypeTag(astNode);
+            astNode->children[1]->typeTag = mainNode->typeTag;
+            break;
+        }
+
+        case StructDeclaration: {
+            Node *idNode = astNode->children[0];
+            Node *declsNode = astNode->children[1];
+            mainNode = MakeIdNode(Struct, idNode->value.string, parent);
+            mainNode->typeTag = MakeTypeTag(astNode);
+            for (i = 0; i < declsNode->childCount; i++) {
+                AddChildToNode(mainNode, MakeIdTree(declsNode->children[i], mainNode));
+            }
+            break;
+        }
+
+        case FunctionDeclaration: {
+            Node *sigNode = astNode->children[0];
+            Node *funcNameNode = sigNode->children[0];
+            Node *funcArgsNode = sigNode->children[2];
+            Node *bodyStatementsNode = astNode->children[1];
+            mainNode = MakeIdNode(Function, funcNameNode->value.string, parent);
+            mainNode->typeTag = MakeTypeTag(astNode);
+            funcNameNode->typeTag = mainNode->typeTag;
+            for (i = 0; i < funcArgsNode->childCount; i++) {
+                AddChildToNode(mainNode, MakeIdTree(funcArgsNode->children[i], mainNode));
+            }
+            for (i = 0; i < bodyStatementsNode->childCount; i++) {
+                AddChildToNode(mainNode, MakeIdTree(bodyStatementsNode->children[i], mainNode));
+            }
+            break;
+        }
+
+        case DeclarationSequence: {
+            mainNode = MakeIdNode(UnorderedScope, "", parent);
+            for (i = 0; i < astNode->childCount; i++) {
+                AddChildToNode(mainNode, MakeIdTree(astNode->children[i], mainNode));
+            }
+            break;
+        }
+
+        case Identifier: {
+            mainNode = MakeIdNode(Placeholder, astNode->value.string, parent);
+            IdNode *lookupNode = LookupId(mainNode, NULL, astNode->value.string);
+            if (lookupNode == NULL) {
+                fprintf(stderr, "wraith: Could not find IdNode for id %s\n", astNode->value.string);
+                TypeTag *tag = (TypeTag*)malloc(sizeof(TypeTag));
+                tag->type = Unknown;
+                astNode->typeTag = tag;
+            } else {
+                astNode->typeTag = lookupNode->typeTag;
+            }
+            break;
+        }
+
+        default: {
+            for (i = 0; i < astNode->childCount; i++) {
+                AddChildToNode(parent, MakeIdTree(astNode->children[i], parent));
+            }
+            return NULL;
+        }
+    }
+
+    astNode->idLink = mainNode;
+    return mainNode;
+}
+
+/* Prints a one-line description of node to stdout; scope and placeholder
+ * nodes show their kind, the others show "name : type".
+ */
+void PrintIdNode(IdNode *node) {
+    if (node == NULL) {
+        fprintf(stderr, "wraith: Attempted to call PrintIdNode with null value.\n");
+        return;
+    }
+
+    switch(node->type) {
+        case Placeholder:
+            printf("Placeholder (%s)\n", node->name);
+            break;
+        case OrderedScope:
+            printf("OrderedScope (%s)\n", node->name);
+            break;
+        case UnorderedScope:
+            printf("UnorderedScope (%s)\n", node->name);
+            break;
+        case Struct:
+            printf("%s : %s\n", node->name, TypeTagToString(node->typeTag));
+            break;
+        case Function:
+            printf("%s : Function<%s>\n", node->name, TypeTagToString(node->typeTag));
+            break;
+        case Variable:
+            printf("%s : %s\n", node->name, TypeTagToString(node->typeTag));
+            break;
+    }
+}
+
+/* Prints tree depth-first, one node per line, indenting each level with
+ * "| " repeated tabCount times.
+ */
+void PrintIdTree(IdNode *tree, uint32_t tabCount) {
+    if (tree == NULL) {
+        fprintf(stderr, "wraith: Attempted to call PrintIdTree on a null value.\n");
+        return;
+    }
+
+    uint32_t i;
+    for (i = 0; i < tabCount; i++) {
+        printf("| ");
+    }
+
+    PrintIdNode(tree);
+
+    for (i = 0; i < tree->childCount; i++) {
+        PrintIdTree(tree->children[i], tabCount + 1);
+    }
+}
+
+/* Prints node's ancestors from the root down, then node itself, indenting
+ * each line one step further. Returns the indent used for node, or -1 when
+ * node is NULL (which makes the root print with indent 0).
+ */
+int PrintAncestors(IdNode *node) {
+    if (node == NULL) return -1;
+
+    int i;
+    int indent = 1;
+    indent += PrintAncestors(node->parent);
+    for (i = 0; i < indent; i++) {
+        printf(" ");
+    }
+    PrintIdNode(node);
+    return indent;
+}
+
+/* Breadth-first search of the subtree rooted at root for a node with the
+ * given type and name. Returns the first match, or NULL if there is none.
+ *
+ * The queue only ever grows and is consumed through a head index. The
+ * earlier version shifted the array on every pop and could call realloc
+ * with a size of zero, whose behaviour is implementation-defined.
+ */
+IdNode* LookdownId(IdNode *root, NodeType targetType, char *targetName) {
+    if (root == NULL) {
+        fprintf(stderr, "wraith: Attempted to call LookdownId on a null value.\n");
+        return NULL;
+    }
+
+    IdNode *result = NULL;
+    size_t capacity = 8;
+    size_t head = 0;
+    size_t count = 1;
+    IdNode **queue = (IdNode**)malloc(sizeof(IdNode*) * capacity);
+    queue[0] = root;
+
+    while (head < count) {
+        IdNode *current = queue[head++];
+        uint32_t i;
+
+        if (current->type == targetType && strcmp(current->name, targetName) == 0) {
+            result = current;
+            break;
+        }
+
+        if (count + current->childCount > capacity) {
+            while (count + current->childCount > capacity) {
+                capacity *= 2;
+            }
+            queue = (IdNode**)realloc(queue, sizeof(IdNode*) * capacity);
+        }
+        for (i = 0; i < current->childCount; i++) {
+            queue[count++] = current->children[i];
+        }
+    }
+
+    free(queue);
+    return result;
+}
+
+/* Reports whether declarations inside node are only visible to code that
+ * comes after them.
+ */
+bool ScopeHasOrdering(IdNode *node) {
+    switch (node->type) {
+        case OrderedScope:
+        case Function:
+        case Variable: /* this is only technically true */
+            return true;
+        default:
+            return false;
+    }
+}
+
+/* Resolves the identifier target as seen from node by walking up the tree.
+ *
+ * prev is the node the search arrived from, or NULL on the initial call.
+ * Returns the declaring IdNode, or NULL if nothing visible matches.
+ */
+IdNode* LookupId(IdNode *node, IdNode *prev, char *target) {
+    if (node == NULL) {
+        return NULL;
+    }
+
+    if (strcmp(node->name, target) == 0 && node->type != Placeholder) {
+        return node;
+    }
+
+    /* If this is the start of our search, we should not attempt to look at child nodes. Only
+     * looking up the scope tree is valid at this point.
+     *
+     * This has the notable side-effect that this function will return NULL if you attempt to look
+     * up a struct's internals starting from the node representing the struct itself. This is
+     * because an IdNode corresponds to the location *where an identifier is first declared.* Thus,
+     * an identifier has no knowledge of identifiers declared "inside" of it.
+     */
+    if (prev == NULL) {
+        return LookupId(node->parent, node, target);
+    }
+
+    /* If the current node forms an ordered scope then we want to prevent ourselves from looking
+     * up identifiers declared after the scope we have just come from.
+     */
+    uint32_t idxLimit;
+    if (ScopeHasOrdering(node)) {
+        uint32_t i;
+        for (i = 0, idxLimit = 0; i < node->childCount; i++, idxLimit++) {
+            if (node->children[i] == prev) {
+                break;
+            }
+        }
+    } else {
+        idxLimit = node->childCount;
+    }
+
+    uint32_t i;
+    for (i = 0; i < idxLimit; i++) {
+        IdNode *child = node->children[i];
+        if (child == prev || child->type == Placeholder) {
+            /* Do not inspect the node we just came from or placeholders. */
+            continue;
+        }
+
+        if (strcmp(child->name, target) == 0) {
+            return child;
+        }
+
+        if (child->type == Struct) {
+            uint32_t j;
+            for (j = 0; j < child->childCount; j++) {
+                IdNode *grandchild = child->children[j];
+                if (strcmp(grandchild->name, target) == 0) {
+                    return grandchild;
+                }
+            }
+        }
+    }
+
+    return LookupId(node->parent, node, target);
+}
diff --git a/src/identcheck.h b/src/identcheck.h
new file mode 100644
index 0000000..2c75262
--- /dev/null
+++ b/src/identcheck.h
@@ -0,0 +1,47 @@
+/* Validates identifier usage in an AST. */
+
+#ifndef WRAITH_IDENTCHECK_H
+#define WRAITH_IDENTCHECK_H
+
+#include <stdint.h>
+
+/* ast.h includes this header to obtain IdNode, so it must not be included
+ * from here: the forward declarations below are all this header needs.
+ */
+struct TypeTag;
+struct Node;
+
+typedef enum NodeType {
+    Placeholder,
+    UnorderedScope,
+    OrderedScope,
+    Struct,
+    Function,
+    Variable
+} NodeType;
+
+typedef struct IdNode {
+    NodeType type;
+    char *name;
+    struct TypeTag *typeTag;
+    struct IdNode *parent;
+    struct IdNode **children;
+    uint32_t childCount;
+    uint32_t childCapacity;
+} IdNode;
+
+typedef struct IdStatus {
+    enum StatusCode {
+        Valid,
+    } StatusCode;
+} IdStatus;
+
+
+IdNode* MakeIdTree(struct Node *astNode, IdNode *parent);
+void PrintIdNode(IdNode *node);
+void PrintIdTree(IdNode *tree, uint32_t tabCount);
+int PrintAncestors(IdNode *node);
+IdNode* LookdownId(IdNode *root, NodeType targetType, char *targetName);
+IdNode* LookupId(IdNode *node, IdNode *prev, char* target);
+
+#endif /* WRAITH_IDENTCHECK_H */
diff --git a/src/main.c b/src/main.c
index 48a2301..f58f618 100644
--- a/src/main.c
+++ b/src/main.c
@@ -3,6 +3,7 @@
 
 #include "parser.h"
 #include "codegen.h"
+#include "identcheck.h"
 
 int main(int argc, char *argv[])
 {
@@ -64,6 +65,12 @@ int main(int argc, char *argv[])
         }
         else
         {
+            {
+                IdNode *idTree = MakeIdTree(rootNode, NULL);
+                PrintIdTree(idTree, /*tabCount=*/0);
+                printf("\n");
+                PrintTree(rootNode, /*tabCount=*/0);
+            }
             exitCode = Codegen(rootNode, optimizationLevel);
         }
     }
diff --git a/src/util.c b/src/util.c
new file mode 100644
index 0000000..ae1654f
--- /dev/null
+++ b/src/util.c
@@ -0,0 +1,16 @@
+#include "util.h"
+
+#include <stdlib.h>
+
+char* strdup (const char* s)
+{
+    size_t slen = strlen(s);
+    char* result = malloc(slen + 1);
+    if(result == NULL)
+    {
+        return NULL;
+    }
+
+    memcpy(result, s, slen+1);
+    return result;
+}
\ No newline at end of file
diff --git a/src/util.h b/src/util.h
new file mode 100644
index 0000000..99ffaa7
--- /dev/null
+++ b/src/util.h
@@ -0,0 +1,8 @@
+#ifndef WRAITH_UTIL_H
+#define WRAITH_UTIL_H
+
+#include <string.h>
+
+char* strdup (const char* s);
+
+#endif /* WRAITH_UTIL_H */
diff --git a/types.w b/types.w new file mode 100644 index 0000000..f83e5e5 --- /dev/null +++ b/types.w @@ -0,0 +1,14 @@ +struct MyStruct { + foo: int; + bar: bool; + MyFunction(): int { + return foo * 420; + } +} + +struct Program { + static Main(): int { + decl: Reference>>>>; + return 0; + } +} \ No newline at end of file