From 93dd1d5183e6fc77b11bb68656edd4c429a2c460 Mon Sep 17 00:00:00 2001 From: Simon Staal Date: Mon, 26 Feb 2024 20:14:03 +0000 Subject: [PATCH 01/18] Switched to pragma --- include/ast.hpp | 5 +---- include/ast_constant.hpp | 5 +---- include/ast_context.hpp | 5 +---- include/ast_direct_declarator.hpp | 5 +---- include/ast_function_definition.hpp | 5 +---- include/ast_identifier.hpp | 5 +---- include/ast_jump_statement.hpp | 5 +---- include/ast_node.hpp | 5 +---- include/ast_type_specifier.hpp | 5 +---- include/cli.h | 5 +---- 10 files changed, 10 insertions(+), 40 deletions(-) diff --git a/include/ast.hpp b/include/ast.hpp index 67c58f8..c269fe4 100644 --- a/include/ast.hpp +++ b/include/ast.hpp @@ -1,5 +1,4 @@ -#ifndef AST_HPP -#define AST_HPP +#pragma once #include #include @@ -15,5 +14,3 @@ #include "ast_context.hpp" extern Node *ParseAST(std::string file_name); - -#endif diff --git a/include/ast_constant.hpp b/include/ast_constant.hpp index df0371d..43bbf30 100644 --- a/include/ast_constant.hpp +++ b/include/ast_constant.hpp @@ -1,5 +1,4 @@ -#ifndef AST_CONSTANT_HPP -#define AST_CONSTANT_HPP +#pragma once #include "ast_node.hpp" @@ -14,5 +13,3 @@ class IntConstant : public Node void EmitRISC(std::ostream &stream, Context &context) const override; void Print(std::ostream &stream) const override; }; - -#endif diff --git a/include/ast_context.hpp b/include/ast_context.hpp index f0cad25..6d1ef1c 100644 --- a/include/ast_context.hpp +++ b/include/ast_context.hpp @@ -1,5 +1,4 @@ -#ifndef AST_CONTEXT_HPP -#define AST_CONTEXT_HPP +#pragma once // An object of class Context is passed between AST nodes during compilation. 
// This can be used to pass around information about what's currently being @@ -8,5 +7,3 @@ class Context { /* TODO decide what goes inside here */ }; - -#endif diff --git a/include/ast_direct_declarator.hpp b/include/ast_direct_declarator.hpp index 55cc8c1..0c1ebe6 100644 --- a/include/ast_direct_declarator.hpp +++ b/include/ast_direct_declarator.hpp @@ -1,5 +1,4 @@ -#ifndef AST_DIRECT_DECLARATOR_HPP -#define AST_DIRECT_DECLARATOR_HPP +#pragma once #include "ast_node.hpp" @@ -17,5 +16,3 @@ class DirectDeclarator : public Node void EmitRISC(std::ostream &stream, Context &context) const override; void Print(std::ostream &stream) const override; }; - -#endif diff --git a/include/ast_function_definition.hpp b/include/ast_function_definition.hpp index 506c99c..6cc78f8 100644 --- a/include/ast_function_definition.hpp +++ b/include/ast_function_definition.hpp @@ -1,5 +1,4 @@ -#ifndef AST_FUNCTION_DEFINITION_HPP -#define AST_FUNCTION_DEFINITION_HPP +#pragma once #include "ast_node.hpp" @@ -21,5 +20,3 @@ class FunctionDefinition : public Node void EmitRISC(std::ostream &stream, Context &context) const override; void Print(std::ostream &stream) const override; }; - -#endif diff --git a/include/ast_identifier.hpp b/include/ast_identifier.hpp index 3e080bd..ede9609 100644 --- a/include/ast_identifier.hpp +++ b/include/ast_identifier.hpp @@ -1,5 +1,4 @@ -#ifndef AST_IDENTIFIER_HPP -#define AST_IDENTIFIER_HPP +#pragma once #include "ast_node.hpp" @@ -14,5 +13,3 @@ class Identifier : public Node void EmitRISC(std::ostream &stream, Context &context) const override; void Print(std::ostream &stream) const override; }; - -#endif diff --git a/include/ast_jump_statement.hpp b/include/ast_jump_statement.hpp index 35fbf84..b845fd3 100644 --- a/include/ast_jump_statement.hpp +++ b/include/ast_jump_statement.hpp @@ -1,5 +1,4 @@ -#ifndef AST_JUMP_STATEMENT_HPP -#define AST_JUMP_STATEMENT_HPP +#pragma once #include "ast_node.hpp" @@ -18,5 +17,3 @@ class ReturnStatement : public Node void 
EmitRISC(std::ostream &stream, Context &context) const override; void Print(std::ostream &stream) const override; }; - -#endif diff --git a/include/ast_node.hpp b/include/ast_node.hpp index b0ebdee..e576cfb 100644 --- a/include/ast_node.hpp +++ b/include/ast_node.hpp @@ -1,5 +1,4 @@ -#ifndef AST_NODE_HPP -#define AST_NODE_HPP +#pragma once #include #include @@ -39,5 +38,3 @@ class NodeList : public Node virtual void EmitRISC(std::ostream &stream, Context &context) const override; virtual void Print(std::ostream &stream) const override; }; - -#endif diff --git a/include/ast_type_specifier.hpp b/include/ast_type_specifier.hpp index bda5719..fe93680 100644 --- a/include/ast_type_specifier.hpp +++ b/include/ast_type_specifier.hpp @@ -1,5 +1,4 @@ -#ifndef AST_TYPE_SPECIFIER -#define AST_TYPE_SPECIFIER +#pragma once #include "ast_node.hpp" @@ -14,5 +13,3 @@ class TypeSpecifier : public Node void EmitRISC(std::ostream &stream, Context &context) const override; void Print(std::ostream &stream) const override; }; - -#endif diff --git a/include/cli.h b/include/cli.h index e1ef73a..2e3750d 100644 --- a/include/cli.h +++ b/include/cli.h @@ -1,5 +1,4 @@ -#ifndef LANGPROC_COMPILER_CLI_H -#define LANGPROC_COMPILER_CLI_H +#pragma once #include #include @@ -11,5 +10,3 @@ struct CommandLineArguments }; CommandLineArguments ParseCommandLineArgs(int argc, char **argv); - -#endif From 6463126b7b67500471252e94461cca2fcd128e98 Mon Sep 17 00:00:00 2001 From: Simon Staal Date: Thu, 29 Feb 2024 23:03:54 +0000 Subject: [PATCH 02/18] Refactored use of raw pointers to std::unique_ptr, added const-ness, created AST namespace --- debugging/example-backtrace-3.cpp | 4 +- include/ast.hpp | 2 +- include/ast_constant.hpp | 8 +++- include/ast_context.hpp | 3 ++ include/ast_direct_declarator.hpp | 17 ++++---- include/ast_function_definition.hpp | 24 +++++----- include/ast_identifier.hpp | 10 +++-- include/ast_jump_statement.hpp | 16 +++---- include/ast_node.hpp | 37 +++++++--------- 
include/ast_type_specifier.hpp | 28 +++++++----- src/ast_constant.cpp | 8 +++- src/ast_direct_declarator.cpp | 8 +++- src/ast_function_definition.cpp | 11 +++-- src/ast_identifier.cpp | 8 +++- src/ast_jump_statement.cpp | 8 +++- src/ast_node.cpp | 22 ++++------ src/ast_type_specifier.cpp | 8 ---- src/compiler.cpp | 68 +++++++++++++++-------------- src/lexer.flex | 14 +++--- src/parser.y | 49 +++++++++------------ 20 files changed, 186 insertions(+), 167 deletions(-) delete mode 100644 src/ast_type_specifier.cpp diff --git a/debugging/example-backtrace-3.cpp b/debugging/example-backtrace-3.cpp index 9c44f09..187b239 100644 --- a/debugging/example-backtrace-3.cpp +++ b/debugging/example-backtrace-3.cpp @@ -2,9 +2,9 @@ #include #include -const char *ARRAY_OF_NUMBERS[] = { "1", "2" , "99" }; +const char* ARRAY_OF_NUMBERS[] = { "1", "2" , "99" }; -static int process_arguments(int argc, const char *argv[]) +static int process_arguments(int argc, const char* argv[]) { std::vector numbers(argc - 1); for (int i = 1 ; i < argc ; i++) { diff --git a/include/ast.hpp b/include/ast.hpp index c269fe4..a3d618b 100644 --- a/include/ast.hpp +++ b/include/ast.hpp @@ -13,4 +13,4 @@ #include "ast_constant.hpp" #include "ast_context.hpp" -extern Node *ParseAST(std::string file_name); +extern AST::Node* ParseAST(std::string file_name); diff --git a/include/ast_constant.hpp b/include/ast_constant.hpp index 43bbf30..455bf98 100644 --- a/include/ast_constant.hpp +++ b/include/ast_constant.hpp @@ -2,6 +2,8 @@ #include "ast_node.hpp" +namespace AST { + class IntConstant : public Node { private: @@ -10,6 +12,8 @@ class IntConstant : public Node public: IntConstant(int value) : value_(value) {} - void EmitRISC(std::ostream &stream, Context &context) const override; - void Print(std::ostream &stream) const override; + void EmitRISC(std::ostream& stream, Context& context) const override; + void Print(std::ostream& stream) const override; }; + +} // namespace AST diff --git 
a/include/ast_context.hpp b/include/ast_context.hpp index 6d1ef1c..e2393ee 100644 --- a/include/ast_context.hpp +++ b/include/ast_context.hpp @@ -1,5 +1,6 @@ #pragma once +namespace AST { // An object of class Context is passed between AST nodes during compilation. // This can be used to pass around information about what's currently being // compiled (e.g. function scope and variable names). @@ -7,3 +8,5 @@ class Context { /* TODO decide what goes inside here */ }; + +} // namespace AST diff --git a/include/ast_direct_declarator.hpp b/include/ast_direct_declarator.hpp index 0c1ebe6..d48611f 100644 --- a/include/ast_direct_declarator.hpp +++ b/include/ast_direct_declarator.hpp @@ -2,17 +2,18 @@ #include "ast_node.hpp" +namespace AST { + class DirectDeclarator : public Node { private: - Node *identifier_; + NodePtr identifier_; public: - DirectDeclarator(Node *identifier) : identifier_(identifier){}; - ~DirectDeclarator() - { - delete identifier_; - }; - void EmitRISC(std::ostream &stream, Context &context) const override; - void Print(std::ostream &stream) const override; + DirectDeclarator(Node* identifier) : identifier_(identifier){}; + + void EmitRISC(std::ostream& stream, Context& context) const override; + void Print(std::ostream& stream) const override; }; + +} // namespace AST diff --git a/include/ast_function_definition.hpp b/include/ast_function_definition.hpp index 6cc78f8..703e527 100644 --- a/include/ast_function_definition.hpp +++ b/include/ast_function_definition.hpp @@ -1,22 +1,22 @@ #pragma once #include "ast_node.hpp" +#include "ast_type_specifier.hpp" + +namespace AST { class FunctionDefinition : public Node { private: - Node *declaration_specifiers_; - Node *declarator_; - Node *compound_statement_; + const TypeSpecifier declaration_specifiers_; + NodePtr declarator_; + NodePtr compound_statement_; public: - FunctionDefinition(Node *declaration_specifiers, Node *declarator, Node *compound_statement) : 
declaration_specifiers_(declaration_specifiers), declarator_(declarator), compound_statement_(compound_statement){}; - ~FunctionDefinition() - { - delete declaration_specifiers_; - delete declarator_; - delete compound_statement_; - }; - void EmitRISC(std::ostream &stream, Context &context) const override; - void Print(std::ostream &stream) const override; + FunctionDefinition(TypeSpecifier declaration_specifiers, Node* declarator, Node* compound_statement) : declaration_specifiers_(declaration_specifiers), declarator_(declarator), compound_statement_(compound_statement){}; + + void EmitRISC(std::ostream& stream, Context& context) const override; + void Print(std::ostream& stream) const override; }; + +} // namespace AST diff --git a/include/ast_identifier.hpp b/include/ast_identifier.hpp index ede9609..2ec2577 100644 --- a/include/ast_identifier.hpp +++ b/include/ast_identifier.hpp @@ -2,6 +2,8 @@ #include "ast_node.hpp" +namespace AST { + class Identifier : public Node { private: @@ -9,7 +11,9 @@ class Identifier : public Node public: Identifier(std::string identifier) : identifier_(identifier){}; - ~Identifier(){}; - void EmitRISC(std::ostream &stream, Context &context) const override; - void Print(std::ostream &stream) const override; + + void EmitRISC(std::ostream& stream, Context& context) const override; + void Print(std::ostream& stream) const override; }; + +} // namespace AST diff --git a/include/ast_jump_statement.hpp b/include/ast_jump_statement.hpp index b845fd3..4340e8f 100644 --- a/include/ast_jump_statement.hpp +++ b/include/ast_jump_statement.hpp @@ -2,18 +2,18 @@ #include "ast_node.hpp" +namespace AST { + class ReturnStatement : public Node { private: - Node *expression_; + NodePtr expression_; public: - ReturnStatement(Node *expression) : expression_(expression) {} - ~ReturnStatement() - { - delete expression_; - }; + ReturnStatement(Node* expression) : expression_(expression) {} - void EmitRISC(std::ostream &stream, Context &context) const 
override; - void Print(std::ostream &stream) const override; + void EmitRISC(std::ostream& stream, Context& context) const override; + void Print(std::ostream& stream) const override; }; + +} // namespace AST diff --git a/include/ast_node.hpp b/include/ast_node.hpp index e576cfb..39b3a4e 100644 --- a/include/ast_node.hpp +++ b/include/ast_node.hpp @@ -1,40 +1,35 @@ #pragma once #include +#include #include #include "ast_context.hpp" +namespace AST { + class Node { -protected: - std::vector branches_; - public: - Node(){}; - virtual ~Node(); - virtual void EmitRISC(std::ostream &stream, Context &context) const = 0; - virtual void Print(std::ostream &stream) const = 0; + virtual ~Node() {} + virtual void EmitRISC(std::ostream& stream, Context& context) const = 0; + virtual void Print(std::ostream& stream) const = 0; }; +using NodePtr = std::unique_ptr; + // Represents a list of nodes. class NodeList : public Node { private: - std::vector nodes_; + std::vector nodes_; public: - NodeList(Node *first_node) : nodes_({first_node}) {} - - ~NodeList() - { - for (auto node : nodes_) - { - delete node; - } - } - - void PushBack(Node *item); - virtual void EmitRISC(std::ostream &stream, Context &context) const override; - virtual void Print(std::ostream &stream) const override; + NodeList(Node* first_node) { nodes_.emplace_back(first_node); } + + void PushBack(Node* item); + virtual void EmitRISC(std::ostream& stream, Context& context) const override; + virtual void Print(std::ostream& stream) const override; }; + +} // namespace AST diff --git a/include/ast_type_specifier.hpp b/include/ast_type_specifier.hpp index fe93680..40e373b 100644 --- a/include/ast_type_specifier.hpp +++ b/include/ast_type_specifier.hpp @@ -1,15 +1,23 @@ #pragma once -#include "ast_node.hpp" +#include +#include -class TypeSpecifier : public Node +namespace AST { + +enum class TypeSpecifier { -private: - std::string type_; - -public: - TypeSpecifier(std::string type) : type_(type){}; - 
~TypeSpecifier(){}; - void EmitRISC(std::ostream &stream, Context &context) const override; - void Print(std::ostream &stream) const override; + INT }; + +constexpr std::string_view ToString(TypeSpecifier type) +{ + switch (type) + { + case TypeSpecifier::INT: + return "int"; + } + throw std::runtime_error("Unexpected type specifier"); +} + +} diff --git a/src/ast_constant.cpp b/src/ast_constant.cpp index ac47791..7e21695 100644 --- a/src/ast_constant.cpp +++ b/src/ast_constant.cpp @@ -1,11 +1,15 @@ #include "ast_constant.hpp" -void IntConstant::EmitRISC(std::ostream &stream, Context &context) const +namespace AST { + +void IntConstant::EmitRISC(std::ostream& stream, Context& context) const { stream << "li a0, " << value_ << std::endl; } -void IntConstant::Print(std::ostream &stream) const +void IntConstant::Print(std::ostream& stream) const { stream << value_; } + +} // namespace AST diff --git a/src/ast_direct_declarator.cpp b/src/ast_direct_declarator.cpp index 92fe8fc..8c5f7ea 100644 --- a/src/ast_direct_declarator.cpp +++ b/src/ast_direct_declarator.cpp @@ -1,12 +1,16 @@ #include "ast_direct_declarator.hpp" -void DirectDeclarator::EmitRISC(std::ostream &stream, Context &context) const +namespace AST { + +void DirectDeclarator::EmitRISC(std::ostream& stream, Context& context) const { identifier_->EmitRISC(stream, context); stream << ":" << std::endl; } -void DirectDeclarator::Print(std::ostream &stream) const +void DirectDeclarator::Print(std::ostream& stream) const { identifier_->Print(stream); } + +} // namespace AST diff --git a/src/ast_function_definition.cpp b/src/ast_function_definition.cpp index 2eaf6c6..11e1adb 100644 --- a/src/ast_function_definition.cpp +++ b/src/ast_function_definition.cpp @@ -1,6 +1,8 @@ #include "ast_function_definition.hpp" -void FunctionDefinition::EmitRISC(std::ostream &stream, Context &context) const +namespace AST { + +void FunctionDefinition::EmitRISC(std::ostream& stream, Context& context) const { // Emit assembler 
directives. // TODO: these are just examples ones, make sure you understand @@ -16,10 +18,9 @@ void FunctionDefinition::EmitRISC(std::ostream &stream, Context &context) const } } -void FunctionDefinition::Print(std::ostream &stream) const +void FunctionDefinition::Print(std::ostream& stream) const { - declaration_specifiers_->Print(stream); - stream << " "; + stream << ToString(declaration_specifiers_) << " "; declarator_->Print(stream); stream << "() {" << std::endl; @@ -30,3 +31,5 @@ void FunctionDefinition::Print(std::ostream &stream) const } stream << "}" << std::endl; } + +} diff --git a/src/ast_identifier.cpp b/src/ast_identifier.cpp index cf54d34..b4cd8b0 100644 --- a/src/ast_identifier.cpp +++ b/src/ast_identifier.cpp @@ -1,11 +1,15 @@ #include "ast_identifier.hpp" -void Identifier::EmitRISC(std::ostream &stream, Context &context) const +namespace AST { + +void Identifier::EmitRISC(std::ostream& stream, Context& context) const { stream << identifier_; } -void Identifier::Print(std::ostream &stream) const +void Identifier::Print(std::ostream& stream) const { stream << identifier_; }; + +} diff --git a/src/ast_jump_statement.cpp b/src/ast_jump_statement.cpp index 6a8c499..9799a77 100644 --- a/src/ast_jump_statement.cpp +++ b/src/ast_jump_statement.cpp @@ -1,6 +1,8 @@ #include "ast_jump_statement.hpp" -void ReturnStatement::EmitRISC(std::ostream &stream, Context &context) const +namespace AST { + +void ReturnStatement::EmitRISC(std::ostream& stream, Context& context) const { if (expression_ != nullptr) { @@ -9,7 +11,7 @@ void ReturnStatement::EmitRISC(std::ostream &stream, Context &context) const stream << "ret" << std::endl; } -void ReturnStatement::Print(std::ostream &stream) const +void ReturnStatement::Print(std::ostream& stream) const { stream << "return"; if (expression_ != nullptr) @@ -19,3 +21,5 @@ void ReturnStatement::Print(std::ostream &stream) const } stream << ";" << std::endl; } + +} diff --git a/src/ast_node.cpp b/src/ast_node.cpp index 
e3ab627..a64fe27 100644 --- a/src/ast_node.cpp +++ b/src/ast_node.cpp @@ -1,21 +1,15 @@ #include "ast_node.hpp" -Node::~Node() -{ - for (auto branch : branches_) - { - delete branch; - } -} +namespace AST { -void NodeList::PushBack(Node *item) +void NodeList::PushBack(Node* item) { - nodes_.push_back(item); + nodes_.emplace_back(item); } -void NodeList::EmitRISC(std::ostream &stream, Context &context) const +void NodeList::EmitRISC(std::ostream& stream, Context& context) const { - for (auto node : nodes_) + for (const auto& node : nodes_) { if (node == nullptr) { @@ -25,9 +19,9 @@ void NodeList::EmitRISC(std::ostream &stream, Context &context) const } } -void NodeList::Print(std::ostream &stream) const +void NodeList::Print(std::ostream& stream) const { - for (auto node : nodes_) + for (const auto& node : nodes_) { if (node == nullptr) { @@ -36,3 +30,5 @@ void NodeList::Print(std::ostream &stream) const node->Print(stream); } } + +} diff --git a/src/ast_type_specifier.cpp b/src/ast_type_specifier.cpp deleted file mode 100644 index 71ba511..0000000 --- a/src/ast_type_specifier.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include "ast_type_specifier.hpp" - -void TypeSpecifier::EmitRISC(std::ostream &stream, Context &context) const {} - -void TypeSpecifier::Print(std::ostream &stream) const -{ - stream << type_; -} diff --git a/src/compiler.cpp b/src/compiler.cpp index 1d41250..bbb8880 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -4,41 +4,17 @@ #include "cli.h" #include "ast.hpp" -Node *Parse(CommandLineArguments &args) -{ - std::cout << "Parsing: " << args.compile_source_path << std::endl; - auto root = ParseAST(args.compile_source_path); - std::cout << "AST parsing complete" << std::endl; - return root; -} +using AST::NodePtr; + +NodePtr Parse(const CommandLineArguments& args); // Output the pretty print version of what was parsed to the .printed output // file. 
-void PrettyPrint(Node *root, CommandLineArguments &args) -{ - auto output_path = args.compile_output_path + ".printed"; - - std::cout << "Printing parsed AST..." << std::endl; - std::ofstream output(output_path, std::ios::trunc); - root->Print(output); - output.close(); - std::cout << "Printed parsed AST to: " << output_path << std::endl; -} +void PrettyPrint(const NodePtr& root, const CommandLineArguments& args); // Compile from the root of the AST and output this to the // args.compiledOutputPath file. -void Compile(Node *root, CommandLineArguments &args) -{ - // Create a Context. This can be used to pass around information about - // what's currently being compiled (e.g. function scope and variable names). - Context ctx; - - std::cout << "Compiling parsed AST..." << std::endl; - std::ofstream output(args.compile_output_path, std::ios::trunc); - root->EmitRISC(output, ctx); - output.close(); - std::cout << "Compiled to: " << args.compile_output_path << std::endl; -} +void Compile(const NodePtr& root, const CommandLineArguments& args); int main(int argc, char **argv) { @@ -58,8 +34,36 @@ int main(int argc, char **argv) PrettyPrint(ast_root, command_line_arguments); Compile(ast_root, command_line_arguments); +} - // Clean up afterwards. - delete ast_root; - return 0; +NodePtr Parse(const CommandLineArguments& args) +{ + std::cout << "Parsing: " << args.compile_source_path << std::endl; + NodePtr root{ ParseAST(args.compile_source_path) }; + std::cout << "AST parsing complete" << std::endl; + return root; +} + +void PrettyPrint(const NodePtr& root, const CommandLineArguments& args) +{ + auto output_path = args.compile_output_path + ".printed"; + + std::cout << "Printing parsed AST..." << std::endl; + std::ofstream output(output_path, std::ios::trunc); + root->Print(output); + output.close(); + std::cout << "Printed parsed AST to: " << output_path << std::endl; +} + +void Compile(const NodePtr& root, const CommandLineArguments& args) +{ + // Create a Context. 
This can be used to pass around information about + // what's currently being compiled (e.g. function scope and variable names). + AST::Context ctx; + + std::cout << "Compiling parsed AST..." << std::endl; + std::ofstream output(args.compile_output_path, std::ios::trunc); + root->EmitRISC(output, ctx); + output.close(); + std::cout << "Compiled to: " << args.compile_output_path << std::endl; } diff --git a/src/lexer.flex b/src/lexer.flex index 120fe38..2d7ee2f 100644 --- a/src/lexer.flex +++ b/src/lexer.flex @@ -54,14 +54,14 @@ IS (u|U|l|L)* {L}({L}|{D})* {yylval.string = new std::string(yytext); return(IDENTIFIER);} -0[xX]{H}+{IS}? {yylval.number_int = (int)strtol(yytext, NULL, 0); return(INT_CONSTANT);} -0{D}+{IS}? {yylval.number_int = (int)strtol(yytext, NULL, 0); return(INT_CONSTANT);} -{D}+{IS}? {yylval.number_int = (int)strtol(yytext, NULL, 0); return(INT_CONSTANT);} -L?'(\\.|[^\\'])+' {yylval.number_int = (int)strtol(yytext, NULL, 0); return(INT_CONSTANT);} +0[xX]{H}+{IS}? {yylval.numberInt = (int)strtol(yytext, NULL, 0); return(INT_CONSTANT);} +0{D}+{IS}? {yylval.numberInt = (int)strtol(yytext, NULL, 0); return(INT_CONSTANT);} +{D}+{IS}? {yylval.numberInt = (int)strtol(yytext, NULL, 0); return(INT_CONSTANT);} +L?'(\\.|[^\\'])+' {yylval.numberInt = (int)strtol(yytext, NULL, 0); return(INT_CONSTANT);} -{D}+{E}{FS}? {yylval.number_float = strtod(yytext, NULL); return(FLOAT_CONSTANT);} -{D}*"."{D}+({E})?{FS}? {yylval.number_float = strtod(yytext, NULL); return(FLOAT_CONSTANT);} -{D}+"."{D}*({E})?{FS}? {yylval.number_float = strtod(yytext, NULL); return(FLOAT_CONSTANT);} +{D}+{E}{FS}? {yylval.numberFloat = strtod(yytext, NULL); return(FLOAT_CONSTANT);} +{D}*"."{D}+({E})?{FS}? {yylval.numberFloat = strtod(yytext, NULL); return(FLOAT_CONSTANT);} +{D}+"."{D}*({E})?{FS}? 
{yylval.numberFloat = strtod(yytext, NULL); return(FLOAT_CONSTANT);} L?\"(\\.|[^\\"])*\" {/* TODO process string literal */; return(STRING_LITERAL);} diff --git a/src/parser.y b/src/parser.y index 4243310..75c7539 100644 --- a/src/parser.y +++ b/src/parser.y @@ -7,20 +7,22 @@ %code requires{ #include "ast.hpp" - extern Node *g_root; - extern FILE *yyin; + using namespace AST; + + extern Node* g_root; + extern FILE* yyin; int yylex(void); - void yyerror(const char *); + void yyerror(const char*); } -// Represents the value associated with any kind of AST node. %union{ - Node *node; - NodeList *nodes; - int number_int; - double number_float; - std::string *string; - yytokentype token; + Node* node; + NodeList* nodeList; + int numberInt; + double numberFloat; + std::string* string; + TypeSpecifier typeSpecifier; + yytokentype token; } %token IDENTIFIER INT_CONSTANT FLOAT_CONSTANT STRING_LITERAL @@ -31,22 +33,17 @@ %token STRUCT UNION ENUM ELLIPSIS %token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN -%type translation_unit external_declaration function_definition primary_expression postfix_expression argument_expression_list +%type translation_unit external_declaration function_definition primary_expression postfix_expression %type unary_expression cast_expression multiplicative_expression additive_expression shift_expression relational_expression %type equality_expression and_expression exclusive_or_expression inclusive_or_expression logical_and_expression logical_or_expression -%type conditional_expression assignment_expression expression constant_expression declaration declaration_specifiers init_declarator_list -%type init_declarator type_specifier struct_specifier struct_declaration_list struct_declaration specifier_qualifier_list struct_declarator_list -%type struct_declarator enum_specifier enumerator_list enumerator declarator direct_declarator pointer parameter_list parameter_declaration -%type identifier_list type_name 
abstract_declarator direct_abstract_declarator initializer initializer_list statement labeled_statement -%type compound_statement declaration_list expression_statement selection_statement iteration_statement jump_statement - -%type statement_list +%type conditional_expression assignment_expression expression declarator direct_declarator statement compound_statement jump_statement -%type unary_operator assignment_operator storage_class_specifier +%type statement_list -%type INT_CONSTANT STRING_LITERAL -%type FLOAT_CONSTANT +%type INT_CONSTANT STRING_LITERAL +%type FLOAT_CONSTANT %type IDENTIFIER +%type type_specifier declaration_specifiers %start ROOT @@ -75,7 +72,7 @@ declaration_specifiers type_specifier : INT { - $$ = new TypeSpecifier("int"); + $$ = TypeSpecifier::INT; } ; @@ -125,10 +122,6 @@ postfix_expression : primary_expression ; -argument_expression_list - : assignment_expression - ; - unary_expression : postfix_expression ; @@ -191,9 +184,9 @@ expression %% -Node *g_root; +Node* g_root; -Node *ParseAST(std::string file_name) +Node* ParseAST(std::string file_name) { yyin = fopen(file_name.c_str(), "r"); if(yyin == NULL){ From b779b0b295d52447c8877677728da5d6ffcb8f09 Mon Sep 17 00:00:00 2001 From: Simon Staal Date: Fri, 1 Mar 2024 22:45:57 +0000 Subject: [PATCH 03/18] added comment and valgrind dependency --- Dockerfile | 3 ++- include/ast_node.hpp | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index afaf3f2..1e7a943 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,7 +20,8 @@ RUN apt-get update && apt-get install -y --fix-missing \ curl \ device-tree-compiler \ lcov \ - nano + nano \ + valgrind # Install RISC-V Toolchain WORKDIR /tmp diff --git a/include/ast_node.hpp b/include/ast_node.hpp index 39b3a4e..7710040 100644 --- a/include/ast_node.hpp +++ b/include/ast_node.hpp @@ -16,9 +16,11 @@ class Node virtual void Print(std::ostream& stream) const = 0; }; +// If you don't feel comfortable using std::unique_ptr, 
you can switch NodePtr to be defined +// as a raw pointer instead here and your project should still compile, although you'll need +// to add destructors to avoid leaking memory using NodePtr = std::unique_ptr; -// Represents a list of nodes. class NodeList : public Node { private: From d8d7d80acdacf997632dc8d90cc436744c9ae766 Mon Sep 17 00:00:00 2001 From: Simon Staal Date: Fri, 1 Mar 2024 22:47:42 +0000 Subject: [PATCH 04/18] Removing asan from makefile as it doesn't work with valgrind --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d98ff5e..2016d4c 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ # Based on https://stackoverflow.com/a/52036564 which is well worth reading! -CXXFLAGS += -std=c++20 -W -Wall -g -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -fsanitize=address -static-libasan -O0 -rdynamic --coverage -I include +CXXFLAGS += -std=c++20 -W -Wall -g -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -O0 -rdynamic --coverage -I include SOURCES := $(wildcard src/*.cpp) DEPENDENCIES := $(patsubst src/%.cpp,build/%.d,$(SOURCES)) From f5a5f2f6f5e8edd36e33bc37814e14903246c685 Mon Sep 17 00:00:00 2001 From: Simon Staal Date: Fri, 1 Mar 2024 23:46:07 +0000 Subject: [PATCH 05/18] Solving memory leaks --- src/parser.y | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/parser.y b/src/parser.y index 75c7539..8e14f36 100644 --- a/src/parser.y +++ b/src/parser.y @@ -13,6 +13,7 @@ extern FILE* yyin; int yylex(void); void yyerror(const char*); + int yylex_destroy(void); } %union{ @@ -195,5 +196,7 @@ Node* ParseAST(std::string file_name) } g_root = nullptr; yyparse(); + yylex_destroy(); + fclose(yyin); return g_root; } From 53f775b2263fe49810846714ba8cfba9a82e41b7 Mon Sep 17 00:00:00 2001 From: Simon Staal Date: Fri, 1 Mar 2024 23:56:22 +0000 Subject: [PATCH 06/18] Fixed memory leaks *properly* --- src/parser.y | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/parser.y b/src/parser.y index 8e14f36..b0e5a98 100644 --- a/src/parser.y +++ b/src/parser.y @@ -196,7 +196,7 @@ Node* ParseAST(std::string file_name) } g_root = nullptr; yyparse(); - yylex_destroy(); fclose(yyin); + yylex_destroy(); return g_root; } From af4ba882151753071e0a8a0c0aa128d1ac61c787 Mon Sep 17 00:00:00 2001 From: Simon Staal Date: Mon, 4 Mar 2024 20:19:40 +0000 Subject: [PATCH 07/18] Added asan flags back to makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3023171..30bcb2f 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ # Based on https://stackoverflow.com/a/52036564 which is well worth reading! -CXXFLAGS += -std=c++20 -W -Wall -g -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -O0 -rdynamic --coverage -I include +CXXFLAGS += -std=c++20 -W -Wall -g -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -fsanitize=address -static-libasan -O0 -rdynamic --coverage -I include SOURCES := $(wildcard src/*.cpp) DEPENDENCIES := $(patsubst src/%.cpp,build/%.d,$(SOURCES)) From c2b5042e9a0eea9e8fda8fd12f734846367f3bc0 Mon Sep 17 00:00:00 2001 From: Simon Staal Date: Mon, 4 Mar 2024 23:44:01 +0100 Subject: [PATCH 08/18] Renamed parser types --- src/lexer.flex | 14 +++++++------- src/parser.y | 16 ++++++++-------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/lexer.flex b/src/lexer.flex index 2d7ee2f..120fe38 100644 --- a/src/lexer.flex +++ b/src/lexer.flex @@ -54,14 +54,14 @@ IS (u|U|l|L)* {L}({L}|{D})* {yylval.string = new std::string(yytext); return(IDENTIFIER);} -0[xX]{H}+{IS}? {yylval.numberInt = (int)strtol(yytext, NULL, 0); return(INT_CONSTANT);} -0{D}+{IS}? {yylval.numberInt = (int)strtol(yytext, NULL, 0); return(INT_CONSTANT);} -{D}+{IS}? {yylval.numberInt = (int)strtol(yytext, NULL, 0); return(INT_CONSTANT);} -L?'(\\.|[^\\'])+' {yylval.numberInt = (int)strtol(yytext, NULL, 0); return(INT_CONSTANT);} +0[xX]{H}+{IS}? 
{yylval.number_int = (int)strtol(yytext, NULL, 0); return(INT_CONSTANT);} +0{D}+{IS}? {yylval.number_int = (int)strtol(yytext, NULL, 0); return(INT_CONSTANT);} +{D}+{IS}? {yylval.number_int = (int)strtol(yytext, NULL, 0); return(INT_CONSTANT);} +L?'(\\.|[^\\'])+' {yylval.number_int = (int)strtol(yytext, NULL, 0); return(INT_CONSTANT);} -{D}+{E}{FS}? {yylval.numberFloat = strtod(yytext, NULL); return(FLOAT_CONSTANT);} -{D}*"."{D}+({E})?{FS}? {yylval.numberFloat = strtod(yytext, NULL); return(FLOAT_CONSTANT);} -{D}+"."{D}*({E})?{FS}? {yylval.numberFloat = strtod(yytext, NULL); return(FLOAT_CONSTANT);} +{D}+{E}{FS}? {yylval.number_float = strtod(yytext, NULL); return(FLOAT_CONSTANT);} +{D}*"."{D}+({E})?{FS}? {yylval.number_float = strtod(yytext, NULL); return(FLOAT_CONSTANT);} +{D}+"."{D}*({E})?{FS}? {yylval.number_float = strtod(yytext, NULL); return(FLOAT_CONSTANT);} L?\"(\\.|[^\\"])*\" {/* TODO process string literal */; return(STRING_LITERAL);} diff --git a/src/parser.y b/src/parser.y index b0e5a98..9dd1fe8 100644 --- a/src/parser.y +++ b/src/parser.y @@ -18,11 +18,11 @@ %union{ Node* node; - NodeList* nodeList; - int numberInt; - double numberFloat; + NodeList* node_list; + int number_int; + double number_float; std::string* string; - TypeSpecifier typeSpecifier; + TypeSpecifier type_specifier; yytokentype token; } @@ -39,12 +39,12 @@ %type equality_expression and_expression exclusive_or_expression inclusive_or_expression logical_and_expression logical_or_expression %type conditional_expression assignment_expression expression declarator direct_declarator statement compound_statement jump_statement -%type statement_list +%type statement_list -%type INT_CONSTANT STRING_LITERAL -%type FLOAT_CONSTANT +%type INT_CONSTANT STRING_LITERAL +%type FLOAT_CONSTANT %type IDENTIFIER -%type type_specifier declaration_specifiers +%type type_specifier declaration_specifiers %start ROOT From 6f51c7eca43911e5a7d4a3e1440f7b03f72588de Mon Sep 17 00:00:00 2001 From: Simon Staal Date: 
Thu, 7 Mar 2024 22:45:56 +0000 Subject: [PATCH 09/18] Addressed some PR comments --- include/ast.hpp | 2 +- include/ast_constant.hpp | 4 ++-- include/ast_context.hpp | 6 +++--- include/ast_direct_declarator.hpp | 4 ++-- include/ast_function_definition.hpp | 4 ++-- include/ast_identifier.hpp | 4 ++-- include/ast_jump_statement.hpp | 4 ++-- include/ast_node.hpp | 4 ++-- include/ast_type_specifier.hpp | 2 +- src/ast_constant.cpp | 4 ++-- src/ast_direct_declarator.cpp | 4 ++-- src/ast_function_definition.cpp | 2 +- src/ast_identifier.cpp | 2 +- src/ast_jump_statement.cpp | 2 +- src/ast_node.cpp | 2 +- src/compiler.cpp | 4 ++-- src/parser.y | 2 +- src/parser_full.y.example | 10 +++++----- 18 files changed, 33 insertions(+), 33 deletions(-) diff --git a/include/ast.hpp b/include/ast.hpp index a3d618b..3c3321d 100644 --- a/include/ast.hpp +++ b/include/ast.hpp @@ -13,4 +13,4 @@ #include "ast_constant.hpp" #include "ast_context.hpp" -extern AST::Node* ParseAST(std::string file_name); +extern ast::Node* ParseAST(std::string file_name); diff --git a/include/ast_constant.hpp b/include/ast_constant.hpp index 455bf98..7f30138 100644 --- a/include/ast_constant.hpp +++ b/include/ast_constant.hpp @@ -2,7 +2,7 @@ #include "ast_node.hpp" -namespace AST { +namespace ast { class IntConstant : public Node { @@ -16,4 +16,4 @@ class IntConstant : public Node void Print(std::ostream& stream) const override; }; -} // namespace AST +} // namespace ast diff --git a/include/ast_context.hpp b/include/ast_context.hpp index e2393ee..a00dae9 100644 --- a/include/ast_context.hpp +++ b/include/ast_context.hpp @@ -1,7 +1,7 @@ #pragma once -namespace AST { -// An object of class Context is passed between AST nodes during compilation. +namespace ast { +// An object of class Context is passed between ast nodes during compilation. // This can be used to pass around information about what's currently being // compiled (e.g. function scope and variable names). 
class Context @@ -9,4 +9,4 @@ class Context /* TODO decide what goes inside here */ }; -} // namespace AST +} // namespace ast diff --git a/include/ast_direct_declarator.hpp b/include/ast_direct_declarator.hpp index d48611f..389ff9f 100644 --- a/include/ast_direct_declarator.hpp +++ b/include/ast_direct_declarator.hpp @@ -2,7 +2,7 @@ #include "ast_node.hpp" -namespace AST { +namespace ast { class DirectDeclarator : public Node { @@ -16,4 +16,4 @@ class DirectDeclarator : public Node void Print(std::ostream& stream) const override; }; -} // namespace AST +} // namespace ast diff --git a/include/ast_function_definition.hpp b/include/ast_function_definition.hpp index 703e527..f96c33d 100644 --- a/include/ast_function_definition.hpp +++ b/include/ast_function_definition.hpp @@ -3,7 +3,7 @@ #include "ast_node.hpp" #include "ast_type_specifier.hpp" -namespace AST { +namespace ast { class FunctionDefinition : public Node { @@ -19,4 +19,4 @@ class FunctionDefinition : public Node void Print(std::ostream& stream) const override; }; -} // namespace AST +} // namespace ast diff --git a/include/ast_identifier.hpp b/include/ast_identifier.hpp index 2ec2577..d0539ae 100644 --- a/include/ast_identifier.hpp +++ b/include/ast_identifier.hpp @@ -2,7 +2,7 @@ #include "ast_node.hpp" -namespace AST { +namespace ast { class Identifier : public Node { @@ -16,4 +16,4 @@ class Identifier : public Node void Print(std::ostream& stream) const override; }; -} // namespace AST +} // namespace ast diff --git a/include/ast_jump_statement.hpp b/include/ast_jump_statement.hpp index 4340e8f..a0d0108 100644 --- a/include/ast_jump_statement.hpp +++ b/include/ast_jump_statement.hpp @@ -2,7 +2,7 @@ #include "ast_node.hpp" -namespace AST { +namespace ast { class ReturnStatement : public Node { @@ -16,4 +16,4 @@ class ReturnStatement : public Node void Print(std::ostream& stream) const override; }; -} // namespace AST +} // namespace ast diff --git a/include/ast_node.hpp b/include/ast_node.hpp index 
7710040..76b4fbd 100644 --- a/include/ast_node.hpp +++ b/include/ast_node.hpp @@ -6,7 +6,7 @@ #include "ast_context.hpp" -namespace AST { +namespace ast { class Node { @@ -34,4 +34,4 @@ class NodeList : public Node virtual void Print(std::ostream& stream) const override; }; -} // namespace AST +} // namespace ast diff --git a/include/ast_type_specifier.hpp b/include/ast_type_specifier.hpp index 40e373b..35d9c5d 100644 --- a/include/ast_type_specifier.hpp +++ b/include/ast_type_specifier.hpp @@ -3,7 +3,7 @@ #include #include -namespace AST { +namespace ast { enum class TypeSpecifier { diff --git a/src/ast_constant.cpp b/src/ast_constant.cpp index 7e21695..21759c0 100644 --- a/src/ast_constant.cpp +++ b/src/ast_constant.cpp @@ -1,6 +1,6 @@ #include "ast_constant.hpp" -namespace AST { +namespace ast { void IntConstant::EmitRISC(std::ostream& stream, Context& context) const { @@ -12,4 +12,4 @@ void IntConstant::Print(std::ostream& stream) const stream << value_; } -} // namespace AST +} // namespace ast diff --git a/src/ast_direct_declarator.cpp b/src/ast_direct_declarator.cpp index 8c5f7ea..ab0e19c 100644 --- a/src/ast_direct_declarator.cpp +++ b/src/ast_direct_declarator.cpp @@ -1,6 +1,6 @@ #include "ast_direct_declarator.hpp" -namespace AST { +namespace ast { void DirectDeclarator::EmitRISC(std::ostream& stream, Context& context) const { @@ -13,4 +13,4 @@ void DirectDeclarator::Print(std::ostream& stream) const identifier_->Print(stream); } -} // namespace AST +} // namespace ast diff --git a/src/ast_function_definition.cpp b/src/ast_function_definition.cpp index 11e1adb..17cc200 100644 --- a/src/ast_function_definition.cpp +++ b/src/ast_function_definition.cpp @@ -1,6 +1,6 @@ #include "ast_function_definition.hpp" -namespace AST { +namespace ast { void FunctionDefinition::EmitRISC(std::ostream& stream, Context& context) const { diff --git a/src/ast_identifier.cpp b/src/ast_identifier.cpp index b4cd8b0..b598fe1 100644 --- a/src/ast_identifier.cpp +++ 
b/src/ast_identifier.cpp @@ -1,6 +1,6 @@ #include "ast_identifier.hpp" -namespace AST { +namespace ast { void Identifier::EmitRISC(std::ostream& stream, Context& context) const { diff --git a/src/ast_jump_statement.cpp b/src/ast_jump_statement.cpp index 9799a77..bd90e85 100644 --- a/src/ast_jump_statement.cpp +++ b/src/ast_jump_statement.cpp @@ -1,6 +1,6 @@ #include "ast_jump_statement.hpp" -namespace AST { +namespace ast { void ReturnStatement::EmitRISC(std::ostream& stream, Context& context) const { diff --git a/src/ast_node.cpp b/src/ast_node.cpp index a64fe27..50fecfb 100644 --- a/src/ast_node.cpp +++ b/src/ast_node.cpp @@ -1,6 +1,6 @@ #include "ast_node.hpp" -namespace AST { +namespace ast { void NodeList::PushBack(Node* item) { diff --git a/src/compiler.cpp b/src/compiler.cpp index bbb8880..2794b94 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -4,7 +4,7 @@ #include "cli.h" #include "ast.hpp" -using AST::NodePtr; +using ast::NodePtr; NodePtr Parse(const CommandLineArguments& args); @@ -59,7 +59,7 @@ void Compile(const NodePtr& root, const CommandLineArguments& args) { // Create a Context. This can be used to pass around information about // what's currently being compiled (e.g. function scope and variable names). - AST::Context ctx; + ast::Context ctx; std::cout << "Compiling parsed AST..." 
<< std::endl; std::ofstream output(args.compile_output_path, std::ios::trunc); diff --git a/src/parser.y b/src/parser.y index 9dd1fe8..f3f286e 100644 --- a/src/parser.y +++ b/src/parser.y @@ -7,7 +7,7 @@ %code requires{ #include "ast.hpp" - using namespace AST; + using namespace ast; extern Node* g_root; extern FILE* yyin; diff --git a/src/parser_full.y.example b/src/parser_full.y.example index 29a4148..1861858 100644 --- a/src/parser_full.y.example +++ b/src/parser_full.y.example @@ -3,19 +3,19 @@ %code requires{ #include "ast.hpp" - extern Node *g_root; - extern FILE *yyin; + extern Node* g_root; + extern FILE* yyin; int yylex(void); void yyerror(const char *); } // Represents the value associated with any kind of AST node. %union{ - Node *node; - NodeList *nodes; + Node* node; + NodeList* nodes; int number_int; double number_float; - std::string *string; + std::string* string; yytokentype token; } From 44cea4b13891f74a2aa3e9fe66c34c7a8cc08461 Mon Sep 17 00:00:00 2001 From: Simon Staal Date: Thu, 7 Mar 2024 22:59:18 +0000 Subject: [PATCH 10/18] NodePtr *all* the things --- include/ast.hpp | 2 +- include/ast_direct_declarator.hpp | 2 +- include/ast_function_definition.hpp | 2 +- include/ast_identifier.hpp | 2 +- include/ast_jump_statement.hpp | 2 +- include/ast_node.hpp | 4 ++-- src/ast_node.cpp | 4 ++-- src/compiler.cpp | 2 +- src/parser.y | 14 +++++++------- 9 files changed, 17 insertions(+), 17 deletions(-) diff --git a/include/ast.hpp b/include/ast.hpp index 3c3321d..23cd246 100644 --- a/include/ast.hpp +++ b/include/ast.hpp @@ -13,4 +13,4 @@ #include "ast_constant.hpp" #include "ast_context.hpp" -extern ast::Node* ParseAST(std::string file_name); +ast::NodePtr ParseAST(std::string file_name); diff --git a/include/ast_direct_declarator.hpp b/include/ast_direct_declarator.hpp index 389ff9f..d14cbe2 100644 --- a/include/ast_direct_declarator.hpp +++ b/include/ast_direct_declarator.hpp @@ -10,7 +10,7 @@ class DirectDeclarator : public Node NodePtr identifier_; 
public: - DirectDeclarator(Node* identifier) : identifier_(identifier){}; + DirectDeclarator(NodePtr identifier) : identifier_(std::move(identifier)){}; void EmitRISC(std::ostream& stream, Context& context) const override; void Print(std::ostream& stream) const override; diff --git a/include/ast_function_definition.hpp b/include/ast_function_definition.hpp index f96c33d..e98730f 100644 --- a/include/ast_function_definition.hpp +++ b/include/ast_function_definition.hpp @@ -13,7 +13,7 @@ class FunctionDefinition : public Node NodePtr compound_statement_; public: - FunctionDefinition(TypeSpecifier declaration_specifiers, Node* declarator, Node* compound_statement) : declaration_specifiers_(declaration_specifiers), declarator_(declarator), compound_statement_(compound_statement){}; + FunctionDefinition(TypeSpecifier declaration_specifiers, NodePtr declarator, NodePtr compound_statement) : declaration_specifiers_(declaration_specifiers), declarator_(std::move(declarator)), compound_statement_(std::move(compound_statement)){}; void EmitRISC(std::ostream& stream, Context& context) const override; void Print(std::ostream& stream) const override; diff --git a/include/ast_identifier.hpp b/include/ast_identifier.hpp index d0539ae..41da20e 100644 --- a/include/ast_identifier.hpp +++ b/include/ast_identifier.hpp @@ -10,7 +10,7 @@ class Identifier : public Node std::string identifier_; public: - Identifier(std::string identifier) : identifier_(identifier){}; + Identifier(std::string identifier) : identifier_(std::move(identifier)){}; void EmitRISC(std::ostream& stream, Context& context) const override; void Print(std::ostream& stream) const override; diff --git a/include/ast_jump_statement.hpp b/include/ast_jump_statement.hpp index a0d0108..e83cfcc 100644 --- a/include/ast_jump_statement.hpp +++ b/include/ast_jump_statement.hpp @@ -10,7 +10,7 @@ class ReturnStatement : public Node NodePtr expression_; public: - ReturnStatement(Node* expression) : expression_(expression) {} + 
ReturnStatement(NodePtr expression) : expression_(std::move(expression)) {} void EmitRISC(std::ostream& stream, Context& context) const override; void Print(std::ostream& stream) const override; diff --git a/include/ast_node.hpp b/include/ast_node.hpp index 76b4fbd..4025b5f 100644 --- a/include/ast_node.hpp +++ b/include/ast_node.hpp @@ -27,9 +27,9 @@ class NodeList : public Node std::vector nodes_; public: - NodeList(Node* first_node) { nodes_.emplace_back(first_node); } + NodeList(NodePtr first_node) { nodes_.push_back(std::move(first_node)); } - void PushBack(Node* item); + void PushBack(NodePtr item); virtual void EmitRISC(std::ostream& stream, Context& context) const override; virtual void Print(std::ostream& stream) const override; }; diff --git a/src/ast_node.cpp b/src/ast_node.cpp index 50fecfb..74ef492 100644 --- a/src/ast_node.cpp +++ b/src/ast_node.cpp @@ -2,9 +2,9 @@ namespace ast { -void NodeList::PushBack(Node* item) +void NodeList::PushBack(NodePtr item) { - nodes_.emplace_back(item); + nodes_.push_back(std::move(item)); } void NodeList::EmitRISC(std::ostream& stream, Context& context) const diff --git a/src/compiler.cpp b/src/compiler.cpp index 2794b94..865673b 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -39,7 +39,7 @@ int main(int argc, char **argv) NodePtr Parse(const CommandLineArguments& args) { std::cout << "Parsing: " << args.compile_source_path << std::endl; - NodePtr root{ ParseAST(args.compile_source_path) }; + NodePtr root = ParseAST(args.compile_source_path); std::cout << "AST parsing complete" << std::endl; return root; } diff --git a/src/parser.y b/src/parser.y index f3f286e..c28d352 100644 --- a/src/parser.y +++ b/src/parser.y @@ -63,7 +63,7 @@ external_declaration function_definition : declaration_specifiers declarator compound_statement { - $$ = new FunctionDefinition($1, $2, $3); + $$ = new FunctionDefinition($1, NodePtr($2), NodePtr($3)); } ; @@ -87,7 +87,7 @@ direct_declarator delete $1; } | direct_declarator '(' ')' { 
- $$ = new DirectDeclarator($1); + $$ = new DirectDeclarator(NodePtr($1)); } ; @@ -100,8 +100,8 @@ compound_statement ; statement_list - : statement { $$ = new NodeList($1); } - | statement_list statement { $1->PushBack($2); $$=$1; } + : statement { $$ = new NodeList(NodePtr($1)); } + | statement_list statement { $1->PushBack(NodePtr($2)); $$=$1; } ; jump_statement @@ -109,7 +109,7 @@ jump_statement $$ = new ReturnStatement(nullptr); } | RETURN expression ';' { - $$ = new ReturnStatement($2); + $$ = new ReturnStatement(NodePtr($2)); } ; @@ -187,7 +187,7 @@ expression Node* g_root; -Node* ParseAST(std::string file_name) +NodePtr ParseAST(std::string file_name) { yyin = fopen(file_name.c_str(), "r"); if(yyin == NULL){ @@ -198,5 +198,5 @@ Node* ParseAST(std::string file_name) yyparse(); fclose(yyin); yylex_destroy(); - return g_root; + return NodePtr(g_root); } From 94b30f4be78c8f3052ae2e01e3d301d19a471d8e Mon Sep 17 00:00:00 2001 From: Simon Staal Date: Thu, 7 Mar 2024 23:10:55 +0000 Subject: [PATCH 11/18] Updated NodePtr comment --- include/ast_node.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/ast_node.hpp b/include/ast_node.hpp index 4025b5f..373e3f1 100644 --- a/include/ast_node.hpp +++ b/include/ast_node.hpp @@ -19,6 +19,7 @@ class Node // If you don't feel comfortable using std::unique_ptr, you can switch NodePtr to be defined // as a raw pointer instead here and your project should still compile, although you'll need // to add destructors to avoid leaking memory +// (and get rid of the now unnecessary std::move-s) using NodePtr = std::unique_ptr; class NodeList : public Node From 29fa0895a83497498fa6c1118a9bfb20b22167cf Mon Sep 17 00:00:00 2001 From: Simon Staal Date: Thu, 7 Mar 2024 23:19:58 +0000 Subject: [PATCH 12/18] Updated full parser example --- src/parser.y | 2 +- src/parser_full.y.example | 47 ++++++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/src/parser.y b/src/parser.y index 
c28d352..d7e6fbd 100644 --- a/src/parser.y +++ b/src/parser.y @@ -83,7 +83,7 @@ declarator direct_declarator : IDENTIFIER { - $$ = new Identifier(*$1); + $$ = new Identifier(std::move(*$1)); delete $1; } | direct_declarator '(' ')' { diff --git a/src/parser_full.y.example b/src/parser_full.y.example index 1861858..f52f0b5 100644 --- a/src/parser_full.y.example +++ b/src/parser_full.y.example @@ -3,20 +3,24 @@ %code requires{ #include "ast.hpp" + using namespace ast; + extern Node* g_root; extern FILE* yyin; int yylex(void); - void yyerror(const char *); + void yyerror(const char*); + int yylex_destroy(void); } // Represents the value associated with any kind of AST node. %union{ - Node* node; - NodeList* nodes; - int number_int; - double number_float; - std::string* string; - yytokentype token; + Node* node; + NodeList* node_list; + int number_int; + double number_float; + std::string* string; + TypeSpecifier type_specifier; + yytokentype token; } %token IDENTIFIER INT_CONSTANT FLOAT_CONSTANT STRING_LITERAL @@ -36,13 +40,16 @@ %type identifier_list type_name abstract_declarator direct_abstract_declarator initializer initializer_list statement labeled_statement %type compound_statement declaration_list expression_statement selection_statement iteration_statement jump_statement -%type statement_list +%type statement_list %type unary_operator assignment_operator storage_class_specifier %type INT_CONSTANT STRING_LITERAL %type FLOAT_CONSTANT %type IDENTIFIER +%type type_specifier +// TODO: Make a better type for this (only needed for advanced features) +%type declaration_specifiers %start ROOT @@ -64,7 +71,7 @@ external_declaration function_definition : declaration_specifiers declarator declaration_list compound_statement | declaration_specifiers declarator compound_statement { - $$ = new FunctionDefinition($1, $2, $3); + $$ = new FunctionDefinition($1, NodePtr($2), NodePtr($3)); } | declarator declaration_list compound_statement | declarator compound_statement @@ 
-245,9 +252,7 @@ type_specifier : VOID | CHAR | SHORT - | INT { - $$ = new TypeSpecifier("int"); - } + | INT { $$ = TypeSpecifier::INT; } | LONG | FLOAT | DOUBLE @@ -312,7 +317,7 @@ declarator direct_declarator : IDENTIFIER { - $$ = new Identifier(*$1); + $$ = new Identifier(std::move(*$1)); delete $1; } | '(' declarator ')' @@ -321,7 +326,7 @@ direct_declarator | direct_declarator '(' parameter_list ')' | direct_declarator '(' identifier_list ')' | direct_declarator '(' ')' { - $$ = new DirectDeclarator($1); + $$ = new DirectDeclarator(NodePtr($1)); } ; @@ -419,8 +424,8 @@ declaration_list ; statement_list - : statement { $$ = new NodeList($1); } - | statement_list statement { $1->PushBack($2); $$=$1; } + : statement { $$ = new NodeList(NodePtr($1)); } + | statement_list statement { $1->PushBack(NodePtr($2)); $$=$1; } ; expression_statement @@ -449,7 +454,7 @@ jump_statement $$ = new ReturnStatement(nullptr); } | RETURN expression ';' { - $$ = new ReturnStatement($2); + $$ = new ReturnStatement(NodePtr($2)); } ; @@ -457,9 +462,9 @@ jump_statement %% -Node *g_root; +Node* g_root; -Node *ParseAST(std::string file_name) +NodePtr ParseAST(std::string file_name) { yyin = fopen(file_name.c_str(), "r"); if(yyin == NULL){ @@ -468,5 +473,7 @@ Node *ParseAST(std::string file_name) } g_root = nullptr; yyparse(); - return g_root; + fclose(yyin); + yylex_destroy(); + return NodePtr(g_root); } From 9a67610abec50162e757d965c6da1b6fa2f0e8a9 Mon Sep 17 00:00:00 2001 From: Fiwo735 Date: Sat, 16 Mar 2024 19:00:34 +0000 Subject: [PATCH 13/18] Renamed cli.h into cli.hpp, made cli and compiler more consistent --- include/{cli.h => cli.hpp} | 0 src/cli.cpp | 2 +- src/compiler.cpp | 57 +++++++++++++++++++++++--------------- 3 files changed, 35 insertions(+), 24 deletions(-) rename include/{cli.h => cli.hpp} (100%) diff --git a/include/cli.h b/include/cli.hpp similarity index 100% rename from include/cli.h rename to include/cli.hpp diff --git a/src/cli.cpp b/src/cli.cpp index 
1e581e7..fc0610f 100644 --- a/src/cli.cpp +++ b/src/cli.cpp @@ -1,4 +1,4 @@ -#include +#include CommandLineArguments ParseCommandLineArgs(int argc, char **argv) { diff --git a/src/compiler.cpp b/src/compiler.cpp index 865673b..18d0ca5 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -1,69 +1,80 @@ #include #include -#include "cli.h" +#include "cli.hpp" #include "ast.hpp" using ast::NodePtr; -NodePtr Parse(const CommandLineArguments& args); +// Wrapper for ParseAST defined in YACC +NodePtr Parse(const std::string& compile_source_path); -// Output the pretty print version of what was parsed to the .printed output -// file. -void PrettyPrint(const NodePtr& root, const CommandLineArguments& args); +// Output the pretty print version of what was parsed to the .printed output file. +void PrettyPrint(const NodePtr& root, const std::string& compile_output_path); -// Compile from the root of the AST and output this to the -// args.compiledOutputPath file. -void Compile(const NodePtr& root, const CommandLineArguments& args); +// Compile from the root of the AST and output this to the compiledOutputPath file. +void Compile(const NodePtr& root, const std::string& compile_output_path); int main(int argc, char **argv) { // Parse CLI arguments to fetch the source file to compile and the path to output to. // This retrives [source-file.c] and [dest-file.s], when the compiler is invoked as follows: // ./bin/c_compiler -S [source-file.c] -o [dest-file.s] - auto command_line_arguments = ParseCommandLineArgs(argc, argv); + const auto [compile_source_path, compile_output_path] = ParseCommandLineArgs(argc, argv); - // Parse input and generate AST - auto ast_root = Parse(command_line_arguments); + // Parse input and generate AST. + auto ast_root = Parse(compile_source_path); + + // Check something was actually returned by parseAST(). if (ast_root == nullptr) { - // Check something was actually returned by parseAST(). - std::cerr << "The root of the AST was a null pointer. 
Likely the root was never initialised correctly during parsing." << std::endl; + std::cerr << "The root of the AST was a null pointer. "; + std::cerr << "Likely the root was never initialised correctly during parsing." << std::endl; return 3; } - PrettyPrint(ast_root, command_line_arguments); - Compile(ast_root, command_line_arguments); + // Print AST in a human-readable way. It's not assessed, but exists for your convenience. + PrettyPrint(ast_root, compile_output_path); + + // Compile to RISC-V assembly, the main goal of this project. + Compile(ast_root, compile_output_path); } -NodePtr Parse(const CommandLineArguments& args) +NodePtr Parse(const std::string& compile_source_path) { - std::cout << "Parsing: " << args.compile_source_path << std::endl; - NodePtr root = ParseAST(args.compile_source_path); + std::cout << "Parsing ..." << compile_source_path << std::endl; + + NodePtr root = ParseAST(compile_source_path); + std::cout << "AST parsing complete" << std::endl; + return root; } -void PrettyPrint(const NodePtr& root, const CommandLineArguments& args) +void PrettyPrint(const NodePtr& root, const std::string& compile_output_path) { - auto output_path = args.compile_output_path + ".printed"; + auto output_path = compile_output_path + ".printed"; std::cout << "Printing parsed AST..." << std::endl; + std::ofstream output(output_path, std::ios::trunc); root->Print(output); output.close(); + std::cout << "Printed parsed AST to: " << output_path << std::endl; } -void Compile(const NodePtr& root, const CommandLineArguments& args) +void Compile(const NodePtr& root, const std::string& compile_output_path) { // Create a Context. This can be used to pass around information about // what's currently being compiled (e.g. function scope and variable names). ast::Context ctx; std::cout << "Compiling parsed AST..." 
<< std::endl; - std::ofstream output(args.compile_output_path, std::ios::trunc); + + std::ofstream output(compile_output_path, std::ios::trunc); root->EmitRISC(output, ctx); output.close(); - std::cout << "Compiled to: " << args.compile_output_path << std::endl; + + std::cout << "Compiled to: " << compile_output_path << std::endl; } From 0f4dda4ef009975a9ab9ad28e1f1fe4783013e7a Mon Sep 17 00:00:00 2001 From: Filip Wojcicki <50636446+Fiwo735@users.noreply.github.com> Date: Tue, 19 Mar 2024 18:23:27 +0100 Subject: [PATCH 14/18] Update src/compiler.cpp Co-authored-by: Jpnock --- src/compiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler.cpp b/src/compiler.cpp index 18d0ca5..dda5ce6 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -28,7 +28,7 @@ int main(int argc, char **argv) // Check something was actually returned by parseAST(). if (ast_root == nullptr) { - std::cerr << "The root of the AST was a null pointer. "; + std::cerr << "The root of the AST is a null pointer. "; std::cerr << "Likely the root was never initialised correctly during parsing." 
<< std::endl; return 3; } From ee7d4694414d51cdb69f9b070a29839a4f12b1cb Mon Sep 17 00:00:00 2001 From: Filip Wojcicki <50636446+Fiwo735@users.noreply.github.com> Date: Sat, 23 Mar 2024 03:15:13 +0000 Subject: [PATCH 15/18] Removed unnecessary calls to .close() for ofstream --- src/compiler.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/compiler.cpp b/src/compiler.cpp index dda5ce6..6e24f24 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -59,7 +59,6 @@ void PrettyPrint(const NodePtr& root, const std::string& compile_output_path) std::ofstream output(output_path, std::ios::trunc); root->Print(output); - output.close(); std::cout << "Printed parsed AST to: " << output_path << std::endl; } @@ -74,7 +73,6 @@ void Compile(const NodePtr& root, const std::string& compile_output_path) std::ofstream output(compile_output_path, std::ios::trunc); root->EmitRISC(output, ctx); - output.close(); std::cout << "Compiled to: " << compile_output_path << std::endl; } From f18c0d95d3fc39724706e7442cb3bb28919f6b22 Mon Sep 17 00:00:00 2001 From: Simon Staal Date: Fri, 24 Jan 2025 18:15:32 +0000 Subject: [PATCH 16/18] Cleanup docs --- .devcontainer/devcontainer.json | 3 +- Dockerfile | 4 +- debugging/README.md | 216 ++++++------------- docs/assembler_directives.md | 2 +- docs/{ => assets}/assembler_directives.png | Bin docs/{ => assets}/coverage_example.png | Bin docs/{ => assets}/int_main_return_5_tree.png | Bin docs/{ => assets}/testcase_distribution.png | Bin docs/basic_compiler.md | 2 +- docs/c_compiler.md | 42 ++-- docs/coverage.md | 5 +- src/ast_type_specifier.cpp | 18 ++ 12 files changed, 111 insertions(+), 181 deletions(-) rename docs/{ => assets}/assembler_directives.png (100%) rename docs/{ => assets}/coverage_example.png (100%) rename docs/{ => assets}/int_main_return_5_tree.png (100%) rename docs/{ => assets}/testcase_distribution.png (100%) create mode 100644 src/ast_type_specifier.cpp diff --git a/.devcontainer/devcontainer.json 
b/.devcontainer/devcontainer.json index adfe8ef..7eade13 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -17,7 +17,8 @@ "ms-vscode.makefile-tools", "ms-python.python", "daohong-emilio.yash", - "EditorConfig.EditorConfig" + "EditorConfig.EditorConfig", + "llvm-vs-code-extensions.vscode-clangd" ] } } diff --git a/Dockerfile b/Dockerfile index 1e7a943..4566d4a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,7 +21,9 @@ RUN apt-get update && apt-get install -y --fix-missing \ device-tree-compiler \ lcov \ nano \ - valgrind + valgrind \ + clang \ + bear # Install RISC-V Toolchain WORKDIR /tmp diff --git a/debugging/README.md b/debugging/README.md index 6c40b5a..513cf2b 100644 --- a/debugging/README.md +++ b/debugging/README.md @@ -24,7 +24,7 @@ For simplicity, I will be assuming that you are in a [Unix](https://en.wikipedia.org/wiki/Unix)-based environment. In the case that the tools here will not fit your problems / needs, it -_does_ help to simply lookup "How to fix XXXX" in a search +_does_ help to simply lookup "How to fix XXXX" (or just the error message you get) in a search engine. You might be surprised by the results (This is how I found out about valgrind, one of the tools presented below). @@ -33,17 +33,19 @@ valgrind, one of the tools presented below). If you have programmed in languages like Python or Java, you will be familiar with backtraces when your program crashes. 
-``` +### In Python ([example-backtrace-1.py](./example-backtrace-1.py)) +```console fyquah@olaf$ python3 example-backtrace-1.py -# in python (example-1.py) Traceback (most recent call last): File "example-1.py", line 9, in main() File "example-1.py", line 5, in main x += a[i] IndexError: list index out of range +``` -# in Java (Example2.java) +### In Java ([ExampleBacktrace2.java](./ExampleBacktrace2.java)) +```console fyquah@olaf$ java Example2 Exception in thread "main" java.lang.StackOverflowError at Example2.fibonaci(Example2.java:5) @@ -52,52 +54,36 @@ Exception in thread "main" java.lang.StackOverflowError .... at Example2.fibonaci(Example2.java:5) at Example2.fibonaci(Example2.java:5) +``` -# In C++, however ... :( <-- this is a sad emoji -fyquah@olaf: debugging $ ./example-backtrace-3 +### In C++, however ... ([example-backtrace-3.cpp](./example-backtrace-3.cpp)) +```console +fyquah@olaf$ ./example-backtrace-3 Segmentation fault ``` Having a backtraces is incredibly helpful, as it helps you pinpoint where in the code did the program crash. To get something similar in C/C++: -- Make sure you compile your object files with the `-g` flag. Read the g++'s - manual pages (`man g++`) to see what this flag does. There are many ways - to encode source information within binaries, one of the most popular - ways is to use [DWARF](http://dwarfstd.org) (You are not required to - understand how DWARF works, but nevertheless, - [this introduction on DWARF](http://dwarfstd.org/doc/Debugging%20using%20DWARF-2012.pdf) - is quite an entertaining read) -- Compilers allow you to compile your code with different levels of - optimisation, e.g. `-O0` for fast compilation or `-O3` for agressive - optimisation. When debugging, DO NOT compile with `-O3`. While debuggers may still work, - they will most likely not yield much useful information. +- Make sure you compile your object files with the `-g` flag. 
This essentially compiles your code with debug symbols, which will make the information available to a debugger like `gdb`. + - If you'd like, read `g++`'s manual pages (`man g++`) to learn more about what this flag does. There are many ways to encode source information within binaries, one of the most popular ways is to use [DWARF](http://dwarfstd.org) (You are not required to understand how DWARF works, but nevertheless, [this introduction on DWARF](http://dwarfstd.org/doc/Debugging%20using%20DWARF-2012.pdf) is quite an entertaining read) +- Compilers allow you to compile your code with different levels of optimisation, e.g. `-O0` for fast compilation or `-O3` for aggressive optimisation. When debugging, compile with `-O0`. While debuggers may still work with optimisation enabled (i.e. `-O1` or higher), they will most likely not yield much useful information. - Execute your binary with a [debugger](https://en.wikipedia.org/wiki/Debugger) Your choice of debuggers are as follows: -- On Linux, the GNU debugger (commonly referred to as `gdb`) should work out - of the box. The EE lab machines are setup with `gdb`. However, should you need - to install it yourself, most Linux distributions will have `gdb` as part of their - package managers and can easily be installed using `apt-get`. -- On MacOS, you can either use `lldb`, using this - [command translation table](https://lldb.llvm.org/lldb-gdb.html) - OR you can also try installing `gdb` and - [code signing it](https://gist.github.com/gravitylow/fb595186ce6068537a6e9da6d8b5b96d). - It is recommended to stick with `lldb` as code signing `gdb` - can be a troublesome process. +- On Linux, the GNU debugger (commonly referred to as `gdb`) should work out of the box and is pre-installed in the Docker container. However, should you need to install it yourself, most Linux distributions will have `gdb` as part of their package managers and can easily be installed using `apt-get`. 
+ - Most IDEs will provide debugger support via gdb integration. If you're using VSCode, we've already provided a debugging configuration in [launch.json](../.vscode/launch.json), which will run your compiler on [example.c](../compiler_tests/_example/example.c). You can change the testcase it runs on by specifying it in the `args` list. See [the VSCode docs](https://code.visualstudio.com/docs/editor/debugging) on how you can use it, it essentially gives you the same functionality as `gdb` on the command line, but with a much nicer user interface. +- On MacOS, you can either use `lldb`, using this [command translation table](https://lldb.llvm.org/lldb-gdb.html) OR you can also try installing `gdb` and [code signing it](https://gist.github.com/gravitylow/fb595186ce6068537a6e9da6d8b5b96d). It is recommended to stick with `lldb` as code signing `gdb` can be a troublesome process. -*The following text will assume you are using gdb.* +*The following text will assume you are using `gdb` on the command line.* -There is a little program called [example-backtrace-3.cpp](./example-backtrace-3.cpp) which -takes a list of numbers as its input arguments sorts them, and outputs the -last element of the list. There is a bug in the program that will result in -a segmentaion fault (Exercise: Spot the bug). +There is a little program called [example-backtrace-3.cpp](./example-backtrace-3.cpp) which takes a list of numbers as its input arguments sorts them, and outputs the last element of the list. There is a bug in the program that will result in a segmentation fault (Exercise: Spot the bug). To view a backtrace when running a program, -```bash -fyquah@olaf: debugging $ gdb --args ./example-3 +```console +fyquah@olaf$ gdb --args ./example-3 GNU gdb (GDB) 8.0 Copyright (C) 2017 Free Software Foundation, Inc. License GPLv3+: GNU GPL version 3 or later @@ -128,11 +114,9 @@ Program received signal SIGSEGV, Segmentation fault. 
This follows the following steps: -- Start `gdb` whilst specifying the target binary (and optionally - command line arguments) +- Start `gdb` whilst specifying the target binary (and optionally command line arguments) - Call `run` to execute the program -- Your program crashes (or suceededs, in which it you don't need to do - anything) +- Your program crashes (or succeeds, in which case you don't need to do anything) - Call `bt` - Marvel in the abilities of debuggers -- "You can do that in C++??" - (Optional) Print some variables to figure out the source of the problem @@ -147,7 +131,7 @@ There is a lot more you can do with gdb, like printing variables, calling functions, inserting breakpoints or even manually step through your code. A little demo of some of its features: -```bash +```console (gdb) call (int) printf("%d\n", argc) 1 $11 = 2 @@ -164,51 +148,28 @@ $14 = { >> = {_M_impl = {left` and `n5->right` are not -assigned appropriately and (2) `n5` is not allocated enough memory (as `sizeof(int)` < -`sizeof(struct tree_t)`. Surprisingly enough, the code doesn't always crash. - -```bash -fyquah@olaf: debugging $ make example-leak +[Valgrind](http://valgrind.org) is a dynamic analysis tool to help with various issues, such as threading bugs, memory management issues, cache-profiling and heap-profiling. We are primarily interested in memory checking (Memcheck) using dynamic analysis that helps us detect memory errors. Valgrind is available in most linux distributions. + +Valgrind should come pre-installed in your Docker container, and you should be able to install `valgrind` from package managers, eg: `sudo apt-get install valgrind` on most linux distributions. You can also [download valgrind](http://valgrind.org/downloads/current.html) and compile it from source. + +Consider the example in [example-leak.c](./example-leak.c). 
There are two problems with the code: (1) `n5->left` and `n5->right` are not assigned appropriately and (2) `n5` is not allocated enough memory (as `sizeof(int)` < `sizeof(struct tree_t)`). Surprisingly enough, the code doesn't always crash. + +```console +fyquah@olaf$ make example-leak cc -Wall -g example-leak.c -o example-leak -fyquah@olaf: debugging $ ./example-leak +fyquah@olaf$ ./example-leak 6 5 8 @@ -217,21 +178,12 @@ fyquah@olaf: debugging $ ./example-leak 7 ``` -There are various explainations you can give as to why this program doesn't -crash, but this is not a behaviour you should not rely upon. -[This stackoverflow post](https://stackoverflow.com/questions/8029584/why-does-malloc-initialize-the-values-to-0-in-gcc/8029624#8029624) -gives insight as to why the values are zero-ed out and why there wasn't a -segmentation fault, which explains why the program the pointer didn't end up -derefencing a jiberrish pointer. As to why the memory access didn't result -in a segmentation fault due to accessing unallocated memory, try to recall -how memory and page tables are organised in an operating system). - -To rectify this memory problem, we can use valgrind -to diagnose the source of the problem: +There are various explanations you can give as to why this program doesn't crash, but this is not a behaviour you should rely upon. [This stackoverflow post](https://stackoverflow.com/questions/8029584/why-does-malloc-initialize-the-values-to-0-in-gcc/8029624#8029624) gives insight as to why the values are zero-ed out and why there wasn't a segmentation fault, which explains why the program didn't end up dereferencing a gibberish pointer. As to why the memory access didn't result in a segmentation fault due to accessing unallocated memory, try to recall how memory and page tables are organised in an operating system.
+To rectify this memory problem, we can use valgrind to diagnose the source of the problem: -```bash -fyquah@olaf: debugging $ valgrind ./example-leak +```console +fyquah@olaf$ valgrind ./example-leak ==7177== Memcheck, a memory error detector ==7177== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al. ==7177== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info @@ -282,19 +234,15 @@ fyquah@olaf: debugging $ valgrind ./example-leak There are two main things happening: -1. There are two error messages about the addresses with an incorrect amount of - memory allocated. This is an extremely good demonstration of the power of - valgrind - (a) It tells you when. (Thought exercise: Where did the numbers, - "Address 0x5203228 is 4 bytes" and "Address 0x5203230 is 12 bytes" - come from?) +1. There are two error messages about the addresses with an incorrect amount of memory allocated. This is an extremely good demonstration of the power of valgrind - (a) It tells you when. (Thought exercise: Where did the numbers, "Address 0x5203228 is 4 bytes" and "Address 0x5203230 is 12 bytes" come from?) 2. Memory leak (as expected), as we are not de-allocating memory ourselves. Let's fix the `malloc` call and set the argument to the right size, then see what happens: -``` -fyquah@olaf: debugging $ make example-leak -fyquah@olaf: debugging $ valgrind ./example-leak +```console +fyquah@olaf$ make example-leak +fyquah@olaf$ valgrind ./example-leak ==7892== Memcheck, a memory error detector ==7892== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al. 
==7892== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info @@ -346,9 +294,9 @@ valgrind alone and it is more likely that gdb will be more useful Anyways, if we fix the assignments (by assigning `n5->left` and `n5->right` to NULL), we are left with: -```bash -fyquah@olaf: debugging $ make example-leak -fyquah@olaf: debugging $ valgrind ./example-leak +```console +fyquah@olaf$ make example-leak +fyquah@olaf$ valgrind ./example-leak ==8192== Memcheck, a memory error detector ==8192== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al. ==8192== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info @@ -377,72 +325,44 @@ fyquah@olaf: debugging $ valgrind ./example-leak ==8192== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0) ``` -The only problem left will be memory leaks. It is not reported as an error -because it is often the case that heap memory freed simply by program -termination is sufficient. +The only problem left will be memory leaks. It is not reported as an error because this heap memory is still freed by the operating system once the program terminates, but it is bad practice, and can easily be avoided by following the [RAII paradigm](https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization). -Unless you are writing your compiler in C (or C++ that looks like C), the -main avenue in which you will bump into the memory block size issue is in the -following type of code: +Unless you are writing your compiler in C (or C++ that looks like C), the main avenue in which you will bump into the memory block size issue is in the following type of code: ```C++ class Statement : public Node { ... 
} -static void -some_function() +static void some_function() { Node *node = node; - Statement *stmt = (Staement*) node; // This is an invariant: I know what I am doing + Statement *stmt = (Statement*) node; // This is an invariant: I know what I am doing } ``` -In most cases, you actually don't know what you are doing. For that reason, -you really should avoid such casts when you can. Unfortunately, compiler -authors, inevitably, write compilers with a lot of hand-maintained -invariants. When you see yourself writing a type-cast as such in your code, -it is worth asking yourself whether it is possible to modify your code slightly -to avoid this cast altogether. If that's not trivially possible and you have -to perform type-casting, use `static_cast<>` or `dynamic_cast<>` -(See [this stackoverflow post](https://stackoverflow.com/a/1255015) -for an explaination on their differences). +In most cases, you actually don't know what you are doing. For that reason, you really should avoid such casts when you can. Unfortunately, compiler authors, inevitably, write compilers with a lot of hand-maintained invariants. When you see yourself writing a type-cast as such in your code, it is worth asking yourself whether it is possible to modify your code slightly to avoid this cast altogether. If that's not trivially possible and you have to perform type-casting, use `static_cast<>` or `dynamic_cast<>` (See [this stackoverflow post](https://stackoverflow.com/a/1255015) for an explanation on their differences). ## Static Analysis -Valgrind and GDB falls under the category of dynamic analysis, where -the program is run and information gained at runtime is used to search for errors. -Another common form of program analysis is [static analysis](https://en.wikipedia.org/wiki/Static_program_analysis), -where the analysers checks for problems in your code without -executing it. There are a lot of tools available (for free!) to perform -such types of analysis. 
Some are built into modern IDEs such as XCode, while -others require some form of setup. We will limit our discussion to one -specific tool, the [clang static analyser](https://clang-analyzer.llvm.org/). - -The clang static analyser generates a really nice web UI for you to navigate -through errors. Static analyser tools in general have very low -false positive rates. That is, they may not necessarily report all errors, -but all errors they report are very likely to be genuine errors. - -Similar to valgrind, the clang static analyser is not setup in the EEE lab -machines. To install the clang static analyser, you should install -[clang](https://clang.llvm.org/) using your package manager. In Ubuntu, -it ought to be as simple as `sudo apt-get install clang`. (Compiling +Valgrind and GDB fall under the category of dynamic analysis, where the program is run and information gained at runtime is used to search for errors. Another common form of program analysis is [static analysis](https://en.wikipedia.org/wiki/Static_program_analysis), where the analyser checks for problems in your code without executing it. There are a lot of tools available (for free!) to perform such types of analysis. Some are built into modern IDEs such as XCode, while +others require some form of setup. We will limit our discussion to one specific tool, the [clang static analyser](https://clang-analyzer.llvm.org/). + +The clang static analyser generates a really nice web UI for you to navigate through errors. Static analyser tools in general have very low false positive rates. That is, they may not necessarily report all errors, but all errors they report are very likely to be genuine errors. + +Similar to valgrind, the clang static analyser should also be included in your Docker container. To install the clang static analyser yourself, you should install [clang](https://clang.llvm.org/) using your package manager. In Ubuntu, it ought to be as simple as `sudo apt-get install clang`.
(Compiling clang from source is a bit tricky, and not recommended). Setting up the clang static analyser is surprisingly simple: 0. Install the `clang` and `llvm` toolchains on your local setup. 1. Run `scan-build make your_make_target` -2. Wait for compilation. You will notice that compilation is noticeably - slower due to the static analyser running simultaneously to the compiler. -3. You will get a message telling you how many errors were found and how to - view them. -4. You can either call `scan-view` as instructed, or simply navigate to - `/tmp/scan-build-XXXXXXX/index.html` on your web browser. +2. Wait for compilation. You will notice that compilation is noticeably slower due to the static analyser running simultaneously to the compiler. +3. You will get a message telling you how many errors were found and how to view them. +4. You can either call `scan-view` as instructed, or simply navigate to `/tmp/scan-build-XXXXXXX/index.html` on your web browser. -``` -fyquah@olaf: project $ scan-build make +```console +fyquah@olaf$ scan-build make scan-build: Using '/usr/bin/clang-4.0' for static analysis flex -o src/c_lexer.yy.c src/c_lexer.lex .... (snip) ... diff --git a/docs/assembler_directives.md b/docs/assembler_directives.md index 8c90162..32f1c09 100644 --- a/docs/assembler_directives.md +++ b/docs/assembler_directives.md @@ -12,4 +12,4 @@ In the [`scripts/test.py`](../scripts/test.py) script, when running testcases, t The below picture offers a quick walk-through of a very simple program with detailed annotations describing the meaning behind the included directives. Some of them a crucial (e.g. section specifiers, labels, data emitting) while others not so much (e.g. file attributes, compiler identifier, symbol types) - you will get a feel for them during the development of the compiler. Most importantly, you only need to set the correct section and provide function directives as long as you deal with local variables. 
**In other words, you can postpone studying this document in details until you decide to deal with global variables.** -![Assembler directives](./assembler_directives.png) +![Assembler directives](./assets/assembler_directives.png) diff --git a/docs/assembler_directives.png b/docs/assets/assembler_directives.png similarity index 100% rename from docs/assembler_directives.png rename to docs/assets/assembler_directives.png diff --git a/docs/coverage_example.png b/docs/assets/coverage_example.png similarity index 100% rename from docs/coverage_example.png rename to docs/assets/coverage_example.png diff --git a/docs/int_main_return_5_tree.png b/docs/assets/int_main_return_5_tree.png similarity index 100% rename from docs/int_main_return_5_tree.png rename to docs/assets/int_main_return_5_tree.png diff --git a/docs/testcase_distribution.png b/docs/assets/testcase_distribution.png similarity index 100% rename from docs/testcase_distribution.png rename to docs/assets/testcase_distribution.png diff --git a/docs/basic_compiler.md b/docs/basic_compiler.md index bd68886..24a9da0 100644 --- a/docs/basic_compiler.md +++ b/docs/basic_compiler.md @@ -10,7 +10,7 @@ int f() { The compiler is able to traverse the following AST related to the above program. In order to expand its capabilities, you should develop the parser and the corresponding code generation at the same time -- you are advised not to fully implement one before the other. -![int_main_return_tree](./int_main_return_5_tree.png) +![int_main_return_tree](./assets/int_main_return_5_tree.png) The lexer and parser are loosely based on the "official" grammar covered [here](https://www.lysator.liu.se/c/ANSI-C-grammar-l.html) and [here](https://www.lysator.liu.se/c/ANSI-C-grammar-y.html) respectively. While they should suffice for a significant portions of features, you might need to improve them to implement the more advanced ones. 
If you find the grammar too complicated to understand, it is also perfectly fine to create your own simple grammar and build upon it as you add more features. diff --git a/docs/c_compiler.md b/docs/c_compiler.md index fc36109..83b9073 100644 --- a/docs/c_compiler.md +++ b/docs/c_compiler.md @@ -1,14 +1,11 @@ -Main coursework: A compiler for the C language -============================================== +# Main coursework: A compiler for the C language Your program should read C source code from a given file, and write corresponding RISC-V assembly to another given file. -Environment ------------ +## Environment [How to set up your environment?](./environment_guide.md) -Developing your compiler ------------------------- +## Developing your compiler If you wish to use C++, then a basic framework for building your compiler has been provided. You are strongly recommended to check out its structure [here](./basic_compiler.md). @@ -56,8 +53,7 @@ By default, the first [`_example/example.c`](../compiler_tests/_example/example. This basic framework is only able to compile a very simple program, as described [here](./basic_compiler.md). -Program build and execution ---------------------------- +## Program build and execution Your program should be built by running the following command in the top-level directory of your repo: @@ -73,8 +69,7 @@ The compilation function is invoked using the flag `-S`, with the source file an You can assume that the command-line (CLI) arguments will always be in this order, and that there will be no spaces in source or destination paths. Note that the provided starting point in this repository already functions as specified above, so these CLI arguments should work out of the box (unless you decide not to use the provided base compiler). -Input ------ +## Input The input file will be pre-processed [ANSI C](https://en.wikipedia.org/wiki/ANSI_C), also called C90 or C89. 
It is what is generally thought of as "classic" or "normal" C, but not the _really_ old one without function prototypes (you may never have come across that). C90 is still often used in embedded systems, and pretty much the entire Linux kernel is in C90. @@ -84,8 +79,7 @@ The source code will not contain any compiler-specific or platform-specific exte The test inputs will be a set of files of increasing complexity and variety. The test inputs will not have syntax errors or other programming errors, so your code does not need to handle these gracefully. -Features -------- +## Features Here is a list of basic features that you might like to implement first. @@ -117,7 +111,7 @@ Here is a list of more advanced features like you might like to implement once t * calling externally-defined functions (i.e. the file being compiled declares a function, but its definition is provided in a different file that is linked in later on) * functions that take more than 8 parameters * mutually recursive function calls -* locally scoped variable declarations (e.g. a variable that is declared inside the body of a while loop, such as `while(...) { int x = ...; ... }`. +* locally scoped variable declarations (e.g. a variable that is declared inside the body of a while loop, such as `while(...) { int x = ...; ... }`). * the `typedef` keyword * the `sizeof(...)` function (which takes either a type or a variable) * taking the address of a variable using the `&` operator @@ -145,8 +139,7 @@ Here is a (partial) list of features that will not be tested. * the `void` type is not tested explicitly, but it appears in some helper functions in the test cases, so your compiler cannot break when it encounters this keyword * the `static` keyword -Test cases ----------- +## Test cases All test inputs will be valid; that is, you can assume the absence of programmer errors like syntax faults, type mismatches, and array out-of-bounds errors. 
The entire compilation and testing process (including compilation, assembly, linking, and RISC-V simulation) is expected to complete within ten seconds per program (which should be plenty of time!), and is expected not to use an inordinate amount of memory or disk space. There is no requirement for the generated assembly to be optimised in any way -- the only requirement is that it produces the correct answer. @@ -154,10 +147,9 @@ The [compiler_tests](../compiler_tests) contains a large number of example input The split between test cases last year can be seen below. Do not assume it will stay the same this year, but you can use it as a rough estimate of what to focus on in case you are running short on time. **Remember that tests for advanced features will also test basic features, so you should implement the basic features first (e.g. without working functions the array tests will fail).** -![Testcase distribution](./testcase_distribution.png) +![Testcase distribution](./assets/testcase_distribution.png) -Output Format -------------- +## Output Format The output format should be RISC-V assembly code. @@ -206,12 +198,10 @@ I then use spike to simulate the executable on RISC-V, like so: This command should produce the exit code `0`. -Assembler directives ---------------- +## Assembler directives [You will need to consider assembler directives in your output](./assembler_directives.md) -Useful links ------------- +## Useful links * [Godbolt](https://godbolt.org/z/vMMnWbsff) - Great tool for viewing what a real (`gcc` in this case) RISC-V compiler would produce for a given snippet of C code. This link is pre-configured for the correct architecture (`RV32IMFD`) and ABI (`ILP32D`) that the coursework targets. Code optimisation is also disabled to best mimic what you might want your compiler to output. 
You can replicate Godbolt locally by running `riscv64-unknown-elf-gcc -std=c90 -pedantic -ansi -O0 -march=rv32imfd -mabi=ilp32d -S [source-file.c] -o [dest-file.s]`, which might make debugging and directives analysis easier for some. * [Interactive RISC-V simulator](https://creatorsim.github.io/creator) - Might be helpful when trying to work out the behaviour of certain instructions that Godbolt emits. @@ -222,10 +212,10 @@ Useful links * [RISC-V Assembler Reference](https://michaeljclark.github.io/asm.html) - Very useful resource containing information about structuring your output assembly files and most importantly the assembler directives - if you don't know the meaning behind `.data`, `.text`, or `.word` then definitely check this out as well as experiment with Godbolt to see how it actually emits them. -Getting started ---------------- +## Getting started [How to get started? (previous students' perspectives)](./starting_guide.md) -Coverage information ------------ +## Coverage information [Do you want to know which part of your code is executed when running your compiler on a file?](./coverage.md) + + diff --git a/docs/coverage.md b/docs/coverage.md index af4377d..30c7819 100644 --- a/docs/coverage.md +++ b/docs/coverage.md @@ -1,11 +1,10 @@ -Coverage information -==================== +# Coverage information If you want to know which part of your code is executed when running your compiler on a file you can run your compiler on the file, then run `make coverage`. This will generate a webpage `coverage/index.html` with a listing of all the source files and for each source file a listing of the number of times each line has been executed. -![Index.html screenshot](./coverage_example.png) +![Index.html screenshot](./assets/coverage_example.png) It can also be used automatically on all test files by running: `./scripts/test.py --coverage` or using the old test script: `COVERAGE=1 ./test.sh`. 
diff --git a/src/ast_type_specifier.cpp b/src/ast_type_specifier.cpp new file mode 100644 index 0000000..03c6cfa --- /dev/null +++ b/src/ast_type_specifier.cpp @@ -0,0 +1,18 @@ +#include "ast_type_specifier.hpp" + +#include + +namespace ast +{ + +constexpr std::string_view ToString(TypeSpecifier type) +{ + switch (type) + { + case TypeSpecifier::INT: + return "int"; + } + throw std::runtime_error("Unexpected type specifier"); +} + +} From 4feb95312042e4327766295f0a42c5c59df7c5e8 Mon Sep 17 00:00:00 2001 From: Simon Staal Date: Fri, 24 Jan 2025 18:59:04 +0000 Subject: [PATCH 17/18] Fixed compile error --- include/ast_type_specifier.hpp | 18 +++++++++++------- src/ast_function_definition.cpp | 2 +- src/ast_type_specifier.cpp | 18 ------------------ 3 files changed, 12 insertions(+), 26 deletions(-) delete mode 100644 src/ast_type_specifier.cpp diff --git a/include/ast_type_specifier.hpp b/include/ast_type_specifier.hpp index 35d9c5d..8398382 100644 --- a/include/ast_type_specifier.hpp +++ b/include/ast_type_specifier.hpp @@ -10,14 +10,18 @@ enum class TypeSpecifier INT }; -constexpr std::string_view ToString(TypeSpecifier type) +template +LogStream& operator<<(LogStream& ls, const TypeSpecifier& type) { - switch (type) - { - case TypeSpecifier::INT: - return "int"; - } - throw std::runtime_error("Unexpected type specifier"); + const auto TypeToString = [&type] { + switch (type) + { + case TypeSpecifier::INT: + return "int"; + } + throw std::runtime_error("Unexpected type specifier"); + }; + return ls << TypeToString(); } } diff --git a/src/ast_function_definition.cpp b/src/ast_function_definition.cpp index 17cc200..5aa1859 100644 --- a/src/ast_function_definition.cpp +++ b/src/ast_function_definition.cpp @@ -20,7 +20,7 @@ void FunctionDefinition::EmitRISC(std::ostream& stream, Context& context) const void FunctionDefinition::Print(std::ostream& stream) const { - stream << ToString(declaration_specifiers_) << " "; + stream << declaration_specifiers_ << " "; 
declarator_->Print(stream); stream << "() {" << std::endl; diff --git a/src/ast_type_specifier.cpp b/src/ast_type_specifier.cpp deleted file mode 100644 index 03c6cfa..0000000 --- a/src/ast_type_specifier.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include "ast_type_specifier.hpp" - -#include - -namespace ast -{ - -constexpr std::string_view ToString(TypeSpecifier type) -{ - switch (type) - { - case TypeSpecifier::INT: - return "int"; - } - throw std::runtime_error("Unexpected type specifier"); -} - -} From 12198c44636d7ac661b5566346de92d510cc34fc Mon Sep 17 00:00:00 2001 From: Simon Staal Date: Fri, 24 Jan 2025 19:07:42 +0000 Subject: [PATCH 18/18] Add extra compiler flags --- Makefile | 7 +++---- src/ast_constant.cpp | 2 +- src/ast_identifier.cpp | 2 +- src/lexer.flex | 3 +++ 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index c38e359..b929eaf 100644 --- a/Makefile +++ b/Makefile @@ -2,10 +2,9 @@ CXXFLAGS := -std=c++20 # use the 2020 version of the C++ standard CXXFLAGS += -g # generate debugging information -CXXFLAGS += -Wall # enable most warnings, except those about ... -CXXFLAGS += -Wno-unused-parameter # ... unused function parameters, ... -CXXFLAGS += -Wno-unused-variable # ... unused variables, ... -CXXFLAGS += -Wno-unused-function # ... or unused functions. 
+CXXFLAGS += -Wall # enable most warnings +CXXFLAGS += -Wextra # enable extra warnings +CXXFLAGS += -Werror # treat all warnings as errors CXXFLAGS += -fsanitize=address # enable address sanitization CXXFLAGS += -static-libasan # statically link with Address Sanitizer CXXFLAGS += -O0 # perform minimal optimisations diff --git a/src/ast_constant.cpp b/src/ast_constant.cpp index 21759c0..1b61478 100644 --- a/src/ast_constant.cpp +++ b/src/ast_constant.cpp @@ -2,7 +2,7 @@ namespace ast { -void IntConstant::EmitRISC(std::ostream& stream, Context& context) const +void IntConstant::EmitRISC(std::ostream& stream, Context&) const { stream << "li a0, " << value_ << std::endl; } diff --git a/src/ast_identifier.cpp b/src/ast_identifier.cpp index b598fe1..28664b5 100644 --- a/src/ast_identifier.cpp +++ b/src/ast_identifier.cpp @@ -2,7 +2,7 @@ namespace ast { -void Identifier::EmitRISC(std::ostream& stream, Context& context) const +void Identifier::EmitRISC(std::ostream& stream, Context&) const { stream << identifier_; } diff --git a/src/lexer.flex b/src/lexer.flex index 120fe38..f6c20ff 100644 --- a/src/lexer.flex +++ b/src/lexer.flex @@ -7,6 +7,9 @@ extern "C" int fileno(FILE *stream); #include "parser.tab.hpp" + + // Suppress warning about unused function + [[maybe_unused]] static void yyunput (int c, char * yy_bp ); %} D [0-9]