Add symbol support for labels
x1phosura x1phosura@x1phosura.zone
Mon, 02 Jan 2023 23:59:26 -0800
2 files changed,
328 insertions(+),
28 deletions(-)
M
projects/06/Makefile
→
projects/06/Makefile
@@ -8,9 +8,6 @@
bin/assembler1: assembler1/assembler1.c $(CC) $(CFLAGS) -o $@ $< -#bin/bin2text: bin2text.c -# $(CC) $(CFLAGS) -o $@ $< - clean: rm -vf bin/*
M
projects/06/assembler1/assembler1.c
→
projects/06/assembler1/assembler1.c
@@ -4,18 +4,45 @@ #include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Debug logging. Messages go to stderr so they never mix with the assembled
 * program that is written to stdout. Swap the two definitions below to
 * silence debug output entirely.
 */
#define DBGLOG(...) fprintf(stderr, __VA_ARGS__)
//#define DBGLOG(...)

/*
 * Report an error on stderr, followed by the number and text of the source
 * line currently being processed. Wrapped in do/while(0) so it behaves as a
 * single statement (safe in an unbraced if/else). A NULL g_asm_line (no line
 * read yet) is printed as an empty string.
 */
#define error(...) do { \
		fprintf(stderr, __VA_ARGS__); \
		fprintf(stderr, "%zu | %s\n", g_asm_line_number, \
			g_asm_line != NULL ? g_asm_line : ""); \
	} while (0)

#define MAX_LINE_LEN 256
// parenthesized so the macro is safe inside larger expressions
#define MAX_SYMBOL_STR_LEN (MAX_LINE_LEN - 2)
#define RESERVED_LABEL_NUM 23
#define MAX_SYMBOLS 32768

char *g_asm_line; // currently-read line for convenience
size_t g_asm_line_number; // current line number
size_t g_instruction_number = 0; // instruction offset

// one entry of the symbol table: a heap-allocated name and its 16-bit value
struct symbol_t {
	char *symbolstr;
	uint16_t value;
};

// names of the predefined symbols; values are in the parallel array below
char *g_reserved_symbol_strs[RESERVED_LABEL_NUM] = {
	"R0", "R1", "R2", "R3",
	"R4", "R5", "R6", "R7",
	"R8", "R9", "R10", "R11",
	"R12", "R13", "R14", "R15",
	"SP", "LCL", "ARG", "THIS",
	"THAT", "SCREEN", "KBD"
};
uint16_t g_reserved_symbol_values[RESERVED_LABEL_NUM] = {
	0, 1, 2, 3,
	4, 5, 6, 7,
	8, 9, 10, 11,
	12, 13, 14, 15,
	0, 1, 2, 3,
	4, 0x4000, 0x6000
};

// label and variable symbols will effectively be treated the same; the only
// difference is in their use (in a hack program)
struct symbol_t *g_symbol_list[MAX_SYMBOLS + RESERVED_LABEL_NUM];
size_t g_symbol_list_len = 0;

/* Release one symbol and its name. Accepts NULL. */
static void free_symbol(struct symbol_t *symbol)
{
	if (symbol == NULL)
		return;
	free(symbol->symbolstr);
	free(symbol);
}

/* True for the characters accepted inside a label name: [0-9A-Za-z_]. */
static bool is_label_char(char c)
{
	return ('0' <= c && c <= '9') || ('A' <= c && c <= 'Z')
	       || ('a' <= c && c <= 'z') || c == '_';
}

/*
 * Print every symbol currently in the table through DBGLOG.
 * Read-only: the table is left untouched so that free_all_symbols() can
 * still release every entry afterwards.
 */
void debug_dump_all_symbols(void)
{
	size_t i;
	struct symbol_t *symbol;

	DBGLOG("-------- DEBUG SYMBOL DUMP --------\n");
	for (i = 0; i < g_symbol_list_len; ++i) {
		symbol = g_symbol_list[i];
		DBGLOG("symbol (%s, %hu)\n", symbol->symbolstr, symbol->value);
	}
	DBGLOG("-------- END SYMBOL DUMP --------\n");
}

/* Free every symbol in the table and reset the table to empty. */
void free_all_symbols(void)
{
	size_t i;

	for (i = 0; i < g_symbol_list_len; ++i) {
		free_symbol(g_symbol_list[i]);
		g_symbol_list[i] = NULL; // no dangling pointers left behind
	}

	g_symbol_list_len = 0;
}

/*
 * Allocate a new symbol holding a private copy of str and the given value.
 *
 * Returns the new symbol (owned by the caller until it is handed to
 * add_symbol()), or NULL if str is NULL or empty. Exits the program if
 * memory cannot be allocated.
 */
struct symbol_t *create_symbol(const char *str, uint16_t value)
{
	size_t len;
	struct symbol_t *symbol;
	char *symbolstr;

	len = (str != NULL) ? strlen(str) : 0;
	if (len == 0) {
		error("error creating empty label\n");
		return NULL;
	}

	symbol = malloc(sizeof *symbol);
	symbolstr = malloc(len + 1);
	if (symbol == NULL || symbolstr == NULL) {
		error("error creating label: malloc() returned NULL\n");
		exit(-1);
	}

	memcpy(symbolstr, str, len + 1); // includes the terminating '\0'

	symbol->symbolstr = symbolstr;
	symbol->value = value;

	return symbol;
}

/*
 * Find the symbol whose name is exactly str.
 * Returns the table entry, or NULL if there is none (or str is NULL).
 */
struct symbol_t *lookup_symbol(const char *str)
{
	size_t i;

	if (str == NULL)
		return NULL;

	for (i = 0; i < g_symbol_list_len; ++i) {
		if (strcmp(str, g_symbol_list[i]->symbolstr) == 0)
			return g_symbol_list[i];
	}

	return NULL; // symbol not found
}

/*
 * Append symbol to the table; on success the table owns it.
 * Returns false (and the caller keeps ownership) if symbol is NULL, the
 * table is full, or a symbol with the same name is already present.
 */
bool add_symbol(struct symbol_t *symbol)
{
	if (symbol == NULL)
		return false;

	// '>=': index g_symbol_list_len must stay inside the array
	if (g_symbol_list_len >= (MAX_SYMBOLS + RESERVED_LABEL_NUM)) {
		error("internal: symbol list full, over %d symbols\n",
		      (MAX_SYMBOLS + RESERVED_LABEL_NUM));
		return false;
	}

	if (lookup_symbol(symbol->symbolstr) != NULL) { // if present
		error("error: failed to add symbol %s: already found in list\n",
		      symbol->symbolstr);
		return false;
	}

	g_symbol_list[g_symbol_list_len] = symbol;
	++g_symbol_list_len;
	return true;
}

// pre-fill list with 'reserved' symbols and values
// returns false if any reserved symbol could not be created or added
bool init_symbol_list(void)
{
	size_t i;
	struct symbol_t *s;

	for (i = 0; i < RESERVED_LABEL_NUM; ++i) {
		s = create_symbol(g_reserved_symbol_strs[i],
				  g_reserved_symbol_values[i]);
		if (s == NULL)
			return false; // failed to create symbol
		if (!add_symbol(s)) {
			free_symbol(s); // not owned by the table: don't leak
			return false; // failed to add symbol
		}
	}

	return true;
}

/*
 * Parse a label declaration of the form "(NAME)" and record it in the symbol
 * table with the current instruction offset as its value.
 *
 * line must start at the opening '('. NAME may contain [0-9A-Za-z_], must
 * not start with a digit and may be at most MAX_SYMBOL_STR_LEN characters.
 * Anything after the closing ')' is not examined.
 *
 * Returns true if the label was added, false otherwise (an error has been
 * reported unless line simply does not start with '(').
 */
bool parse_label(const char *line)
{
	char labelstr[MAX_SYMBOL_STR_LEN + 1];
	size_t len = 0;
	const char *p;
	struct symbol_t *symbol;

	if (line == NULL || line[0] != '(') { // just to be safe
		return false; // not a label; labels look like (THIS)
	}

	// collect characters until the closing ')'
	for (p = line + 1; *p != '\0' && *p != ')'; ++p) {
		if (len == 0 && '0' <= *p && *p <= '9') {
			error("syntax error: label starts with a number\n");
			return false;
		}
		if (len >= MAX_SYMBOL_STR_LEN) {
			error("syntax error: label is too long (> %d chars)\n",
			      MAX_SYMBOL_STR_LEN);
			return false;
		}
		if (!is_label_char(*p)) {
			error("syntax error: disallowed character '%c' found"
			      " in label\n", *p);
			return false;
		}
		labelstr[len++] = *p;
	}

	if (*p != ')') {
		error("syntax error: no matching ')' found for label\n");
		return false;
	}
	if (len == 0) {
		error("syntax error: empty label '()'\n");
		return false;
	}
	labelstr[len] = '\0';

	// the symbol value is 16 bits wide: refuse to truncate silently
	if (g_instruction_number > UINT16_MAX) {
		error("error: label address %zu does not fit in 16 bits\n",
		      g_instruction_number);
		return false;
	}

	// create label, add to symbol list
	symbol = create_symbol(labelstr, (uint16_t)g_instruction_number);
	if (symbol == NULL)
		return false;
	if (!add_symbol(symbol)) {
		free_symbol(symbol); // e.g. duplicate label: don't leak
		return false;
	}
	return true;
}

/*
 * First-pass scan of one source line.
 *
 * Leading spaces and tabs are skipped; the first other character decides:
 *   '/'               likely a comment, nothing to do
 *   '('               label declaration, handed to parse_label()
 *   other printable   likely an instruction: g_instruction_number is bumped
 *   anything else     reported as a syntax error
 *
 * Returns true only when a label was found and added to the symbol table.
 */
bool parse_line_for_label(const char *line)
{
	char c;
	size_t i;

	if (line == NULL)
		return false;

	for (i = 0; (c = line[i]) != '\0'; ++i) {
		if (c == ' ' || c == '\t')
			continue;

		if (c == '/') // likely comment
			return false;
		if (c == '(')
			return parse_label(&line[i]);
		if (('!' <= c && c <= '\'') || ('*' <= c && c <= '~')) {
			++g_instruction_number; // likely instruction found
			return false;
		}

		error("syntax error: invalid character '%c' found in "
		      "line\n", c);
		return false; // stop here: don't count a rejected line
	}

	return false; // empty or whitespace-only line
}
// put symbol in table w/ new variable value + // a_field = new value + // increment global variable value counter + DBGLOG("Found label/variable in A-instruction '%s'\n", line); + a_field = 65; // STUB!!! + } else { + for (i = 1; (c = line[i]) != '\0' && a < 6; ++i) { + if ('0' <= c && c <= '9') { + if (a > 4) { + error("error: @<number> too long\n"); + return false; + } + a_field_str[a] = c; // get number + a++; + } else if ((c == ' ' || c == '\t' || c == '/') && i > 1) { + break; + } else { // any other character + error("syntax error: invalid char '%c' found " + "after @\n", c); return false; } - a_field_str[a] = c; // get number - a++; - } else if ((c == ' ' || c == '\t' || c == '/') && i > 1) { - break; - } else { // any other character - error("syntax error: invalid char '%c' found after @\n", - c); - return false; } - } - a_field_str[a] = '\0'; + a_field_str[a] = '\0'; + // TODO: basic check on a_field_str length? at least 1? - a_field = myatoi(a_field_str); // TODO: maybe negative number support? + a_field = myatoi(a_field_str); // TODO: maybe negative number support? + } + if (a_field > 32767) { error("error: %u > 32767, too large\n", a_field); return false;@@ -100,7 +352,7 @@ */
static bool parse_c_type_dest(const char *dest_line, uint8_t *dest) { size_t len; - DBGLOG("dest_line: %s\n", dest_line); + //DBGLOG("dest_line: %s\n", dest_line); for (len = 0; dest_line[len] != '='; ++len) { // read until '=' if (dest_line[len] == 'A') {@@ -146,7 +398,7 @@ */
static bool parse_c_type_comp(const char *comp_line, uint8_t *comp) { size_t len; - DBGLOG("comp_line: %s\n", comp_line); + //DBGLOG("comp_line: %s\n", comp_line); for (len = 0; comp_line[len] == '0' || comp_line[len] == '1' || comp_line[len] == '-' || comp_line[len] == 'D' ||@@ -259,7 +511,7 @@ {
size_t len; char *err_3rd_char = "syntax error: 3rd letter in jump field incorrect\n"; - DBGLOG("jump_line: %s\n", dest_line); + //DBGLOG("jump_line: %s\n", jump_line); for (len = 0; jump_line[len] == 'J' || jump_line[len] == 'G' || jump_line[len] == 'T' || jump_line[len] == 'E' ||@@ -439,7 +691,7 @@ for (i = 0; (c = line[i]) != '\0'; ++i) {
if (c == ' ' || c == '\t') { continue; } else if (i == 0 && c == '\0') { - return false; + return false; // originally bugfix } else if (c == '/') { if (slash_found) { // second slash means comment return false;@@ -450,6 +702,8 @@ } else if (slash_found) {
// this char not slash, but previous was: invalid syntax error("syntax error: found '/', comments need '//'\n"); return false; + } else if (c == '(') { // found label, ignore and return + return false; } else { // non-whitespace/slash discovered break; }@@ -479,14 +733,57 @@
in_file_path = argv[1]; fp = fopen(in_file_path, "r"); if (fp == NULL) { - fprintf(stderr, "failed to open file for reading\n"); + fprintf(stderr, "failed to open file %s for reading\n", + in_file_path); + exit(-1); + } + + if(!init_symbol_list()) { + fprintf(stderr, "internal: failed to initialize symbol list\n"); exit(-1); } - file_line = 0; + DBGLOG("Symbol initialization complete.\n"); + + file_line = 1; + // first pass to read labels and associate with values while (fgets(in_line, MAX_LINE_LEN, fp) != NULL) { // parse loop + for (i = 0; in_line[i] != '\0'; ++i) { // remove newlines + if (in_line[i] == '\n' || in_line[i] == '\r') { + in_line[i] = '\0'; + break; + } + ++in_line_len; + } + + if (in_line_len == 0 || in_line_len == 1) + continue; + + g_asm_line = in_line; + g_asm_line_number = file_line; + result = parse_line_for_label(in_line); + if (result) { + DBGLOG("DEBUG: label found in line %s\n", in_line); + } + ++file_line; + } + + if (fclose(fp)) { + fprintf(stderr, "Error closing file %s. Aborting...\n", in_file_path); + } + + // TODO: keep file open, restart read from beginning + fp = fopen(in_file_path, "r"); + if (fp == NULL) { + fprintf(stderr, "failed to open file %s for reading\n", + in_file_path); + exit(-1); + } + + file_line = 1; + while (fgets(in_line, MAX_LINE_LEN, fp) != NULL) { // parse loop for (i = 0; in_line[i] != '\0'; ++i) { // remove newlines if (in_line[i] == '\n' || in_line[i] == '\r') { in_line[i] = '\0';@@ -506,7 +803,13 @@ if (result) {
print_binary_word16(instruction); putchar('\n'); } + + ++file_line; } + + debug_dump_all_symbols(); + + free_all_symbols(); if (fclose(fp)) { fprintf(stderr, "Failed to close file %s\n", in_file_path);