Finish label/variable symbol support, misc tidy up
x1phosura x1phosura@x1phosura.zone
Fri, 13 Jan 2023 04:22:30 -0800
2 files changed,
96 insertions(+),
59 deletions(-)
A
projects/06/POSSIBLE-BUGS.md
@@ -0,0 +1,8 @@
+ +assembler1: +If a line contains a label longer than `(MAX_LINE_LEN - line_length)` characters (note: `MAX_LINE_LEN == 256`, though the exact value is beside the point here), the label may get truncated, or something else may happen (I'm unsure...) + +Junk may be allowed after a C-type instruction because of how it's parsed (parsing exits once the expected characters are found in the right places, so the whole line isn't necessarily validated) + +It might be unsafe to assume `comp_start` and `jump_start` start at `&line[i+1]` after the '=' or ';' in `parse_c_type()` (unsure) +
M
projects/06/assembler1/assembler1.c
→
projects/06/assembler1/assembler1.c
@@ -10,6 +10,10 @@ #define error(...) fprintf(stderr, __VA_ARGS__); \
fprintf(stderr, "%lu | %s\n", \ g_asm_line_number, g_asm_line); +#define valid_symbol_char(c) (('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') \ + || ('a' <= c && c <= 'z') || c == '_' || c == '.' \ + || c == '$' || c == ':') + #define MAX_LINE_LEN 256 #define MAX_SYMBOL_STR_LEN MAX_LINE_LEN - 2@@ -42,6 +46,7 @@ // label and variable symbols will effectively be treated the same; the only
// difference is in their use (in a hack program) struct symbol_t *g_symbol_list[MAX_SYMBOLS + RESERVED_LABEL_NUM]; size_t g_symbol_list_len = 0; +size_t g_current_variable_address = 16; // address for next variable symbol void print_binary_word16(uint16_t w)@@ -181,15 +186,11 @@ size_t i;
struct symbol_t *s; for (i = 0; i < RESERVED_LABEL_NUM; ++i) { - //DBGLOG("init_symbol_list: adding symbol {%s, %hu}\n", - // g_reserved_symbol_strs[i], g_reserved_symbol_values[i]); if((s = create_symbol(g_reserved_symbol_strs[i], g_reserved_symbol_values[i])) == NULL) { - //DBGLOG("init_symbol_list: failed to create symbol\n"); return false; // failed to create symbol } if (!add_symbol(s)) { - //DBGLOG("init_symbol_list: failed to add symbol\n"); return false; // failed to add symbol } }@@ -197,22 +198,17 @@
return true; } -// assumes line[0] == '(' -bool parse_label(const char *line) +// assumes line starts at symbol (_not_ at '(' character, if relevant) +bool parse_symbol_str(const char *line, char *symbol_str, size_t *symbol_len) { + char c; size_t i; - uint16_t value; - char c, labelstr[MAX_SYMBOL_STR_LEN + 1]; - struct symbol_t *symbol; - //DBGLOG("Parsing label: line %lu | %s\n", g_asm_line_number, line); + //DBGLOG("Parsing symbol string: line %lu | %s\n", g_asm_line_number, line); - if (line[0] != '(') { // just to be safe - return false; // not a label; label's look like (THIS) - } - - for (i = 1; (c = line[i]) != '\0'; ++i) { - if (i == 1 && ('0' <= c && c <= '9')) { + *symbol_len = 0; + for (i = 0; (c = line[i]) != '\0'; ++i) { + if (i == 0 && ('0' <= c && c <= '9')) { error("syntax error: label starts with a number\n"); return false; }@@ -221,38 +217,57 @@ if (i > MAX_SYMBOL_STR_LEN) {
error("syntax error: label is too long (> %d chars)\n", MAX_SYMBOL_STR_LEN); return false; - } else if (c == ')') { + } else if (valid_symbol_char(c)) { + symbol_str[i] = c; + } else { if (i == 1) { error("syntax error: empty label '()'\n"); return false; } - labelstr[i-1] = '\0'; - - // create label, add to symbol list - value = (uint16_t)g_instruction_number; // safe. won't - // be > 65535 - symbol = create_symbol(labelstr, value); - if (symbol == NULL) - return false; - return add_symbol(symbol); break; - } else if (('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') - || ('a' <= c && c <= 'z') || c == '_') { - // TODO TODO: enforce label name NOT start with a number! - labelstr[i-1] = c; - } else { - error("syntax error: disallowed character '%hhu' found" - " in label\n", c); - return false; } } - if (c == '\0') { // should_ be ')' after break if all goes as expected + symbol_str[i] = '\0'; + *symbol_len = i; + + return true; +} + +// assumes line[0] == '(' +bool parse_label(const char *line) +{ + bool ret; + size_t label_len; + uint16_t value; + char label_str[MAX_SYMBOL_STR_LEN + 1]; + struct symbol_t *symbol; + + //DBGLOG("Parsing label: line %lu | %s\n", g_asm_line_number, line); + + if (line[0] != '(') { // just to be safe + return false; // not a label; label's look like (THIS) + } + + ret = parse_symbol_str(&line[1], label_str, &label_len); + if (!ret) { + return false; + } + + if (line[label_len + 1] != ')') { + error("line[label_len = %lu] value is '%c'\n", label_len, line[label_len]) error("syntax error: no matching ')' found for label"); + return false; } - return false; + // create label, add to symbol list + value = (uint16_t)g_instruction_number; // safe. won't + // be > 65535 + symbol = create_symbol(label_str, value); + if (symbol == NULL) + return false; + return add_symbol(symbol); } bool parse_line_for_label(const char *line)@@ -283,9 +298,11 @@ }
static bool parse_a_type(const char *line, uint16_t *instruction) { - char c, a_field_str[6]; + bool ret; + char c, a_field_str[6], symbol_str[MAX_SYMBOL_STR_LEN + 1]; uint32_t a_field = 0; - size_t i, a = 0; + size_t i, a = 0, symbol_len; + struct symbol_t *symbol; //DBGLOG("line: %s\n", line);@@ -301,18 +318,24 @@ error("syntax error: A-type instruction empty after @\n");
return false; } - if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) { - // parse symbol/label, handle appropriately - // lookup symbol - // if (symbol already exists as label) - // a_field = symbol value from table - // else - // calc new variable value - // put symbol in table w/ new variable value - // a_field = new value - // increment global variable value counter - DBGLOG("Found label/variable in A-instruction '%s'\n", line); - a_field = 65; // STUB!!! + if (valid_symbol_char(c) && !('0' <= c && c <= '9')) { + ret = parse_symbol_str(&line[1], symbol_str, &symbol_len); + if (!ret) + return ret; + + symbol = lookup_symbol(symbol_str); + if (symbol == NULL) { // symbol doesn't exist in table, is new + a_field = g_current_variable_address; + symbol = create_symbol(symbol_str, a_field); + if (symbol == NULL) + return false; // create failed + ret = add_symbol(symbol); + if (!ret) + return ret; + ++g_current_variable_address; + } else { // symbol exists in table, use its value + a_field = symbol->value; // symbol value from table + } } else { for (i = 1; (c = line[i]) != '\0' && a < 6; ++i) { if ('0' <= c && c <= '9') {@@ -324,17 +347,22 @@ a_field_str[a] = c; // get number
a++; } else if ((c == ' ' || c == '\t' || c == '/') && i > 1) { break; - } else { // any other character + } else { // any other character encountered error("syntax error: invalid char '%c' found " "after @\n", c); return false; } } + if (a == 0) { + error("syntax error: A-type instruction empty " + "after '@'\n"); + return false; + } + a_field_str[a] = '\0'; - // TODO: basic check on a_field_str length? at least 1? - a_field = myatoi(a_field_str); // TODO: maybe negative number support? + a_field = myatoi(a_field_str); } if (a_field > 32767) {@@ -352,6 +380,7 @@ */
static bool parse_c_type_dest(const char *dest_line, uint8_t *dest) { size_t len; + //DBGLOG("dest_line: %s\n", dest_line); for (len = 0; dest_line[len] != '='; ++len) { // read until '='@@ -398,6 +427,7 @@ */
static bool parse_c_type_comp(const char *comp_line, uint8_t *comp) { size_t len; + //DBGLOG("comp_line: %s\n", comp_line); for (len = 0; comp_line[len] == '0' || comp_line[len] == '1' ||@@ -743,8 +773,6 @@ fprintf(stderr, "internal: failed to initialize symbol list\n");
exit(-1); } - DBGLOG("Symbol initialization complete.\n"); - file_line = 1; // first pass to read labels and associate with values while (fgets(in_line, MAX_LINE_LEN, fp) != NULL) { // parse loop@@ -762,9 +790,8 @@
g_asm_line = in_line; g_asm_line_number = file_line; result = parse_line_for_label(in_line); - if (result) { - DBGLOG("DEBUG: label found in line %s\n", in_line); - } + //if (result) + // DBGLOG("DEBUG: label found in line %s\n", in_line); ++file_line; }@@ -773,6 +800,8 @@
if (fclose(fp)) { fprintf(stderr, "Error closing file %s. Aborting...\n", in_file_path); } + + // TODO: skip second pass if error found in first pass // TODO: keep file open, restart read from beginning fp = fopen(in_file_path, "r");@@ -807,7 +836,7 @@
++file_line; } - debug_dump_all_symbols(); + //debug_dump_all_symbols(); free_all_symbols();