all repos — nand2tetris @ c5ab5300b95bfef6db00c654c5ae76f06130c988

my nand2tetris progress

Rewrite: Add A-type instr support, misc. refactors
x1phosura x1phosura@x1phosura.zone
Mon, 06 Feb 2023 04:04:23 -0800
commit

c5ab5300b95bfef6db00c654c5ae76f06130c988

parent

aebb96453a070c3eb8f4e0fe82b8ae3e46d181bb

1 file changed, 113 insertions(+), 83 deletions(-)

jump to
M projects/06/assembler2/assembler2.c

@@ -6,17 +6,20 @@ #include <string.h>

#define DBGLOG printf -#define symbol_char_ok(c) (('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') \ +#define err(...) (fprintf(stderr, __VA_ARGS__), \ + fprintf(stderr, "%lu | %s\n", file_line_no, file_line), false) +#define die(...) do { fprintf(stderr, __VA_ARGS__); exit(-1); } while (0) + +#define is_symbol_char(c) (('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') \ || ('a' <= c && c <= 'z') || c == '_' || c == '.' \ || c == '$' || c == ':') - -#define error(...) fprintf(stderr, __VA_ARGS__) -#define die(...) do { fprintf(stderr, __VA_ARGS__); exit(-1); } while (0) +#define is_whitespace(c) ((c == ' ' || c == '\t')) +#define is_number(c) (('0' <= c && c <= '9')) #define MAX_LINE_LEN 256 // TODO: in/excludes NULL terminator? #define MAX_SYMBOL_LEN MAX_LINE_LEN - 2 + 1 // + 1 for NULL terminator -size_t instruction_offset = 0; // (TODO), used to be g_instruction_number +size_t instruction_offset = 0; uint16_t rom[32768]; size_t rom_index = 0;

@@ -35,92 +38,66 @@ 0, 1, 2, 3, 4, 0x4000, 0x6000};

char symbol_strs[MAX_SYMBOLS + sizeof(reserved_vals)][MAX_SYMBOL_LEN]; uint16_t symbol_vals[MAX_SYMBOLS + sizeof(reserved_vals)]; uint16_t symbol_index; - +uint16_t next_var_address = 16; void print_binary_word16(uint16_t w) { - unsigned char i, msb; + unsigned char i; char binary_string[17]; + for (i = 0; i < 16; ++i, w <<= 1) + binary_string[i] = (((w >> 15) & 0x01) != 0) ? '1' : '0'; binary_string[16] = '\0'; - - for (i = 0; i < 16; ++i) { - msb = (char)(w >> 15); - - if (msb != 0) binary_string[i] = '1'; - else binary_string[i] = '0'; - w <<= 1; - } - printf("%s", binary_string); + printf("%s\n", binary_string); } -// TODO refactor -void binprint_rom(uint16_t *rom, size_t rom_size) +static void print_rom_binary(uint16_t *rom, size_t rom_size) { size_t i; - printf("rom_index = %lu, rom_size = %lu\n", rom_index, rom_size); - for (i = 0; i < rom_size; ++i) { + for (i = 0; i < rom_size; ++i) print_binary_word16(rom[i]); - putchar('\n'); - } } -/* -static uint32_t myatoi(const char *str) +static size_t skip_whitespace(const char *line, size_t n) { size_t i; - uint32_t ret = 0; - - for (i = 0; i < 5 && '0' <= str[i] && str[i] <= '9'; ++i) { - ret = (ret * 10) + (str[i] - 0x30); - } - - return ret; -} -*/ - -size_t skip_whitespace(const char *line, size_t n) -{ - size_t i = 0; - while ((line[i] == ' ' || line[i] == '\t') && i < n) - ++i; + for (i = 0; is_whitespace(line[i]) && i < n; ++i); return i; } -void debug_dump_all_symbols() +static void debug_dump_all_symbols() { size_t i; DBGLOG("-------- DEBUG SYMBOL DUMP --------\n"); - for (i = 0; i < symbol_index; ++i) { + for (i = 0; i < symbol_index; ++i) DBGLOG("symbol (%s, %hu)\n", symbol_strs[i], symbol_vals[i]); - } DBGLOG("-------- END SYMBOL DUMP --------\n"); } -char *lookup_symbol(const char *str, uint16_t *val) +static char *lookup_symbol(const char *str, uint16_t *val) { size_t i; + for (i = 0; i < sizeof(symbol_strs) / sizeof(symbol_strs[0]); ++i) { if (strncmp(symbol_strs[i], str, MAX_SYMBOL_LEN) == 0) { 
*val = symbol_vals[i]; - return symbol_strs[i]; // TODO: double check + return symbol_strs[i]; } } + return NULL; } -bool add_symbol(const char *str, uint16_t val) +static bool add_symbol(const char *str, uint16_t val) { uint16_t tmp; - if (symbol_index > 32767) { - // TODO: print appropriate error message - return false; - } + if (symbol_index > 32767) + return err("error: failed to add symbol '%s': %hu > 32767\n", + str, val); - if (lookup_symbol(str, &tmp) != NULL) { - // TODO: print appropriate error message - return false; - } + if (lookup_symbol(str, &tmp) != NULL) + return err("error: failed to add symbol '%s': symbol already " + "exists\n", str); strncpy(symbol_strs[symbol_index], str, MAX_SYMBOL_LEN); symbol_vals[symbol_index] = val;

@@ -130,15 +107,13 @@ return true;

} // pre-fill symbol lists with 'reserved' symbols and values -bool init_reserved_symbols() +static bool init_reserved_symbols() { size_t i; - for (i = 0; i < sizeof(reserved_strs) / sizeof(reserved_strs[0]); ++i) { - if (!add_symbol(reserved_strs[i], reserved_vals[i])) { - error("error: failed to initialize reserved symbols\n"); - return false; - } - } + + for (i = 0; i < sizeof(reserved_strs) / sizeof(reserved_strs[0]); ++i) + if (!add_symbol(reserved_strs[i], reserved_vals[i])) + return err("error: failed to init reserved symbols\n"); return true; }

@@ -148,35 +123,89 @@ bool parse_line_for_label(const char *line) {

size_t i; char c, label_str[MAX_SYMBOL_LEN + 1]; - DBGLOG("parsing label: line %lu | %s\n", file_line_no, line); for (i = 1; i < MAX_SYMBOL_LEN; ++i) { c = line[i]; if (c == '\0' || c == ')') break; - if (!symbol_char_ok(c)) { - error("error: label contains invalid char %c\n", c); - return false; - } + if (!is_symbol_char(c)) + return err("error: invalid char '%c' in label\n", c); label_str[i - 1] = line[i]; } - if (c != ')') { - error("error: no matching ')' found for label\n"); - return false; - } + if (c != ')') + return err("error: no matching ')' found for label\n"); label_str[i - 1] = '\0'; - //return add_symbol(label_str, 65); return add_symbol(label_str, (uint16_t)instruction_offset); } +bool parse_c_type(const char *line, uint16_t *instruction) +{ + DBGLOG("C-type, offset %lu, line %lu | %s\n", instruction_offset, + file_line_no, line); + uint16_t val = 0xb9c3; + *instruction = val; + return true; // STUB TODO remove me +} + +bool parse_a_type(const char *line, uint16_t *instruction) +{ + char a_str[MAX_SYMBOL_LEN + 1], *sym_ptr; + size_t i, a_str_len; + uint16_t a_field = 0; + + for (i = 1; i < MAX_SYMBOL_LEN && is_symbol_char(line[i]); ++i) { + a_str[i - 1] = line[i]; + } + + if (!(is_whitespace(line[i]) || line[i] == '\0' || line[i] == '/')) + return err("syntax error: invalid char '%c' in instruction\n", + line[i]); + + a_str_len = i - 1; + a_str[a_str_len] = '\0'; + + if (a_str_len == 0) + return err("syntax error: instruction empty after @\n"); + + if (is_number(a_str[0])) { // @<string> probably number + if (a_str_len > 5) + return err("syntax error: instruction field '%s' too " + "large\n", a_str); + for (i = 0; i < 5 && is_number(a_str[i]); ++i) { + if (!is_number(a_str[i])) + return err("syntax error: '%s' not a valid " + "number\n", a_str); + a_field = (a_field * 10) + (a_str[i] - 0x30); + } + if (a_field > 0x7fff) + return err("syntax error: %u > 32767, too large\n", + a_field); + } else { // @<string> probably symbol + sym_ptr = 
lookup_symbol(a_str, &a_field); + if (sym_ptr == NULL) { + a_field = next_var_address++; + if (!add_symbol(a_str, a_field)) + return err("error: failed to add var '%s'\n", + a_str); + } + } + + *instruction = a_field; + return true; +} + +// does not care about line length; exits at first newline or after relevant +// portion parsed (allows for syntactically-incorrect lines, I know) bool parse_instruction(const char *line, uint16_t *instruction) { - static uint16_t val = 0x56; - DBGLOG("parsing instruction in line %lu | %s\n", file_line_no, line); - *instruction = val++; - return true; // STUB TODO: remove + if (line[0] == '@') + return parse_a_type(line, instruction); + else if (line[0] >= '!' && line[0] < '~') + return parse_c_type(line, instruction); + + return err("syntax error: invalid char '%c' found\n", line[0]); } // if first_pass == true: read labels and associate them with values

@@ -185,7 +214,7 @@ // Still unsure how I feel about doing handling both passes in one function.

// It's hacky, but also space saving and elegant in a way. bool pass(FILE *in_file, bool first_pass) { - bool result = true; + bool ret = true, second_pass = !first_pass; char *line, in_line[MAX_LINE_LEN]; uint16_t instruction; size_t i, line_len;

@@ -195,19 +224,19 @@ if (!init_reserved_symbols())

return false; file_line_no = 0; + instruction_offset = 0; while (fgets(in_line, MAX_LINE_LEN, in_file) != NULL) { // parse loop ++file_line_no; file_line = in_line; line = &in_line[skip_whitespace(in_line, MAX_LINE_LEN)]; - // remove newlines and get line length line_len = 0; for (i = 0; line[i] != '\0'; ++i) { if (line[i] == '\n' || line[i] == '\r') { - line[i] = '\0'; + line[i] = '\0'; // remove newlines break; } - ++line_len; + ++line_len; // get line length } if (line_len == 0) // "empty" line

@@ -218,13 +247,15 @@ continue;

if (line[0] == '(') { if (first_pass) { if (!parse_line_for_label(line)) - result = false; + ret = false; } else continue; } else { - if (!first_pass) { + if (second_pass) { if (parse_instruction(line, &instruction)) rom[rom_index++] = instruction; + else + ret = false; } ++instruction_offset; continue; // first pass: if not label, ignore line

@@ -234,7 +265,7 @@

if (first_pass) debug_dump_all_symbols(); - return result; + return ret; }

@@ -254,7 +285,6 @@ in_file = fopen(in_file_path, "r");

if (in_file == NULL) die("failed to open assembly file for reading\n"); - DBGLOG("FIRST PASS\n"); if(!pass(in_file, true)) // first pass die("failed to parse labels/variables in file\n");

@@ -270,7 +300,7 @@ if (out_file == NULL)

die("failed to open output file for writing\n"); //fwrite(rom, rom_index, 1, out_fp); // TODO: double-check - binprint_rom(rom, rom_index); + print_rom_binary(rom, rom_index); if (fclose(in_file)) die("failed to close assembly file\n"); if (fclose(out_file))