Rewrite: Add label symbol support, misc. tidying
x1phosura x1phosura@x1phosura.zone
Sat, 04 Feb 2023 06:03:46 -0800
1 files changed,
132 insertions(+),
137 deletions(-)
M
projects/06/assembler2/assembler2.c
→
projects/06/assembler2/assembler2.c
@@ -6,39 +6,35 @@ #include <string.h>
#define DBGLOG printf -#define valid_symbol_char(c) (('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') \ - || ('a' <= c && c <= 'z') || c == '_' || c == '.' \ - || c == '$' || c == ':') +#define symbol_char_ok(c) (('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') \ + || ('a' <= c && c <= 'z') || c == '_' || c == '.' \ + || c == '$' || c == ':') -#define error(...) (fprintf(stderr, __VA_ARGS__), -1) +#define error(...) fprintf(stderr, __VA_ARGS__) +#define die(...) do { fprintf(stderr, __VA_ARGS__); exit(-1); } while (0) -#define MAX_LINE_LEN 256 // TODO: in/excludes NULL terminator? -#define MAX_SYMBOL_LEN MAX_LINE_LEN - 2 + 1 // + 1 for NULL terminator -#define MAX_SYMBOLS 32768 // TODO remove me - -char *g_asm_line; // currently-read line for convenience -size_t g_asm_line_number; // current line number -size_t g_instruction_number = 0; // instruction offset +#define MAX_LINE_LEN 256 // TODO: in/excludes NULL terminator? +#define MAX_SYMBOL_LEN MAX_LINE_LEN - 2 + 1 // + 1 for NULL terminator -struct symbol_t { - char *symbolstr; - uint16_t value; -}; +size_t instruction_offset = 0; // (TODO), used to be g_instruction_number -char rom[32768]; +uint16_t rom[32768]; size_t rom_index = 0; -char symbol_strs[32768 + 23][MAX_SYMBOL_LEN]; // TODO: double-check 2D array -uint16_t symbol_vals[32768]; -uint16_t symbol_index; - -#define RESERVED_LABEL_NUM 23 // TODO: remove me +char *file_line; // reference to currently-read line (for convenience) +size_t file_line_no; // line number, regardless of line content -// label and variable symbols will effectively be treated the same; the only -// difference is in their use (in a hack program) -struct symbol_t *g_symbol_list[32768 + RESERVED_LABEL_NUM]; -size_t g_symbol_list_len = 0; -size_t g_current_variable_address = 16; // address for next variable symbol +// label and variable symbols are treated the same internally +#define MAX_SYMBOLS 32768 +char *reserved_strs[] = {"R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", + "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", + "SP", "LCL", "ARG", "THIS", "THAT", "SCREEN", "KBD"}; +uint16_t reserved_vals[] = {0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 0x4000, 0x6000}; +char symbol_strs[MAX_SYMBOLS + sizeof(reserved_vals)][MAX_SYMBOL_LEN]; +uint16_t symbol_vals[MAX_SYMBOLS + sizeof(reserved_vals)]; +uint16_t symbol_index; void print_binary_word16(uint16_t w)@@ -57,6 +53,18 @@ }
printf("%s", binary_string); } +// TODO refactor +void binprint_rom(uint16_t *rom, size_t rom_size) +{ + size_t i; + printf("rom_index = %lu, rom_size = %lu\n", rom_index, rom_size); + for (i = 0; i < rom_size; ++i) { + print_binary_word16(rom[i]); + putchar('\n'); + } +} + +/* static uint32_t myatoi(const char *str) { size_t i;@@ -68,31 +76,19 @@ }
return ret; } +*/ -size_t skip_whitespace(const char *line, size_t n) { +size_t skip_whitespace(const char *line, size_t n) +{ size_t i = 0; - char c = line[i]; - - for (i; (c == ' ' || c == '\t') && c != '\0' && i < n; ++i); - return i; // TODO double-check above loop logic + while ((line[i] == ' ' || line[i] == '\t') && i < n) + ++i; + return i; } -// TODO: will still need to handle multi-line-comments -bool is_single_line_comment(const char *line) { - size_t i; - - i = skip_whitespace(line, MAX_LINE_LEN); - if (line[i] == '/') - if (line[i + 1] == '/' && !(i >= MAX_LINE_LEN)) - return true; - return false; -} - -// TODO FIX! void debug_dump_all_symbols() { size_t i; - DBGLOG("-------- DEBUG SYMBOL DUMP --------\n"); for (i = 0; i < symbol_index; ++i) { DBGLOG("symbol (%s, %hu)\n", symbol_strs[i], symbol_vals[i]);@@ -100,23 +96,20 @@ }
DBGLOG("-------- END SYMBOL DUMP --------\n"); } -bool lookup_symbol(const char *str, uint16_t *val) +char *lookup_symbol(const char *str, uint16_t *val) { size_t i; - for (i = 0; i < sizeof(symbol_strs) / sizeof(symbol_strs[0]); ++i) { if (strncmp(symbol_strs[i], str, MAX_SYMBOL_LEN) == 0) { *val = symbol_vals[i]; - return true; + return symbol_strs[i]; // TODO: double check } } - - return false; + return NULL; } bool add_symbol(const char *str, uint16_t val) { - size_t i; uint16_t tmp; if (symbol_index > 32767) {@@ -124,7 +117,7 @@ // TODO: print appropriate error message
return false; } - if (lookup_symbol(str, &tmp)) { + if (lookup_symbol(str, &tmp) != NULL) { // TODO: print appropriate error message return false; }@@ -137,110 +130,109 @@ return true;
} // pre-fill symbol lists with 'reserved' symbols and values -bool init_symbol_list() +bool init_reserved_symbols() { size_t i; - - char *reserved_strs[] = {"R0", "R1", "R2", "R3", "R4", "R5", - "R6", "R7", "R8", "R9", "R10", "R11", - "R12", "R13", "R14", "R15", "SP", "LCL", - "ARG", "THIS", "THAT", "SCREEN", "KBD"}; - uint16_t reserved_vals[] = {0, 1, 2, 3, 4, 5, - 6, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 0, 1, - 2, 3, 4, 0x4000, 0x6000}; - for (i = 0; i < sizeof(reserved_strs) / sizeof(reserved_strs[0]); ++i) { - if (!add_symbol(reserved_strs[i], reserved_vals[i])) + if (!add_symbol(reserved_strs[i], reserved_vals[i])) { + error("error: failed to initialize reserved symbols\n"); return false; + } } return true; } -bool pass2(FILE *in_file) -{ - bool result = false; - uint16_t instruction; - char in_line[MAX_LINE_LEN]; - size_t i, file_line, in_line_len; +// assumes line[0] == '(' +bool parse_line_for_label(const char *line) { + size_t i; + char c, label_str[MAX_SYMBOL_LEN + 1]; - file_line = 1; - while (fgets(in_line, MAX_LINE_LEN, in_file) != NULL) { // parse loop - for (i = 0; in_line[i] != '\0'; ++i) { // remove newlines - if (in_line[i] == '\n' || in_line[i] == '\r') { - in_line[i] = '\0'; - break; - } - ++in_line_len; + DBGLOG("parsing label: line %lu | %s\n", file_line_no, line); + for (i = 1; i < MAX_SYMBOL_LEN; ++i) { + c = line[i]; + if (c == '\0' || c == ')') + break; + if (!symbol_char_ok(c)) { + error("error: label contains invalid char %c\n", c); + return false; } + label_str[i - 1] = line[i]; + } - if (in_line_len == 0 || in_line_len == 1) - continue; + if (c != ')') { + error("error: no matching ')' found for label\n"); + return false; + } - g_asm_line = in_line; - g_asm_line_number = file_line; - - printf("line %lu: %s\n", file_line, in_line); - result = true; - /* - result = parse_line(in_line, &instruction); + label_str[i - 1] = '\0'; - if (result) { - print_binary_word16(instruction); - putchar('\n'); - //TODO: put instruction in ROM - } - */ + //return add_symbol(label_str, 65); + return add_symbol(label_str, (uint16_t)instruction_offset); +} - ++file_line; - } - - return result; +bool parse_instruction(const char *line, uint16_t *instruction) +{ + static uint16_t val = 0x56; + DBGLOG("parsing instruction in line %lu | %s\n", file_line_no, line); + *instruction = val++; + return true; // STUB TODO: remove } -bool pass1(FILE *in_file) +// if first_pass == true: read labels and associate them with values +// else: (second pass) parse instructions +// Still unsure how I feel about doing handling both passes in one function. +// It's hacky, but also space saving and elegant in a way. +bool pass(FILE *in_file, bool first_pass) { - bool result = false; + bool result = true; + char *line, in_line[MAX_LINE_LEN]; uint16_t instruction; - char in_line[MAX_LINE_LEN]; - size_t i, file_line, in_line_len; + size_t i, line_len; - if (!init_symbol_list()) { - return false; - } + if (first_pass) + if (!init_reserved_symbols()) + return false; - file_line = 1; - // first pass to read labels and associate with values + file_line_no = 0; while (fgets(in_line, MAX_LINE_LEN, in_file) != NULL) { // parse loop - for (i = 0; in_line[i] != '\0'; ++i) { // remove newlines - if (in_line[i] == '\n' || in_line[i] == '\r') { - in_line[i] = '\0'; + ++file_line_no; + file_line = in_line; + line = &in_line[skip_whitespace(in_line, MAX_LINE_LEN)]; + + // remove newlines and get line length + line_len = 0; + for (i = 0; line[i] != '\0'; ++i) { + if (line[i] == '\n' || line[i] == '\r') { + line[i] = '\0'; break; } - ++in_line_len; + ++line_len; } - if (in_line_len == 0 || in_line_len == 1) + if (line_len == 0) // "empty" line continue; - - if (in_line_len >= 2) - if (in_line[0] == '/' && in_line[1] == '/') - continue; // comment found - - g_asm_line = in_line; - g_asm_line_number = file_line; - printf("line %lu. If this was real, it would call " - "parse_line_for_label()\n", file_line); - result = true; - ////result = parse_line_for_label(in_line); - //if (result) - // DBGLOG("DEBUG: label found in line %s\n", in_line); - - ++file_line; + if (line_len > 1) + if (line[0] == '/' && line[1] == '/') // comment found + continue; + if (line[0] == '(') { + if (first_pass) { + if (!parse_line_for_label(line)) + result = false; + } else + continue; + } else { + if (!first_pass) { + if (parse_instruction(line, &instruction)) + rom[rom_index++] = instruction; + } + ++instruction_offset; + continue; // first pass: if not label, ignore line + } } - debug_dump_all_symbols(); + if (first_pass) + debug_dump_all_symbols(); return result; }@@ -253,33 +245,36 @@ {
FILE *in_file, *out_file; char *in_file_path, *out_file_path; if (argc != 3) // requires 2 arguments - return error(usage_msg); // TODO: eventually support STDOUT + die(usage_msg); // TODO: eventually support STDOUT in_file_path = argv[1]; out_file_path = argv[2]; in_file = fopen(in_file_path, "r"); if (in_file == NULL) - return error("failed to open assembly file for reading\n"); + die("failed to open assembly file for reading\n"); - if(!pass1(in_file)) - return error("failed to parse labels/variables in file\n"); + DBGLOG("FIRST PASS\n"); + if(!pass(in_file, true)) // first pass + die("failed to parse labels/variables in file\n"); if (fseek(in_file, 0, SEEK_SET)) - return error("failed to re-read file from beginning\n"); + die("failed to re-read file from beginning\n"); - if(!pass2(in_file)) - return error("failed to parse assembly in file\n"); + DBGLOG("SECOND PASS\n"); + if(!pass(in_file, false)) // second pass + die("failed to parse assembly in file\n"); out_file = fopen(out_file_path, "wb"); if (out_file == NULL) - return error("failed to open output file for writing\n"); + die("failed to open output file for writing\n"); - //fwrite(assembled, sizeof(assembled), 1, out_fp); + //fwrite(rom, rom_index, 1, out_fp); // TODO: double-check + binprint_rom(rom, rom_index); if (fclose(in_file)) - return error("failed to close assembly file\n"); + die("failed to close assembly file\n"); if (fclose(out_file)) - return error("failed to close output file\n"); + die("failed to close output file\n"); return 0; }