Reorganize codebase, misc n2tasm1 tweaks/fixes
@@ -1,17 +1,19 @@
- +# nand2tetris chapter 6 assembler makefile +# for debugging this makefile: `make -d` CFLAGS = -std=c99 -Wall -Wextra -all: bin/assembler1 bin/assembler2 +all: bin/n2tasm1 bin/n2tasm2 -# TODO: clean up, make more Makefile-idiomatic +bin/n2tasm%: src/n2tasm%.c + $(CC) $(DEBUG) $(CFLAGS) -o $@ $< -bin/assembler1: assembler1/assembler1.c - $(CC) $(CFLAGS) -o $@ $< +debug: DEBUG = -D_DEBUG +debug: all -bin/assembler2: assembler2/assembler2.c - $(CC) $(CFLAGS) -o $@ $< +#test: +# TODO: write tests, decide suitable way to run clean: rm -vf bin/* -.PHONY: clean +.PHONY: clean debug
@@ -1,9 +1,15 @@
- # Assembler(s) Approach: write 2-3 assemblers, then compare all three, their approaches, pros/cons, and write a retrospective. 1. Write an assembler purely from scratch w/o any "spoilers" or implementation tips from the book or online (besides what I already know). +- DONE 2. Write an assembler based off of an elegant assembler I've already seen online (ex. 100rabbits assembler) +- Inspiration taken from the following code: + - https://github.com/dmatlack/chip8 + - https://git.sr.ht/~rabbits/gyo + - https://git.sr.ht/~rabbits/uxn11 +- DONE 3. Write the assembler based on the book's recommended implementation/organization. +- I may/may not do this, time permitting.
@@ -4,9 +4,13 @@ #include <stdio.h>
#include <stdlib.h> #include <string.h> -#define DBGLOG(...) printf(__VA_ARGS__) -//#define DBGLOG(...) -#define error(...) fprintf(stderr, __VA_ARGS__); \ +#ifdef _DEBUG +#define DBGLOG printf +#else +#define DBGLOG(...) +#endif + +#define err(...) fprintf(stderr, __VA_ARGS__); \ fprintf(stderr, "%lu | %s\n", \ g_asm_line_number, g_asm_line); \ g_error_encountered = true;@@ -52,7 +56,7 @@ size_t g_symbol_list_len = 0;
size_t g_current_variable_address = 16; // address for next variable symbol -void print_binary_word16(uint16_t w) +void write_binary_word16(FILE *fp, uint16_t w) { unsigned char i, msb; char binary_string[17];@@ -67,7 +71,7 @@ else
binary_string[i] = '0'; w <<= 1; } - printf("%s", binary_string); + fprintf(fp, "%s\n", binary_string); } static uint32_t myatoi(const char *str)@@ -91,6 +95,7 @@ DBGLOG("-------- DEBUG SYMBOL DUMP --------\n");
for (i = 0; i < g_symbol_list_len; ++i) { symbol = g_symbol_list[i]; DBGLOG("symbol (%s, %hu)\n", symbol->symbolstr, symbol->value); + (void)symbol; // "use" to shut up compiler warning } DBGLOG("-------- END SYMBOL DUMP --------\n");@@ -121,14 +126,14 @@ //DBGLOG("creating symbol {'%s', %hu} ...\n", str, value);
len = strlen(str); if (len == 0) { - error("error creating empty label\n"); + err("error creating empty label\n"); return NULL; } symbol = malloc(sizeof(struct symbol_t)); symbolstr = malloc(len + 1); if (symbol == NULL || symbolstr == NULL) { - error("error creating label: malloc() returned NULL\n"); + err("error creating label: malloc() returned NULL\n"); exit(-1); }@@ -165,14 +170,14 @@ {
struct symbol_t *symbol_already_present; if (g_symbol_list_len > (MAX_SYMBOLS + RESERVED_LABEL_NUM)) { - error("internal: symbol list full, over %d symbols\n", + err("internal: symbol list full, over %d symbols\n", (MAX_SYMBOLS + RESERVED_LABEL_NUM)); return false; } symbol_already_present = lookup_symbol(symbol->symbolstr); if (symbol_already_present != NULL) { // if present - error("error: failed to add symbol %s: already found in list\n", + err("error: failed to add symbol %s: already found in list\n", symbol->symbolstr); return false; }@@ -212,19 +217,19 @@
*symbol_len = 0; for (i = 0; (c = line[i]) != '\0'; ++i) { if (i == 0 && ('0' <= c && c <= '9')) { - error("syntax error: label starts with a number\n"); + err("syntax error: label starts with a number\n"); return false; } // parse characters until closing ')' if (i > MAX_SYMBOL_STR_LEN) { - error("syntax error: label is too long (> %d chars)\n", + err("syntax error: label is too long (> %d chars)\n", MAX_SYMBOL_STR_LEN); return false; } else if (valid_symbol_char(c)) { symbol_str[i] = c; } else { if (i == 1) { - error("syntax error: empty label '()'\n"); + err("syntax error: empty label '()'\n"); return false; }@@ -259,7 +264,7 @@ return false;
} if (line[label_len + 1] != ')') { - error("syntax error: no matching ')' found for label, found " + err("syntax error: no matching ')' found for label, found " "value '%c' in label\n", line[label_len + 1]); return false; }@@ -291,7 +296,7 @@ } else if (('!' <= c && c <= '\'') || ('*' <= c && c <= '~')) {
++g_instruction_number; // likely instruction found return false; } else { - error("syntax error: invalid character '%c' found in " + err("syntax error: invalid character '%c' found in " "line\n", c); } }@@ -310,14 +315,14 @@
//DBGLOG("line: %s\n", line); if (line[0] != '@') { - error("syntax error: A-type instruction doesn't start with @\n"); + err("syntax error: A-type instruction doesn't start with @\n"); return false; } c = line[1]; if (c == '\0') { - error("syntax error: A-type instruction empty after @\n"); + err("syntax error: A-type instruction empty after @\n"); return false; }@@ -343,7 +348,7 @@ } else {
for (i = 1; (c = line[i]) != '\0' && a < 6; ++i) { if ('0' <= c && c <= '9') { if (a > 4) { - error("error: @<number> too long\n"); + err("error: @<number> too long\n"); return false; } a_field_str[a] = c; // get number@@ -351,15 +356,15 @@ a++;
} else if ((c == ' ' || c == '\t' || c == '/') && i > 1) { break; } else { // any other character encountered - error("syntax error: invalid char '%c' found " + err("syntax error: invalid char '%c' found " "after @\n", c); return false; } } if (a == 0) { - error("syntax error: A-type instruction empty " - "after '@'\n"); + err("syntax error: A-type instruction empty after " + "'@'\n"); return false; }@@ -369,7 +374,7 @@ a_field = myatoi(a_field_str);
} if (a_field > 32767) { - error("error: %u > 32767, too large\n", a_field); + err("error: %u > 32767, too large\n", a_field); return false; }@@ -389,36 +394,36 @@
for (len = 0; dest_line[len] != '='; ++len) { // read until '=' if (dest_line[len] == 'A') { if (*dest & 0x04) { // if A register already set - error("syntax error: A register set twice\n"); + err("syntax error: A register set twice\n"); return false; } *dest |= 0x04; } else if (dest_line[len] == 'D') { if (*dest & 0x02) { // if D register already set - error("syntax error: D register set twice\n"); + err("syntax error: D register set twice\n"); return false; } *dest |= 0x02; } else if (dest_line[len] == 'M') { if (*dest & 0x01) { // if M register already set - error("syntax error: M register set twice\n"); + err("syntax error: M register set twice\n"); return false; } *dest |= 0x01; } else { - error("syntax error: invalid destination register %c\n", + err("syntax error: invalid destination register %c\n", dest_line[len]); return false; } if (len >= 3) { - error("syntax error: dest field incorrect length %lu\n", + err("syntax error: dest field incorrect length %lu\n", len); return false; } } if (len == 0) { - error("syntax error: dest field empty\n"); + err("syntax error: dest field empty\n"); return false; }@@ -446,7 +451,7 @@ case 'D': *comp = 0x0c; break; // 0 001100
case 'A': *comp = 0x30; break; // 0 110000 case 'M': *comp = 0x70; break; // 1 110000 default: - error("syntax error: comp field incorrect value\n"); + err("syntax error: comp field incorrect value\n"); return false; } } else if (len == 2) { // -1 !D !A !M -D -A -M@@ -457,7 +462,7 @@ case 'D': *comp = 0x0f; break; // 0 001111
case 'A': *comp = 0x33; break; // 0 110011 case 'M': *comp = 0x73; break; // 1 110011 default: - error("syntax error: comp field incorrect value\n"); + err("syntax error: comp field incorrect value\n"); return false; } } else if (comp_line[0] == '!') {@@ -466,11 +471,11 @@ case 'D': *comp = 0x0d; break; // 0 001101
case 'A': *comp = 0x31; break; // 0 110001 case 'M': *comp = 0x71; break; // 1 110001 default: - error("syntax error: comp field incorrect value\n"); + err("syntax error: comp field incorrect value\n"); return false; } } else { - error("syntax error: comp field incorrect value\n"); + err("syntax error: comp field incorrect value\n"); return false; } } else if (len == 3) {@@ -481,7 +486,7 @@ *comp = 0x1f; // 0 011111
} else if (comp_line[1] == '-') { *comp = 0x0e; // 0 001110 } else { - error("syntax error: comp field " + err("syntax error: comp field " "incorrect value\n"); return false; }@@ -491,7 +496,7 @@ *comp = 0x40; // _1_ 000000
} else if (comp_line[2] == 'A') { *comp = 0x00; // _0_ 000000 } else { - error("syntax error: comp field " + err("syntax error: comp field " "incorrect value\n"); return false; }@@ -502,7 +507,7 @@ case '-': *comp |= 0x13; break; // 010011
case '&': *comp |= 0x00; break; // 000000 case '|': *comp |= 0x15; break; // 010101 default: - error("syntax error: comp field " + err("syntax error: comp field " "incorrect value\n"); return false; }@@ -514,7 +519,7 @@ *comp = 0x40; // _1_ 000000
} else if (comp_line[0] == 'A') { *comp = 0x00; // _0_ 000000 } else { - error("syntax error: comp field incorrect value\n"); + err("syntax error: comp field incorrect value\n"); return false; }@@ -525,12 +530,12 @@ *comp |= 0x32; // 1 110010
} else if (comp_line[1] == '-' && comp_line[2] == 'D') { *comp |= 0x07; // 1 000111 } else { - error("syntax error: comp field incorrect value\n"); + err("syntax error: comp field incorrect value\n"); return false; } } } else { - error("syntax error: comp field incorrect length %lu\n", len); + err("syntax error: comp field incorrect length %lu\n", len); return false; }@@ -552,7 +557,7 @@ jump_line[len] == 'Q' || jump_line[len] == 'L' ||
jump_line[len] == 'N' || jump_line[len] == 'M' || jump_line[len] == 'P'; ++len) {} if (len != 3) { - error("syntax error: jump field incorrect length %lu\n", len); + err("syntax error: jump field incorrect length %lu\n", len); return false; }@@ -564,14 +569,14 @@ *jump = 0x1;
} else if (jump_line[2] == 'E') { // if "JGE" *jump = 0x3; } else { - error(err_3rd_char); return false; + err(err_3rd_char); return false; } break; case 'E': // if "JE_" if (jump_line[2] == 'Q') { // if "JEQ" *jump = 0x2; } else { - error(err_3rd_char); return false; + err(err_3rd_char); return false; } break; case 'L': // if "JL_"@@ -580,29 +585,29 @@ *jump = 0x4;
} else if (jump_line[2] == 'E') { // if "JLE" *jump = 0x6; } else { - error(err_3rd_char); return false; + err(err_3rd_char); return false; } break; case 'N': // if "JN_" if (jump_line[2] == 'E') { // if "JNE" *jump = 0x5; } else { - error(err_3rd_char); return false; + err(err_3rd_char); return false; } break; case 'M': // if "JM_" if (jump_line[2] == 'P') { // if "JMP" *jump = 0x7; } else { - error(err_3rd_char); return false; + err(err_3rd_char); return false; } break; default: - error("syntax error: 2nd letter in jump field incorrect\n"); + err("syntax error: 2nd letter in jump field incorrect\n"); return false; } } else { - error("syntax error: jump field doesn't start with 'J'\n"); + err("syntax error: jump field doesn't start with 'J'\n"); return false; }@@ -637,7 +642,7 @@ dest_start = &line[0]; // start of line
// this 'i+1' might be dangerous! comp_start = &line[i+1]; // after "[dest]=" } else { - error("syntax error: dest incorrect length %lu\n", i); + err("syntax error: dest incorrect length %lu\n", i); return false; } } else if (c == ';') { // indicates jump field@@ -645,7 +650,7 @@ if (1 <= i && i <= 7) {
// this 'i+1' might be dangerous! jump_start = &line[i+1]; // after "[comp];" } else { - error("syntax error: jump incorrect length %lu\n", i); + err("syntax error: jump incorrect length %lu\n", i); return false; } }@@ -702,7 +707,7 @@ ret = parse_c_type(&line[i], instruction);
++g_instruction_number; break; } else { - error("syntax error: line '%s' incorrectly formatted\n", + err("syntax error: line '%s' incorrectly formatted\n", line); }@@ -733,7 +738,7 @@ slash_found = true;
continue; } else if (slash_found) { // this char not slash, but previous was: invalid syntax - error("syntax error: found '/', comments need '//'\n"); + err("syntax error: found '/', comments need '//'\n"); return false; } else if (c == '(') { // found label, ignore and return return false;@@ -747,7 +752,8 @@ return parse_next_instruction(line, instruction);
} -char *usage_msg = "Usage: assembler1 [path/to/file.asm]\n"; +char *usage_msg = "Usage: n2tasm1 [input/file.asm] [assembled/output]\n" + " n2tasm1 [input/file.asm] # will output to STDOUT\n"; int main(int argc, char *argv[]) {@@ -756,12 +762,27 @@ uint16_t instruction;
char in_line[MAX_LINE_LEN]; size_t i, file_line, in_line_len; char *in_file_path; - FILE *fp; + FILE *fp, *fp_out; + if (argc == 2) { + fp_out = stdout; + } else if (argc == 3) { + fp_out = fopen(argv[2], "wb"); + if (fp_out == NULL) { + fprintf(stderr, "failed to open %s for writing\n", + argv[2]); + exit(-1); + } + } else { + fprintf(stderr, usage_msg); + exit(-1); + } + /* if (argc != 2) { // requires 1 argument fprintf(stderr, usage_msg); exit(-1); } + */ in_file_path = argv[1]; fp = fopen(in_file_path, "r");@@ -814,7 +835,7 @@ }
file_line = 1; while (fgets(in_line, MAX_LINE_LEN, fp) != NULL) { // parse loop - fprintf(stderr, "line %lu: %s", file_line, in_line); + DBGLOG("line %lu: %s", file_line, in_line); for (i = 0; in_line[i] != '\0'; ++i) { // remove newlines if (in_line[i] == '\n' || in_line[i] == '\r') { in_line[i] = '\0';@@ -830,10 +851,8 @@ g_asm_line = in_line;
g_asm_line_number = file_line; result = parse_line(in_line, &instruction); - if (result) { - print_binary_word16(instruction); - putchar('\n'); - } + if (result) + write_binary_word16(fp_out, instruction); ++file_line; }
@@ -4,7 +4,11 @@ #include <stdio.h>
#include <stdlib.h> #include <string.h> -#define DBGLOG printf +#ifdef _DEBUG +#define DBGLOG printf +#else +#define DBGLOG(...) +#endif #define err(...) (fprintf(stderr, __VA_ARGS__), \ fprintf(stderr, "%lu | %s\n", file_line_no, file_line), false)@@ -13,7 +17,7 @@
#define is_symbol_char(c) (('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') \ || ('a' <= c && c <= 'z') || c == '_' || c == '.' \ || c == '$' || c == ':') -#define is_whitespace(c) ((c == ' ' || c == '\t' || c == '\n')) +#define is_whitespace(c) ((c == ' ' || c == '\t' || c == '\n' || c == '\r')) #define is_number(c) (('0' <= c && c <= '9')) #define MAX_LINE_LEN 256 // TODO: in/excludes NULL terminator?@@ -182,7 +186,7 @@ 0x32, 0x72, 0x02, 0x42, 0x13, 0x53, 0x07,
0x47, 0x00, 0x40, 0x15, 0x55}; for (i = 0; i < 5 && !is_whitespace(line[i]) && line[i] != ';' - && line[i] != '/'; ++i) + && line[i] != '/' && line[i] != '\0'; ++i) comp_str[i] = line[i]; comp_str[i] = '\0';@@ -207,7 +211,8 @@ size_t i;
char jump_str[5]; char *jump_tb[] = {"", "JGT", "JEQ", "JGE", "JLT", "JNE", "JLE", "JMP"}; - for (i = 0; i < 5 && !is_whitespace(line[i]) && line[i] != '/'; ++i) + for (i = 0; i < 5 && !is_whitespace(line[i]) && line[i] != '/' + && line[i] != '\0'; ++i) jump_str[i] = line[i]; jump_str[i] = '\0';@@ -221,7 +226,7 @@ return true;
} } - return err("syntax error: invalid jump field '%s'\n", jump_str); + return err("syntax error2: invalid jump field '%s'\n", jump_str); } /* instruction format: 0b111accccccdddjjj@@ -340,6 +345,7 @@ file_line_no = 0;
instruction_offset = 0; while (fgets(in_line, MAX_LINE_LEN, in_file) != NULL) { // parse loop ++file_line_no; + //DBGLOG("line %lu: %s", file_line_no, in_line); file_line = in_line; line = &in_line[skip_whitespace(in_line, MAX_LINE_LEN)];@@ -379,7 +385,8 @@ return ret;
} -char *usage_msg = "Usage: assembler1 [path/to/file.asm]\n"; +char *usage_msg = "Usage: n2tasm2 [input/file.asm] [assembled/output]\n" + " n2tasm2 [input/file.asm] # will output to STDOUT\n"; int main(int argc, char *argv[]) {@@ -390,7 +397,7 @@ out_file = stdout;
} else if (argc == 3) { out_file = fopen(argv[2], "wb"); if (out_file == NULL) - die("failed to open output file for writing\n"); + die("failed to open %s for writing\n", argv[2]); } else { die(usage_msg); }@@ -418,3 +425,4 @@ die("failed to close output file\n");
return 0; } +