Implement A-type instruction parse logic
x1phosura x1phosura@x1phosura.zone
Fri, 25 Nov 2022 22:44:54 -0800
1 file changed,
74 insertions(+),
25 deletions(-)
M
projects/06/assembler1/assembler1.c
→
projects/06/assembler1/assembler1.c
@@ -6,24 +6,72 @@ #include <string.h>
#include "../bindump.h" -#define DEBUG(x) printf(x) +#define DEBUG(...) printf(__VA_ARGS__) + +#define die(err_msg, exit_val) perror(err_msg); exit(exit_val) +#define alert(...) fprintf(stderr, __VA_ARGS__) -void die(const char *err_msg, int exit_val) -{ - perror(err_msg); - exit(exit_val); -} -void alert(const char *err_msg) +uint32_t myatoi(const char *a_field_str) { - fprintf(stderr, "%s", err_msg); + DEBUG("a_field_str = %s\n", a_field_str); + // I know, I could just use atoi(), but where's the fun in that? + // STUB + return 0xaff; } bool parse_a_type(const char *line, uint16_t *instruction) { - DEBUG("parse_a_type()\n"); - // eventually, if error, die("error: TODO explain...\n", -1); - *instruction = 0x0000; // STUB, A-type MSB == 0 anyway + bool num_found = false; + char c, a_field_str[6]; // TODO: eventually factor out use of array + uint32_t a_field = 0; + size_t i, a = 0; + + i = 0; + while ((c = line[i]) != '\n') { + if (c == ' ' || c == '\t' || c == '/') { + if (num_found) { + num_found = false; + if (a < 7) { + a_field_str[a] = '\0'; + break; + } else { + die("fatal error: a-type value index " + "%lu out of range\n", -1); + } + } else { + ; // skip whitespace + } + } else if (num_found) { + if ('0' <= c && c <= '9') { + if (a > 4) { + alert("error: @<number> too long\n"); + return false; + } + a_field_str[a] = c; // get number + } else { + alert("syntax error: invalid char '%c' found " + "after @\n", c); + return false; + } + ++a; + } else if (c == '@') { + a = 0; + num_found = true; + } else { + alert("syntax error: invalid char '%c' before @\n", c); + return false; + } + ++i; + } + // TODO: extension: support negative numbers + a_field = myatoi(a_field_str); + if (a_field > 32767) { + alert("error: %u > 32767, too large\n", a_field); + return false; + } + + *instruction = 0x0000 | (uint16_t) a_field; return true; // STUB, A-type MSB == 0 anyway }@@ -33,7 +81,6 @@ DEBUG("parse_c_type()\n");
*instruction = 0x8000; // STUB, C-type MSB == 1 anyway return true; // STUB, C-type MSB == 1 anyway } -// */ // does not care about line line length; exits at first newline or after // relevant portion parsed (allows for syntactically-incorrect lines, I know)@@ -61,7 +108,6 @@ }
return ret; } -// */ bool parse_next_instruction(const char *line, uint16_t *instruction) {@@ -93,13 +139,12 @@ if (slash_found) {
// second slash means this is a comment return false; } - slash_found= true; + slash_found = true; continue; } else if (slash_found) { // this char not slash, but previous was: invalid syntax // TODO: add line, column numbers - alert("\tsyntax error: found '/', comments " - "need '//'\n"); + alert("syntax error: found '/', comments need '//'\n"); return false; } else { // non-whitespace/slash char discovered@@ -111,14 +156,17 @@ // comment not found, so attempting to parse instruction
return parse_next_instruction(line, instruction); } -const char *test_lines[8] = {"// this is a comment\n", - "@12345\n", - "\n", - "M+1\n", - "@98\n", - "// this is another comment\n", - "/ this is a broken comment\n", - "D=M;JNE\n"}; +const char *test_lines[] = {"// this is a comment\n", // line 1 + "@12345\n", // line 2 + "\n", // line 3 + "M+1\n", // line 4 + "@98 // test comment\n", // line 5 + "// this is another comment\n", // line 6 + "/ this is a broken comment\n", // line 7 + "D=M;JNE\n", // line 8 + " @1337// immediate comment \n", // line 9 + "\t@13371337 // number too long \n", // line 10 + " @0x1337 // invalid char in num\n"}; // line 11 size_t num_test_lines = (sizeof(test_lines) / sizeof(test_lines[0])); int main()@@ -133,10 +181,11 @@ // from a 'fgets()' loop or something
// for testing for (size_t i = 0; i < num_test_lines; ++i) { + DEBUG("%lu|%s", i+1, test_lines[i]); test_line_len = strlen(test_lines[i]); result = parse_line(test_lines[i], test_line_len, &instruction); if (result) { - printf("instruction: 0x%x | ", instruction); + DEBUG("instruction: 0x%x | ", instruction); bindump_word16(instruction); // output instruction as binary putchar('\n'); }