projects/06/assembler1/assembler1.c
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
#include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include "../bindump.h" #define DEBUG(...) printf(__VA_ARGS__) #define die(err_msg, exit_val) perror(err_msg); exit(exit_val) #define alert(...) fprintf(stderr, __VA_ARGS__) uint32_t myatoi(const char *a_field_str) { size_t i; uint32_t ret = 0; for (i = 0; i < 5 && '0' <= a_field_str[i] && a_field_str[i] <= '9'; ++i) { ret = (ret * 10) + (a_field_str[i] - 0x30); } return ret; } bool parse_a_type(const char *line, uint16_t *instruction) { char c, a_field_str[6]; // TODO: eventually factor out use of array uint32_t a_field = 0; size_t i, a = 0; if (line[0] != '@') { alert("error: A-type instruction doesn't start with @\n"); return false; } if (line[1] == '\n') { alert("error: A-type instruction empty after @\n"); return false; } for (i = 1; (c = line[i]) != '\n' && a < 6; ++i) { if ('0' <= c && c <= '9') { if (a > 4) { alert("error: @<number> too long\n"); return false; } a_field_str[a] = c; // get number a++; } else if ((c == ' ' || c == '\t' || c == '/') && i > 1) { break; } else { // any other character alert("syntax error: invalid char '%c' found after @\n", c); return false; } } a_field_str[a] = '\0'; // exit // TODO: extension: support negative numbers a_field = myatoi(a_field_str); if (a_field > 32767) { alert("error: %u > 32767, too large\n", a_field); return false; } *instruction = 0x0000 | (uint16_t) a_field; return true; // STUB, A-type MSB == 0 anyway } bool parse_c_type(const char *line, uint16_t *instruction) { *instruction = 0x8888; // STUB, TODO implement return true; } // does not care about line line length; exits at first newline or after // relevant portion parsed (allows for syntactically-incorrect lines, I know) bool parse_next_instruction(const char *line, uint16_t *instruction) { bool ret; char c; size_t i = 0; while ((c = line[i]) != '\n') { if (c == ' ' || c == '\t') ; // skip any whitespace at start of line else if (c == '@') { ret = parse_a_type(&line[i], instruction); break; } else if (c >= '!' && c < '~') { ret = parse_c_type(&line[i], instruction); break; } else { alert("syntax error: line '%s' incorrectly formatted\n", line); } ++i; } return ret; } // return false for comment or invalid assembly instruction bool parse_line(const char *line, size_t line_len, uint16_t *instruction) { char c; bool slash_found = false; size_t i; if (line_len == 0 || line_len == 1) return false; // filter out comment lines //for (i = 0; (c = line[i]) != NULL; ++i) { for (i = 0; i < line_len; ++i) { c = line[i]; if (c == ' ' || c == '\t') { continue; } else if (c == '/') { if (slash_found) { // second slash means this is a comment return false; } slash_found = true; continue; } else if (slash_found) { // this char not slash, but previous was: invalid syntax // TODO: add line, column numbers alert("syntax error: found '/', comments need '//'\n"); return false; } else { // non-whitespace/slash char discovered break; } } // comment not found, so attempting to parse instruction return parse_next_instruction(line, instruction); } const char *test_lines[] = {"// this is a comment\n", // line 1 "@12345\n", // line 2 "\n", // line 3 "M+1\n", // line 4 "@98 // test comment\n", // line 5 "// this is another comment\n", // line 6 "/ this is a broken comment\n", // line 7 "D=M;JNE\n", // line 8 " @1337// immediate comment \n", // line 9 "\t@44441337 // number too long \n", // line 10 " \t @1 // 1\n", // line 11 "\t @2 // 2\n", // line 12 "\t @3\n", // line 13 "\t @4\n", // line 14 " \t@5 // 3\n", // line 15 " @6 // 4\n", // line 16 "\t @7\n", // line 17 " @0x1337 // invalid char in num\n"}; // line 18 size_t num_test_lines = (sizeof(test_lines) / sizeof(test_lines[0])); int main() { // TODO: read file by lines, parse instructions bool result = false; uint16_t instruction; size_t test_line_len; // New organization allows code to eventually work with lines fetched // from a 'fgets()' loop or something // for testing for (size_t i = 0; i < num_test_lines; ++i) { DEBUG("%lu|%s", i+1, test_lines[i]); test_line_len = strlen(test_lines[i]); result = parse_line(test_lines[i], test_line_len, &instruction); if (result) { DEBUG("instruction: 0x%x | ", instruction); bindump_word16(instruction); // output instruction as binary putchar('\n'); } } return 0; } |