projects/06/assembler1/assembler1.c
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
#include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include "../bindump.h" #define DEBUG(...) printf(__VA_ARGS__) #define die(err_msg, exit_val) perror(err_msg); exit(exit_val) #define alert(...) fprintf(stderr, __VA_ARGS__) uint32_t myatoi(const char *a_field_str) { DEBUG("a_field_str = %s\n", a_field_str); // I know, I could just use atoi(), but where's the fun in that? // STUB return 0xaff; } bool parse_a_type(const char *line, uint16_t *instruction) { bool num_found = false; char c, a_field_str[6]; // TODO: eventually factor out use of array uint32_t a_field = 0; size_t i, a = 0; i = 0; while ((c = line[i]) != '\n') { if (c == ' ' || c == '\t' || c == '/') { if (num_found) { num_found = false; if (a < 7) { a_field_str[a] = '\0'; break; } else { die("fatal error: a-type value index " "%lu out of range\n", -1); } } else { ; // skip whitespace } } else if (num_found) { if ('0' <= c && c <= '9') { if (a > 4) { alert("error: @<number> too long\n"); return false; } a_field_str[a] = c; // get number } else { alert("syntax error: invalid char '%c' found " "after @\n", c); return false; } ++a; } else if (c == '@') { a = 0; num_found = true; } else { alert("syntax error: invalid char '%c' before @\n", c); return false; } ++i; } // TODO: extension: support negative numbers a_field = myatoi(a_field_str); if (a_field > 32767) { alert("error: %u > 32767, too large\n", a_field); return false; } *instruction = 0x0000 | (uint16_t) a_field; return true; // STUB, A-type MSB == 0 anyway } bool parse_c_type(const char *line, uint16_t *instruction) { DEBUG("parse_c_type()\n"); *instruction = 0x8000; // STUB, C-type MSB == 1 anyway return true; // STUB, C-type MSB == 1 anyway } // does not care about line line length; exits at first newline or after // relevant portion parsed (allows for syntactically-incorrect lines, I know) bool parse_instruction_w_type(const char *line, uint16_t *instruction) { bool ret; char c; size_t i = 0; while ((c = line[i]) != '\n') { if (c == ' ' || c == '\t') ; // skip whitespace else if (c == '@') { ret = parse_a_type(line, instruction); break; } else if (c >= '!' && c < '~') { ret = parse_c_type(line, instruction); break; } else { die("error: TODO explain...\n", -1); } ++i; } return ret; } bool parse_next_instruction(const char *line, uint16_t *instruction) { *instruction = 1337; // STUB, should be 0x539, 0b0000010100111001 return parse_instruction_w_type(line, instruction); return true; } /* return false for comment or invalid assembly instruction */ bool parse_line(const char *line, size_t line_len, uint16_t *instruction) { char c; bool slash_found = false; size_t i; if (line_len == 0 || line_len == 1) return false; // filter out comment lines //for (i = 0; (c = line[i]) != NULL; ++i) { for (i = 0; i < line_len; ++i) { c = line[i]; if (c == ' ' || c == '\t') { continue; } else if (c == '/') { if (slash_found) { // second slash means this is a comment return false; } slash_found = true; continue; } else if (slash_found) { // this char not slash, but previous was: invalid syntax // TODO: add line, column numbers alert("syntax error: found '/', comments need '//'\n"); return false; } else { // non-whitespace/slash char discovered break; } } // comment not found, so attempting to parse instruction return parse_next_instruction(line, instruction); } const char *test_lines[] = {"// this is a comment\n", // line 1 "@12345\n", // line 2 "\n", // line 3 "M+1\n", // line 4 "@98 // test comment\n", // line 5 "// this is another comment\n", // line 6 "/ this is a broken comment\n", // line 7 "D=M;JNE\n", // line 8 " @1337// immediate comment \n", // line 9 "\t@13371337 // number too long \n", // line 10 " @0x1337 // invalid char in num\n"}; // line 11 size_t num_test_lines = (sizeof(test_lines) / sizeof(test_lines[0])); int main() { // TODO: read file by lines, parse instructions bool result = false; uint16_t instruction; size_t test_line_len; // New organization allows code to eventually work with lines fetched // from a 'fgets()' loop or something // for testing for (size_t i = 0; i < num_test_lines; ++i) { DEBUG("%lu|%s", i+1, test_lines[i]); test_line_len = strlen(test_lines[i]); result = parse_line(test_lines[i], test_line_len, &instruction); if (result) { DEBUG("instruction: 0x%x | ", instruction); bindump_word16(instruction); // output instruction as binary putchar('\n'); } } return 0; } |