// projects/08/src/parser.h
#ifndef _PARSER_H #define _PARSER_H // 'parser.h' roughly corresponds to the 'Parser' module specified in // nand2tetris, with a few liberties taken. #include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include "util.h" #define _DEBUG #define CMD_STR_MAX_LEN (8) #define ARG2_MAX_LEN (5) // arg2 can only be a number between 0 and 65535 #define ARG1_MAX_LEN (MAX_LINE_LEN - CMD_STR_MAX_LEN - ARG2_MAX_LEN - 3) //#define ARG1_MAX_LEN 128 // temp value for debugging enum vm_command_t { C_ARITHMETIC, C_PUSH, C_POP, C_LABEL, C_GOTO, C_IF, C_FUNCTION, C_RETURN, C_CALL, C_UNUSED }; char *arith_cmd_lut[] = {"add", "sub", "neg", "eq", "gt", "lt", "and", "or", "not"}; char *cmd_lut[] = {"push", "pop", "label", "goto", "if-goto", "function", "return", "call"}; enum vm_command_t cmd_lut_vals[] = {C_PUSH, C_POP, C_LABEL, C_GOTO, C_IF, C_FUNCTION, C_RETURN, C_CALL}; struct vm_instruction_t { enum vm_command_t cmd; char *arg1; // if C_ARITHMETIC, cmd_str, else first arg uint16_t arg2; // 2nd arg (for push/pop/call/function) const char *line; char line_len; }; bool str_to_u16(uint16_t *res, char *s) { size_t i; *res = 0; for (i = 0; i < 5 && s[i] != '\0'; ++i) { if (!is_number(s[i])) { err("error: invalid char '%c' in \"%s\"\n", s[i], s); return false; } *res *= 10; *res += s[i] - '0'; // add number to result } return true; } bool cleanup_vm_instr(struct vm_instruction_t *vm_instr) { if (vm_instr->arg1 == NULL) return true; free(vm_instr->arg1); vm_instr->arg1 = NULL; return true; } void print_vm_instruction(struct vm_instruction_t *vm_instr) { printf("{\n\tcmd: %d,\n\targ1: \"%s\",\n\targ2: %hu,\n}\n", vm_instr->cmd, vm_instr->arg1, vm_instr->arg2); } // TODO: May not need parse_arg1(), parse_arg2(), could just have // 'parse_command()' and manually read vm_instr attributes when needed. 
This // makes sense because the entire command will need to be parsed anyway in // order to set all the vm_instr attributes; no sense in having redundant // functions. // Or not. Separate functions could be useful, as each could have its own // lookup table rather than 3 separate LUTs cluttering and bloating a single // function. It doesn't matter _really_, but I do want clean code if possible. // Will need to think on this... // // Expects the vm instruction line to _not_ start with whitespace (previously // trimmed) static bool parse_command_type(struct vm_instruction_t *vm_instr) { size_t i; char cmd_str[CMD_STR_MAX_LEN + 1]; // TODO check line_len against CMD_STR_MAX_LEN for (i = 0; i < CMD_STR_MAX_LEN && vm_instr->line[i] != '\0' && (!is_whitespace(vm_instr->line[i])); ++i) { cmd_str[i] = vm_instr->line[i]; // TODO: check if too large, change loop } cmd_str[i] = '\0'; for (i = 0; i < (sizeof(arith_cmd_lut) / sizeof(arith_cmd_lut[0])); ++i) { if (!strncmp(cmd_str, arith_cmd_lut[i], CMD_STR_MAX_LEN)) { vm_instr->cmd = C_ARITHMETIC; vm_instr->arg1 = malloc(CMD_STR_MAX_LEN + 1); strncpy(vm_instr->arg1, arith_cmd_lut[i], CMD_STR_MAX_LEN); return true; } } for (i = 0; i < (sizeof(cmd_lut) / sizeof(cmd_lut[0])); ++i) { if (!strncmp(cmd_str, cmd_lut[i], CMD_STR_MAX_LEN)) { vm_instr->cmd = cmd_lut_vals[i]; return true; } } err("error: illegal instruction in line \"%s\"\n", vm_instr->line); return false; } static bool parse_arg1(struct vm_instruction_t *vm_instr) { size_t i, k; if (vm_instr->cmd == C_ARITHMETIC || vm_instr->cmd == C_RETURN) { return true; // 'return' or arg1 is already parsed and correct } else if (C_ARITHMETIC < vm_instr->cmd && vm_instr->cmd < C_UNUSED) { vm_instr->arg1 = malloc(ARG1_MAX_LEN + 1); // skip first token for (i = 0; i < CMD_STR_MAX_LEN && vm_instr->line[i] != '\0' && (!is_whitespace(vm_instr->line[i])); ++i) ; if (vm_instr->line[i] == '\0' || vm_instr->line[i] == '\n' || vm_instr->line[i] == '\r') { err("error: end of line encountered, no 
first arg\n"); free(vm_instr->arg1); return false; } // now at second token // WARNING possibly an overflow here! TODO fix when I'm not lazy for (++i, k = 0; is_symbol_char(vm_instr->line[i]) && i < MAX_LINE_LEN; ++i, ++k) { vm_instr->arg1[k] = vm_instr->line[i]; } vm_instr->arg1[k] = '\0'; return true; } err("error: illegal first argument in line \"%s\"\n", vm_instr->line); return false; } static bool parse_arg2(struct vm_instruction_t *vm_instr) { bool in_whitespace = false; uint8_t ws_count = 0; uint16_t arg2_u16; size_t i, k; char c, arg2_str[ARG2_MAX_LEN + 1]; if (vm_instr->cmd == C_PUSH || vm_instr->cmd == C_POP || vm_instr->cmd == C_CALL || vm_instr->cmd == C_FUNCTION) { // skip first two tokens for (i = 0; i < MAX_LINE_LEN && vm_instr->line[i] != '\0'; ++i) { c = vm_instr->line[i]; if (c == ' ' || c == '\t') { if (!in_whitespace) { ++ws_count; in_whitespace = true; } } else { in_whitespace = false; } if (ws_count >= 2) break; // break after two spaces/tabs found } if (vm_instr->line[i] == '\0' || vm_instr->line[i] == '\n' || i >= MAX_LINE_LEN) { err("error: end of line encountered, no first arg\n"); free(vm_instr->arg1); return false; } // now at second token for (++i, k = 0; k < ARG2_MAX_LEN && is_number(vm_instr->line[i]); ++i, ++k) { arg2_str[k] = vm_instr->line[i]; } arg2_str[k] = '\0'; if (!str_to_u16(&arg2_u16, arg2_str)) { return false; } // TODO check if > 65535 (maybe?) vm_instr->arg2 = arg2_u16; return true; } err("error: can't parse 2nd arg from instruction type\n"); return false; } bool parse_line(struct vm_instruction_t *vm_instr) { vm_instr->cmd = C_UNUSED; vm_instr->arg1 = NULL; vm_instr->arg2 = 0; // 0 is still valid value if (!parse_command_type(vm_instr)) return false; if (!parse_arg1(vm_instr)) return false; if (vm_instr->cmd == C_PUSH || vm_instr->cmd == C_POP || vm_instr->cmd == C_FUNCTION || vm_instr->cmd == C_CALL) if (!parse_arg2(vm_instr)) return false; return true; } #endif // _PARSER_H |