Copy over project 7 VM files for project 8
x1phosura x1phosura@x1phosura.zone
Tue, 07 Nov 2023 19:16:04 -0800
6 files changed,
769 insertions(+),
0 deletions(-)
A
projects/08/Makefile
@@ -0,0 +1,15 @@
# nand2tetris project 8 VM translator makefile
# for debugging this makefile: `make -d`
CFLAGS = -std=c99 -Wall -Wextra

# vmtranslator.c #includes these directly, so they must be prerequisites or
# edits to them will not trigger a rebuild
HEADERS = src/codewriter.h src/parser.h src/util.h

all: bin/vmtranslator

# TODO: clean up, make more Makefile-idiomatic

bin/vmtranslator: src/vmtranslator.c $(HEADERS)
	mkdir -p $(@D)
	$(CC) $(CFLAGS) -o $@ $<

clean:
	rm -vf bin/*

.PHONY: all clean
A
projects/08/src/codewriter.h
@@ -0,0 +1,351 @@
+#ifndef _CODEWRITER_H +#define _CODEWRITER_H + +// 'codewriter.h' roughly corresponds to the 'CodeWriter' module specified in +// nand2tetris, with a few liberties taken. + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "parser.h" +#include "util.h" + +#define _DEBUG + +// memory mapping: +// 0-15 virtual registers R0-R15 +// 16-255 static variables +// 256-2047 stack +#define SP (0) // points to word ahead of top of stack +#define LCL (1) // points to local segment +#define ARG (2) // points to argument segment +#define POINTER (3) +#define THIS (3) +#define THAT (4) +#define TEMP (5) +#define R13 (13) // R13-R15 are scratch space that +#define R14 (14) // VM-generated assembly can use +#define R15 (15) // for whatever. +#define STATIC (16) // start of static variables segment (240 words, 16-255) + +char *static_sym_name; + +// static segment index indexes into map, retrieves hack assembly symbol offset +#define MAX_STATIC_SYMBOLS (240) // for whatever. 
+uint16_t static_symbol_map[MAX_STATIC_SYMBOLS]; +uint8_t g_symbol_offset_bump = 0; // holds current largest symbol offset, bumps + + +char comp_vm_funcs[] = "@__comp_funcs_end\n" + "0;JMP\n" + "\n" + "(__test_eq)\n" + "@SP\n" + "AM=M-1\n" // RAM[SP]--, A = RAM[SP] + "D=M\n" // D = RAM[SP] (top stack val) + "A=A-1\n" // --A (--> bottom stack val) + "D=M-D\n" // if D == 0, equal + "M=0\n" // prematurely push false + "@__test_eq_neq\n" // if D == 0, equal + "D;JNE\n" // if D != 0, not equal, jump + "@SP\n" // get SP + "A=M-1\n" // get bottom stack val address + "M=-1\n" // push 0xffff (true) + "(__test_eq_neq)\n" + "@R13\n" // return address in RAM[R13] + "A=M\n" // A = return address + "0;JMP\n" // return + "\n" + + "(__test_gt)\n" + "@SP\n" + "AM=M-1\n" // RAM[SP]--, A = RAM[SP] + "D=M\n" // D = RAM[SP] (top stack val) + "A=A-1\n" // --A (--> bottom stack val) + "D=M-D\n" // if (D - M) > 0; push true + "M=0\n" // prematurely push false + "@__test_gt_neq\n" // if D == 0, equal + "D;JLE\n" // if D != 0, not equal, jump + "@SP\n" // get SP + "A=M-1\n" // get bottom stack val address + "M=-1\n" // push 0xffff (true) + "(__test_gt_neq)\n" + "@R13\n" // return address in RAM[R13] + "A=M\n" // A = return address + "0;JMP\n" // return + "\n" + + "(__test_lt)\n" + "@SP\n" + "AM=M-1\n" // RAM[SP]--, A = RAM[SP] + "D=M\n" // D = RAM[SP] (top stack val) + "A=A-1\n" // --A (--> bottom stack val) + "D=M-D\n" // if (D - M) < 0; push true + "M=0\n" // prematurely push false + "@__test_lt_neq\n" // if D == 0, equal + "D;JGE\n" // if D != 0, not equal, jump + "@SP\n" // get SP + "A=M-1\n" // get bottom stack val address + "M=-1\n" // push 0xffff (true) + "(__test_lt_neq)\n" + "@R13\n" // return address in RAM[R13] + "A=M\n" // A = return address + "0;JMP\n" // return + "\n" + "(__comp_funcs_end)\n"; + +char vm_init[] = "//@256\n" // starting address of stack (nothing pushed yet) + "//D=A\n" // D = 256 + "//@SP\n" // A = <constant representing address of SP> + "//M=D\n" // <memory 
pointed to by SP> = 256 + "\n%s\n"; // <- comp_vm_funcs +// TODO: add initializers for argument, local, static, constant, this, that +char vm_stop[] = "(END)\n" // starting address of stack (nothing pushed yet) + "@END\n" // D = 256 + "0;JMP\n"; // A = <constant representing address of SP> + + +void write_vm_init(FILE *fp) +{ + fprintf(fp, vm_init, comp_vm_funcs); +} + +void write_vm_stop(FILE *fp) +{ + fprintf(fp, "\n%s", vm_stop); +} + +static bool write_arithmetic(struct vm_instruction_t *vm_instr, FILE *fp) +{ + char binary_op_template[] = "@SP\n" + "AM=M-1\n" // RAM[SP]--, A = RAM[SP] + "D=M\n" // D = RAM[SP] (top stack val) + "A=A-1\n" // --A (--> bottom stack val) + "%s"; // arithmetic op goes here + char unary_op_template[] = "@SP\n" + "A=M-1\n" // A = SP - 1 + "%s"; // arithmetic op goes here + + char op_add[] = "M=D+M\n"; + char op_sub[] = "M=M-D\n"; + + char op_eq[] = "@%s_%lu_eq\n" // <- static_sym_name, file_line_number + "D=A\n" + "@R13\n" + "M=D\n" + "@__test_eq\n" + "0;JMP\n" + "(%s_%lu_eq)\n" // return here + "\n"; + char op_gt[] = "@%s_%lu_gt\n" // <- static_sym_name, file_line_number + "D=A\n" + "@R13\n" + "M=D\n" + "@__test_gt\n" + "0;JMP\n" + "(%s_%lu_gt)\n" // return here + "\n"; + char op_lt[] = "@%s_%lu_lt\n" // <- static_sym_name, file_line_number + "D=A\n" + "@R13\n" + "M=D\n" + "@__test_lt\n" + "0;JMP\n" + "(%s_%lu_lt)\n" // return here + "\n"; + + char op_and[] = "M=D&M\n"; + char op_or[] = "M=D|M\n"; + + char op_neg[] = "M=-M\n"; + char op_not[] = "M=!M\n"; + + // binary operations + if (!strncmp(vm_instr->arg1, "add", CMD_STR_MAX_LEN)) { + fprintf(fp, binary_op_template, op_add); + } else if (!strncmp(vm_instr->arg1, "sub", CMD_STR_MAX_LEN)) { + fprintf(fp, binary_op_template, op_sub); + } else if (!strncmp(vm_instr->arg1, "eq", CMD_STR_MAX_LEN)) { + fprintf(fp, op_eq, static_sym_name, file_line_no, + static_sym_name, file_line_no); + } else if (!strncmp(vm_instr->arg1, "gt", CMD_STR_MAX_LEN)) { + fprintf(fp, op_gt, static_sym_name, 
file_line_no, + static_sym_name, file_line_no); + } else if (!strncmp(vm_instr->arg1, "lt", CMD_STR_MAX_LEN)) { + fprintf(fp, op_lt, static_sym_name, file_line_no, + static_sym_name, file_line_no); + } else if (!strncmp(vm_instr->arg1, "and", CMD_STR_MAX_LEN)) { + fprintf(fp, binary_op_template, op_and); + } else if (!strncmp(vm_instr->arg1, "or", CMD_STR_MAX_LEN)) { + fprintf(fp, binary_op_template, op_or); + // unary operations + } else if (!strncmp(vm_instr->arg1, "neg", CMD_STR_MAX_LEN)) { + fprintf(fp, unary_op_template, op_neg); + } else if (!strncmp(vm_instr->arg1, "not", CMD_STR_MAX_LEN)) { + fprintf(fp, unary_op_template, op_not); + } else { + err("error: invalid arithmetic op \"%s\"\n", vm_instr->arg1); + return false; + } + + return true; +} + +static bool resolve_static_address(struct vm_instruction_t *vm_instr, + uint16_t *addr) +{ + uint16_t symbol_offset; + if (vm_instr->arg2 >= MAX_STATIC_SYMBOLS) { + err("error: arg2 too large, >= %u\n", MAX_STATIC_SYMBOLS); + return false; + } + + symbol_offset = static_symbol_map[vm_instr->arg2]; + if (symbol_offset == 0xffff) { // new offset not in map (-1 special) + if (g_symbol_offset_bump >= MAX_STATIC_SYMBOLS) { + err("error: symbol offset grew too large (>= %u), " + "too many static variables\n", MAX_STATIC_SYMBOLS); + return false; + } + static_symbol_map[vm_instr->arg2] = g_symbol_offset_bump; + *addr = g_symbol_offset_bump; + ++g_symbol_offset_bump; // bump global symbol offset + } else { + // offset found in map, return symbol value/index + *addr = symbol_offset; + } + + return true; +} + +// push 16-bit value from segment offset onto top of stack +static bool write_push(struct vm_instruction_t *vm_instr, FILE *fp) +{ + uint16_t addr, arg2 = vm_instr->arg2; + + // TODO: could add SP counter/check to catch overflows + char const_template[] = "@%hu\n" // A = constant + "D=A\n%s"; // D = constant + char addr_template[] = "@%hu\n" // A = segment + index + "D=M\n%s"; // D = RAM[segment + index] + char 
static_template[] = "@%s.%hu\n" // A = segment + index + "D=M\n%s"; // D = RAM[segment + index] + char indirect_template[] = "@%hu\n" // A = segment + "D=M\n" // D = RAM[segment] + "@%hu\n" // A = index + "A=A+D\n" // A = segment + index + "D=M\n%s"; // D = RAM[segment + index] + char push_boilerplate[] = "@SP\n" + "M=M+1\n" // RAM[SP]++ // inc SP + "A=M-1\n" // A = RAM[SP] - 1 // prev top + "M=D\n"; // RAM[SP] = constant + + if (!strcmp(vm_instr->arg1, "constant")) { + // TODO: check size of constant (allowed to be > 32,767?) + // TODO: look in nand2tetris forums in case issue already noted + fprintf(fp, const_template, arg2, push_boilerplate); + } else if (!strcmp(vm_instr->arg1, "argument")) { + fprintf(fp, indirect_template, ARG, arg2, push_boilerplate); + } else if (!strcmp(vm_instr->arg1, "local")) { + fprintf(fp, indirect_template, LCL, arg2, push_boilerplate); + } else if (!strcmp(vm_instr->arg1, "static")) { + if (!resolve_static_address(vm_instr, &addr)) { + return false; + } + fprintf(fp, static_template, static_sym_name, addr, + push_boilerplate); + } else if (!strcmp(vm_instr->arg1, "this")) { + fprintf(fp, indirect_template, THIS, arg2, push_boilerplate); + } else if (!strcmp(vm_instr->arg1, "that")) { + fprintf(fp, indirect_template, THAT, arg2, push_boilerplate); + } else if (!strcmp(vm_instr->arg1, "pointer")) { + addr = POINTER + vm_instr->arg2; + fprintf(fp, addr_template, addr, push_boilerplate); + } else if (!strcmp(vm_instr->arg1, "temp")) { + addr = TEMP + vm_instr->arg2; + fprintf(fp, addr_template, addr, push_boilerplate); + } else { + err("error: invalid segment name \"%s\"\n", + vm_instr->arg1); + return false; + } + + return true; +} + +// pop 16-bit value from top of stack into segment offset +static bool write_pop(struct vm_instruction_t *vm_instr, FILE *fp) +{ + // TODO: could add SP counter/check to catch overflows + uint16_t addr, arg2 = vm_instr->arg2; + char pop_indirect_template[] = "@%hu\n" // @segment + "D=M\n" // D = segment + 
"@%hu\n" // @index + "D=A+D\n" // A = segment + index + "@R13\n" + "M=D\n" // RAM[13] = segment + index + "@SP\n" // + "AM=M-1\n" // + "D=M\n" // + "@R13\n" // + "A=M\n" // + "M=D\n"; // + char pop_addr_template[] = "@SP\n" + "AM=M-1\n" // decrement SP + "D=M\n" // "pop" (read) value into D + "@%hu\n" // load address + "M=D\n"; // "pop" (write) value to RAM + char pop_static_template[] = "@SP\n" + "AM=M-1\n" // decrement SP + "D=M\n" // "pop" (read) value into D + "@%s.%hu\n" // A = segment + index + "M=D\n"; // RAM[segment + index] = D + + if (!strcmp(vm_instr->arg1, "argument")) { + fprintf(fp, pop_indirect_template, ARG, arg2); + } else if (!strcmp(vm_instr->arg1, "local")) { + fprintf(fp, pop_indirect_template, LCL, arg2); + } else if (!strcmp(vm_instr->arg1, "static")) { + if (!resolve_static_address(vm_instr, &addr)) { + return false; + } + fprintf(fp, pop_static_template, static_sym_name, addr); + } else if (!strcmp(vm_instr->arg1, "this")) { + fprintf(fp, pop_indirect_template, THIS, arg2); + } else if (!strcmp(vm_instr->arg1, "that")) { + fprintf(fp, pop_indirect_template, THAT, arg2); + } else if (!strcmp(vm_instr->arg1, "pointer")) { + addr = POINTER + vm_instr->arg2; + fprintf(fp, pop_addr_template, addr); + } else if (!strcmp(vm_instr->arg1, "temp")) { + addr = TEMP + vm_instr->arg2; + fprintf(fp, pop_addr_template, addr); + } else { + err("error: invalid segment name \"%s\"\n", + vm_instr->arg1); + return false; + } + + return true; +} + +bool write_instruction(struct vm_instruction_t *vm_instr, FILE *fp) +{ + fprintf(fp, "\n// %lu: %s\n", file_line_no, vm_instr->line); + if (vm_instr->cmd == C_ARITHMETIC) { + write_arithmetic(vm_instr, fp); + } else if (vm_instr->cmd == C_PUSH) { + write_push(vm_instr, fp); + } else if (vm_instr->cmd == C_POP) { + write_pop(vm_instr, fp); + } else { + err("error: unrecognized instruction (%u)\n", vm_instr->cmd); + return false; + } + return true; +} + +#endif // _CODEWRITER_H
A
projects/08/src/parser.h
@@ -0,0 +1,229 @@
+#ifndef _PARSER_H +#define _PARSER_H + +// 'parser.h' roughly corresponds to the 'Parser' module specified in +// nand2tetris, with a few liberties taken. + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "util.h" + +#define _DEBUG + +#define CMD_STR_MAX_LEN (8) +#define ARG2_MAX_LEN (5) // arg2 can only be a number between 0 and 65535 +#define ARG1_MAX_LEN (MAX_LINE_LEN - CMD_STR_MAX_LEN - ARG2_MAX_LEN - 3) +//#define ARG1_MAX_LEN 128 // temp value for debugging + +enum vm_command_t { + C_ARITHMETIC, + C_PUSH, + C_POP, + C_LABEL, + C_GOTO, + C_IF, + C_FUNCTION, + C_RETURN, + C_CALL, + C_UNUSED +}; + +char *arith_cmd_lut[] = {"add", "sub", "neg", "eq", "gt", "lt", "and", + "or", "not"}; +char *cmd_lut[] = {"push", "pop", "label", "goto", "if-goto", "function", + "return", "call"}; +enum vm_command_t cmd_lut_vals[] = {C_PUSH, C_POP, C_LABEL, C_GOTO, C_IF, + C_FUNCTION, C_RETURN, C_CALL}; + +struct vm_instruction_t { + enum vm_command_t cmd; + char *arg1; // if C_ARITHMETIC, cmd_str, else first arg + uint16_t arg2; // 2nd arg (for push/pop/call/function) + const char *line; + char line_len; +}; + + +bool str_to_u16(uint16_t *res, char *s) +{ + size_t i; + *res = 0; + for (i = 0; i < 5 && s[i] != '\0'; ++i) { + if (!is_number(s[i])) { + err("error: invalid char '%c' in \"%s\"\n", s[i], s); + return false; + } + *res *= 10; + *res += s[i] - '0'; // add number to result + } + return true; +} + +bool cleanup_vm_instr(struct vm_instruction_t *vm_instr) +{ + if (vm_instr->arg1 == NULL) + return true; + + free(vm_instr->arg1); + vm_instr->arg1 = NULL; + return true; +} + +void print_vm_instruction(struct vm_instruction_t *vm_instr) +{ + printf("{\n\tcmd: %d,\n\targ1: \"%s\",\n\targ2: %hu,\n}\n", + vm_instr->cmd, vm_instr->arg1, vm_instr->arg2); +} + +// TODO: May not need parse_arg1(), parse_arg2(), could just have +// 'parse_command()' and manually read vm_instr attributes when needed. 
This +// makes sense because the entire command will need to be parsed anyway in +// order to set all the vm_instr attributes; no sense in having redundant +// functions. +// Or not. Separate functions could be useful, as each could have its own +// lookup table rather than 3 separate LUTs cluttering and bloating a single +// function. It doesn't matter _really_, but I do want clean code if possible. +// Will need to think on this... +// +// Expects the vm instruction line to _not_ start with whitespace (previously +// trimmed) +static bool parse_command_type(struct vm_instruction_t *vm_instr) +{ + size_t i; + char cmd_str[CMD_STR_MAX_LEN + 1]; + + // TODO check line_len against CMD_STR_MAX_LEN + for (i = 0; i < CMD_STR_MAX_LEN && vm_instr->line[i] != '\0' + && (!is_whitespace(vm_instr->line[i])); ++i) { + cmd_str[i] = vm_instr->line[i]; + // TODO: check if too large, change loop + } + cmd_str[i] = '\0'; + + for (i = 0; i < (sizeof(arith_cmd_lut) / sizeof(arith_cmd_lut[0])); ++i) { + if (!strncmp(cmd_str, arith_cmd_lut[i], CMD_STR_MAX_LEN)) { + vm_instr->cmd = C_ARITHMETIC; + vm_instr->arg1 = malloc(CMD_STR_MAX_LEN + 1); + strncpy(vm_instr->arg1, arith_cmd_lut[i], CMD_STR_MAX_LEN); + return true; + } + } + for (i = 0; i < (sizeof(cmd_lut) / sizeof(cmd_lut[0])); ++i) { + if (!strncmp(cmd_str, cmd_lut[i], CMD_STR_MAX_LEN)) { + vm_instr->cmd = cmd_lut_vals[i]; + return true; + } + } + + err("error: illegal instruction in line \"%s\"\n", vm_instr->line); + return false; +} + +static bool parse_arg1(struct vm_instruction_t *vm_instr) +{ + size_t i, k; + + if (vm_instr->cmd == C_ARITHMETIC || vm_instr->cmd == C_RETURN) { + return true; // 'return' or arg1 is already parsed and correct + } else if (C_ARITHMETIC < vm_instr->cmd && vm_instr->cmd < C_UNUSED) { + vm_instr->arg1 = malloc(ARG1_MAX_LEN + 1); + // skip first token + for (i = 0; i < CMD_STR_MAX_LEN && vm_instr->line[i] != '\0' + && (!is_whitespace(vm_instr->line[i])); ++i) + ; + if (vm_instr->line[i] == '\0' || 
vm_instr->line[i] == '\n' + || vm_instr->line[i] == '\r') { + err("error: end of line encountered, no first arg\n"); + free(vm_instr->arg1); + return false; + } + // now at second token + // WARNING possibly an overflow here! TODO fix when I'm not lazy + for (++i, k = 0; is_symbol_char(vm_instr->line[i]) + && i < MAX_LINE_LEN; ++i, ++k) { + vm_instr->arg1[k] = vm_instr->line[i]; + } + vm_instr->arg1[k] = '\0'; + return true; + } + + err("error: illegal first argument in line \"%s\"\n", vm_instr->line); + return false; +} + +static bool parse_arg2(struct vm_instruction_t *vm_instr) +{ + bool in_whitespace = false; + uint8_t ws_count = 0; + uint16_t arg2_u16; + size_t i, k; + char c, arg2_str[ARG2_MAX_LEN + 1]; + + if (vm_instr->cmd == C_PUSH || vm_instr->cmd == C_POP + || vm_instr->cmd == C_CALL || vm_instr->cmd == C_FUNCTION) { + + + // skip first two tokens + for (i = 0; i < MAX_LINE_LEN && vm_instr->line[i] != '\0'; ++i) { + c = vm_instr->line[i]; + if (c == ' ' || c == '\t') { + if (!in_whitespace) { + ++ws_count; + in_whitespace = true; + } + } else { + in_whitespace = false; + } + + if (ws_count >= 2) + break; // break after two spaces/tabs found + } + if (vm_instr->line[i] == '\0' || vm_instr->line[i] == '\n' + || i >= MAX_LINE_LEN) { + err("error: end of line encountered, no first arg\n"); + free(vm_instr->arg1); + return false; + } + + // now at second token + for (++i, k = 0; k < ARG2_MAX_LEN + && is_number(vm_instr->line[i]); ++i, ++k) { + arg2_str[k] = vm_instr->line[i]; + } + arg2_str[k] = '\0'; + + if (!str_to_u16(&arg2_u16, arg2_str)) { + return false; + } + // TODO check if > 65535 (maybe?) 
+ vm_instr->arg2 = arg2_u16; + return true; + } + + err("error: can't parse 2nd arg from instruction type\n"); + return false; +} + +bool parse_line(struct vm_instruction_t *vm_instr) +{ + vm_instr->cmd = C_UNUSED; + vm_instr->arg1 = NULL; + vm_instr->arg2 = 0; // 0 is still valid value + + if (!parse_command_type(vm_instr)) + return false; + if (!parse_arg1(vm_instr)) + return false; + if (vm_instr->cmd == C_PUSH || vm_instr->cmd == C_POP + || vm_instr->cmd == C_FUNCTION || vm_instr->cmd == C_CALL) + if (!parse_arg2(vm_instr)) + return false; + + return true; +} + +#endif // _PARSER_H
A
projects/08/src/util.h
@@ -0,0 +1,43 @@
#ifndef _UTIL_H
#define _UTIL_H

// Contains shared constants/macros and miscellaneous useful functions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define _DEBUG // TODO: find way to remove this/restructure so ifdef only in
               // this file

#ifdef _DEBUG
#define DBGLOG printf
#else
#define DBGLOG(...)
#endif

extern char *file_line;     // reference to currently-read line (for convenience)
extern size_t file_line_no; // line number, regardless of line content

// err: print a printf-style message followed by "<line number> | <line text>"
// for the line currently being processed. Goes to stderr so diagnostics never
// end up inside output that is being written to stdout. file_line may still
// be NULL when no line has been read yet, so it is guarded.
#define err(...) (fprintf(stderr, __VA_ARGS__), \
                  fprintf(stderr, "%zu | %s\n", file_line_no, \
                          file_line != NULL ? file_line : ""))
// die: print a printf-style message to stderr and exit with status -1.
#define die(...) do { fprintf(stderr, __VA_ARGS__); exit(-1); } while (0)

// Character classification. The argument is parenthesized so any expression
// can be passed, but it is evaluated more than once: do not pass an
// expression with side effects.
#define is_symbol_char(c) (('0' <= (c) && (c) <= '9') \
                           || ('A' <= (c) && (c) <= 'Z') \
                           || ('a' <= (c) && (c) <= 'z') \
                           || (c) == '_' || (c) == '.' \
                           || (c) == '$' || (c) == ':')
#define is_whitespace(c) (((c) == ' ' || (c) == '\t' || (c) == '\n' \
                           || (c) == '\r'))
#define is_number(c) (('0' <= (c) && (c) <= '9'))

#define MAX_LINE_LEN (256)


// Returns the number of leading whitespace characters (space, tab, CR, LF)
// in 'line', looking at no more than the first n characters. The bound is
// tested before the character is read, so line[n] is never accessed.
size_t skip_whitespace(const char *line, size_t n)
{
	size_t i = 0;

	while (i < n && is_whitespace(line[i]))
		++i;
	return i;
}

#endif // _UTIL_H
A
projects/08/src/vmtranslator.c
@@ -0,0 +1,128 @@
+#include <libgen.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "codewriter.h" +#include "parser.h" +#include "util.h" + +#define _DEBUG + +char *file_line; +size_t file_line_no; + + +bool set_static_symbol_name(char *filename) { + size_t i, filename_len, base_name_len; + char *tmp, *base_name; + + filename_len = strlen(filename); + tmp = malloc(filename_len + 1); + for (i = 0; i < filename_len; ++i) + tmp[i] = filename[i]; // copy filename (basename may modify arg) + + base_name = basename(tmp); + base_name_len = strlen(base_name); + static_sym_name = malloc(base_name_len + 1); + + for (i = 0; i < base_name_len && base_name[i] != '.'; ++i) { + if (!is_symbol_char(base_name[i])) { + err("found bad character '0x%hhx' in filename \"%s\"\n", + base_name[i], filename); + free(tmp); + return false; + } + static_sym_name[i] = base_name[i]; + } + static_sym_name[i] = '\0'; + free(tmp); + return true; +} + +// translate: iterate over lines in in_file, translate VM instructions to +// assembly, write to out_file +bool translate(FILE *in_file, FILE *out_file) +{ + char *line, in_line[MAX_LINE_LEN]; + struct vm_instruction_t vm_instr; + size_t i, line_len; + + write_vm_init(out_file); + for (i = 0; i < MAX_STATIC_SYMBOLS; ++i) + static_symbol_map[i] = -1; // initialize symbol map table + + file_line_no = 0; + //instruction_offset = 0; // TODO: unnecessary? 
+ while (fgets(in_line, MAX_LINE_LEN, in_file) != NULL) { // parse loop + ++file_line_no; + file_line = in_line; + line = &in_line[skip_whitespace(in_line, MAX_LINE_LEN)]; + + line_len = 0; + for (i = 0; line[i] != '\0'; ++i) { + if (line[i] == '\n' || line[i] == '\r') { + line[i] = '\0'; // remove newlines + break; + } + ++line_len; // get line length + } + + if (line_len == 0) // "empty" line + continue; + if (line_len > 1) { + if (line[0] == '/' && line[1] == '/') { // if comment + continue; + } else { + vm_instr.line = line; + vm_instr.line_len = line_len; + if (!parse_line(&vm_instr)) + return false; + if (!write_instruction(&vm_instr, out_file)) + return false; + cleanup_vm_instr(&vm_instr); + } + } + } + + write_vm_stop(out_file); + return true; +} + + +char *usage_msg = "Usage: vmtranslator [input/file.vm] [translated/output]\n" + " vmtranslator [input/file.vm] # output to STDOUT\n"; + +int main(int argc, char *argv[]) +{ + FILE *in_file, *out_file; + if (argc == 2) { + out_file = stdout; + } else if (argc == 3) { + out_file = fopen(argv[2], "wb"); + if (out_file == NULL) + die("failed to open %s for writing\n", argv[2]); + } else { + die(usage_msg); + } + + in_file = fopen(argv[1], "r"); // read input file + if (in_file == NULL) + die("failed to open %s for reading\n", argv[1]); + + if (!set_static_symbol_name(argv[1])) + die("error reading file name %s\n", argv[1]); + if (!translate(in_file, out_file)) // first pass + die("failed to translate VM code in file\n"); + + if (fclose(in_file)) + die("failed to close VM file\n"); + if (fclose(out_file)) // TODO check if stdout + die("failed to close assembly output file\n"); + free(static_sym_name); + + return 0; +} +