all repos — nand2tetris @ 67290b1e3bef8ebc7c9e28b03c7adc54a54c74a2

my nand2tetris progress

projects/08/src/parser.h

 1
 2
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
#ifndef _PARSER_H
#define _PARSER_H

// 'parser.h' roughly corresponds to the 'Parser' module specified in
// nand2tetris, with a few liberties taken.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "util.h"

// NOTE(review): identifiers that begin with an underscore followed by an
// uppercase letter are reserved for the implementation in C. Nothing visible
// in this file tests this macro -- TODO confirm who reads it before renaming.
#define _DEBUG

// Size limits for the three tokens of a VM instruction line.
// CMD_STR_MAX_LEN matches the longest command string in the lookup tables
// below ("function", 8 characters).
#define CMD_STR_MAX_LEN (8)
#define ARG2_MAX_LEN    (5)  // arg2 can only be a number between 0 and 65535
// Whatever remains of a line once the command and arg2 are accounted for.
// MAX_LINE_LEN is not defined in this file; the trailing 3 presumably covers
// the two separators plus the terminator -- TODO confirm.
#define ARG1_MAX_LEN    (MAX_LINE_LEN - CMD_STR_MAX_LEN - ARG2_MAX_LEN - 3)
//#define ARG1_MAX_LEN    128  // temp value for debugging

// Kinds of VM command the parser distinguishes.
//
// The declaration order is significant: parse_arg1() relies on the range
// check 'C_ARITHMETIC < cmd < C_UNUSED', so C_ARITHMETIC must stay first and
// C_UNUSED must stay last.
enum vm_command_t {
	C_ARITHMETIC = 0,  // add, sub, neg, eq, gt, lt, and, or, not
	C_PUSH,            // push
	C_POP,             // pop
	C_LABEL,           // label
	C_GOTO,            // goto
	C_IF,              // if-goto
	C_FUNCTION,        // function
	C_RETURN,          // return
	C_CALL,            // call
	C_UNUSED           // value parse_line() assigns before parsing starts
};

// Lookup tables used by parse_command_type() to classify the first token of
// an instruction line.
//
// They are 'static const': this is a header, so objects with external
// linkage defined here would be defined again in every translation unit that
// includes it, and string literals must never be written through.

// Every command string that is classified as C_ARITHMETIC.
static const char *const arith_cmd_lut[] = {
	"add", "sub", "neg", "eq", "gt", "lt", "and", "or", "not"
};

// Remaining command strings. cmd_lut[i] is translated to cmd_lut_vals[i], so
// the two tables must keep the same length and the same order.
static const char *const cmd_lut[] = {
	"push", "pop", "label", "goto", "if-goto", "function", "return", "call"
};
static const enum vm_command_t cmd_lut_vals[] = {
	C_PUSH, C_POP, C_LABEL, C_GOTO, C_IF, C_FUNCTION, C_RETURN, C_CALL
};

// One VM instruction: the source text in 'line' plus the fields the parser
// derives from it. parse_line() resets cmd, arg1 and arg2 before parsing but
// never assigns line or line_len, so 'line' must be set by the caller first.
struct vm_instruction_t {
	// Command type; parse_line() sets this to C_UNUSED before parsing.
	enum vm_command_t cmd;
	// Heap buffer allocated by the parse functions (NULL when there is
	// none); released by cleanup_vm_instr().
	char *arg1;             // if C_ARITHMETIC, cmd_str, else first arg
	uint16_t arg2;          // 2nd arg (for push/pop/call/function)
	// NUL-terminated instruction text read by the parse functions; expected
	// to have no leading whitespace.
	const char *line;
	// Presumably the length of 'line'; no code visible here reads it --
	// TODO confirm. NOTE(review): plain 'char' has implementation-defined
	// signedness, so lengths above 127 are not portable.
	char line_len;
};


// Converts the decimal digit string 's' into a 16-bit unsigned value.
//
// res: receives the parsed value on success and is set to 0 on failure.
// s:   NUL-terminated string that must consist only of decimal digits.
//
// Returns true on success. Returns false, after reporting through err(), if
// 's' is empty, contains a character that is not a digit, or encodes a value
// larger than UINT16_MAX (65535).
bool str_to_u16(uint16_t *res, char *s)
{
	uint32_t value = 0;  // wider than the result so overflow is detectable
	size_t i;

	*res = 0;
	if (s[0] == '\0') {
		err("error: expected a number, got an empty string\n");
		return false;
	}

	for (i = 0; s[i] != '\0'; ++i) {
		if (!is_number(s[i])) {
			err("error: invalid char '%c' in \"%s\"\n", s[i], s);
			return false;
		}
		value = value * 10 + (uint32_t)(s[i] - '0');
		// The range is checked after every digit, so 'value' never grows
		// beyond 655359 and the 32-bit accumulator itself cannot wrap.
		if (value > UINT16_MAX) {
			err("error: number \"%s\" does not fit in 16 bits\n", s);
			return false;
		}
	}

	*res = (uint16_t)value;
	return true;
}

// Releases the heap buffer held in vm_instr->arg1, if there is one, and
// resets the pointer to NULL so that calling this again on the same
// instruction is harmless. Always returns true.
bool cleanup_vm_instr(struct vm_instruction_t *vm_instr)
{
	// free(NULL) is a no-op, so a NULL arg1 needs no special case
	free(vm_instr->arg1);
	vm_instr->arg1 = NULL;
	return true;
}

// Writes a human-readable dump of 'vm_instr' to the stream 'fp'.
//
// cmd is printed as its numeric enum value. arg1 can legitimately be NULL
// (parse_line() leaves it NULL for 'return'), and handing NULL to "%s" is
// undefined behaviour, so the text "(null)" is printed in that case.
void print_vm_instruction(struct vm_instruction_t *vm_instr, FILE *fp)
{
	const char *arg1 = vm_instr->arg1 != NULL ? vm_instr->arg1 : "(null)";

	// The casts make each argument match its conversion specifier: %d takes
	// an int and %hu takes an unsigned int.
	fprintf(fp, "{\n\tcmd: %d,\n\targ1: \"%s\",\n\targ2: %hu,\n}\n",
	        (int)vm_instr->cmd, arg1, (unsigned)vm_instr->arg2);
}

// Reads the first whitespace-delimited token of vm_instr->line, classifies it
// and stores the result in vm_instr->cmd.
//
// For an arithmetic/logic command, cmd becomes C_ARITHMETIC and arg1 receives
// a freshly malloc'd copy of the command string (released later by
// cleanup_vm_instr()). For every other known command only cmd is set.
//
// Expects the vm instruction line to _not_ start with whitespace (previously
// trimmed).
//
// Returns false, after reporting through err(), if the token is not a known
// command, is longer than CMD_STR_MAX_LEN, or if the allocation fails.
//
// TODO: May not need parse_arg1(), parse_arg2(); a single 'parse_command()'
// could fill in every vm_instr attribute, since the whole line has to be
// parsed anyway. On the other hand, separate functions let each one keep its
// own lookup table instead of three LUTs bloating a single function. It
// doesn't matter _really_, but clean code would be nice. Will need to think
// on this...
static bool parse_command_type(struct vm_instruction_t *vm_instr)
{
	const char *line = vm_instr->line;
	char cmd_str[CMD_STR_MAX_LEN + 1];
	size_t len, i;

	for (len = 0; len < CMD_STR_MAX_LEN && line[len] != '\0'
	              && !is_whitespace(line[len]); ++len) {
		cmd_str[len] = line[len];
	}
	cmd_str[len] = '\0';

	// If the loop stopped only because the buffer filled up, the token has
	// to end right here. Otherwise it is longer than any valid command, and
	// matching on the truncated copy would accept e.g. "functionX" as
	// "function".
	if (len == CMD_STR_MAX_LEN && line[len] != '\0'
	    && !is_whitespace(line[len])) {
		err("error: illegal instruction in line \"%s\"\n", vm_instr->line);
		return false;
	}

	for (i = 0; i < (sizeof(arith_cmd_lut) / sizeof(arith_cmd_lut[0])); ++i) {
		if (strcmp(cmd_str, arith_cmd_lut[i]) != 0)
			continue;

		vm_instr->arg1 = malloc(CMD_STR_MAX_LEN + 1);
		if (vm_instr->arg1 == NULL) {
			err("error: out of memory\n");
			return false;
		}
		// cmd_str holds at most CMD_STR_MAX_LEN characters plus its
		// terminator, so the copy always fits and stays terminated
		memcpy(vm_instr->arg1, cmd_str, len + 1);
		vm_instr->cmd = C_ARITHMETIC;
		return true;
	}

	for (i = 0; i < (sizeof(cmd_lut) / sizeof(cmd_lut[0])); ++i) {
		if (strcmp(cmd_str, cmd_lut[i]) == 0) {
			vm_instr->cmd = cmd_lut_vals[i];
			return true;
		}
	}

	err("error: illegal instruction in line \"%s\"\n", vm_instr->line);
	return false;
}

static bool parse_arg1(struct vm_instruction_t *vm_instr)
{
	size_t i, k;

	if (vm_instr->cmd == C_ARITHMETIC || vm_instr->cmd == C_RETURN) {
		return true;  // 'return' or arg1 is already parsed and correct
	} else if (C_ARITHMETIC < vm_instr->cmd && vm_instr->cmd < C_UNUSED) {
		vm_instr->arg1 = malloc(ARG1_MAX_LEN + 1);
		// skip first token
		for (i = 0; i < CMD_STR_MAX_LEN && vm_instr->line[i] != '\0'
				    && (!is_whitespace(vm_instr->line[i])); ++i)
			;
		if (vm_instr->line[i] == '\0' || vm_instr->line[i] == '\n'
			                      || vm_instr->line[i] == '\r') {
			err("error: end of line encountered, no first arg\n");
			free(vm_instr->arg1);
			return false;
		}
		// now at second token
		// WARNING possibly an overflow here! TODO fix when I'm not lazy
		for (++i, k = 0; is_symbol_char(vm_instr->line[i])
		                 && i < MAX_LINE_LEN; ++i, ++k) {
			vm_instr->arg1[k] = vm_instr->line[i];
		}
		vm_instr->arg1[k] = '\0';
		return true;
	}

	err("error: illegal first argument in line \"%s\"\n", vm_instr->line);
	return false;
}

static bool parse_arg2(struct vm_instruction_t *vm_instr)
{
	bool in_whitespace = false;
	uint8_t ws_count = 0;
	uint16_t arg2_u16;
	size_t i, k;
	char c, arg2_str[ARG2_MAX_LEN + 1];

	if (vm_instr->cmd == C_PUSH || vm_instr->cmd == C_POP
	    || vm_instr->cmd == C_CALL || vm_instr->cmd == C_FUNCTION) {


		// skip first two tokens
		for (i = 0; i < MAX_LINE_LEN && vm_instr->line[i] != '\0'; ++i) {
			c = vm_instr->line[i];
			if (c == ' ' || c == '\t') {
				if (!in_whitespace) {
					++ws_count;
					in_whitespace = true;
				}
			} else {
				in_whitespace = false;
			}

			if (ws_count >= 2)
				break;  // break after two spaces/tabs found
		}
		if (vm_instr->line[i] == '\0' || vm_instr->line[i] == '\n'
		    || i >= MAX_LINE_LEN) {
			err("error: end of line encountered, no first arg\n");
			free(vm_instr->arg1);
			return false;
		}

		// now at second token
		for (++i, k = 0; k < ARG2_MAX_LEN
		                 && is_number(vm_instr->line[i]); ++i, ++k) {
			arg2_str[k] = vm_instr->line[i];
		}
		arg2_str[k] = '\0';

		if (!str_to_u16(&arg2_u16, arg2_str)) {
			return false;
		}
		// TODO check if > 65535 (maybe?)
		vm_instr->arg2 = arg2_u16;
		return true;
	}

	err("error: can't parse 2nd arg from instruction type\n");
	return false;
}

// Parses vm_instr->line and fills in cmd, arg1 and arg2.
//
// The three output fields are reset first (cmd = C_UNUSED, arg1 = NULL,
// arg2 = 0), then the command type, the first argument and -- for push, pop,
// function and call only -- the second argument are parsed in that order.
// Returns false as soon as any stage fails, true otherwise.
bool parse_line(struct vm_instruction_t *vm_instr)
{
	vm_instr->arg2 = 0;  // 0 is still valid value
	vm_instr->arg1 = NULL;
	vm_instr->cmd = C_UNUSED;

	if (!parse_command_type(vm_instr) || !parse_arg1(vm_instr))
		return false;

	switch (vm_instr->cmd) {
	case C_PUSH:
	case C_POP:
	case C_FUNCTION:
	case C_CALL:
		// these are the only commands that take a second argument
		return parse_arg2(vm_instr);
	default:
		return true;
	}
}

#endif // _PARSER_H