x1phosura git — nand2tetris (2fd8ddb880bd1d0e07cd42f8204464c6c4885713): projects/08/src/parser.h

projects/08/src/parser.h
#ifndef _PARSER_H
#define _PARSER_H

// 'parser.h' roughly corresponds to the 'Parser' module specified in
// nand2tetris, with a few liberties taken.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "util.h"

// Debug switch. Nothing in the visible part of this header tests it;
// presumably it is consumed by code that includes this header -- TODO confirm.
#define _DEBUG

// Maximum number of characters copied out of a line as the command keyword
// (the longest keyword in the lookup tables, "function", is 8 characters).
#define CMD_STR_MAX_LEN (8)
#define ARG2_MAX_LEN    (5)  // arg2 can only be a number between 0 and 65535
// Capacity (excluding the terminator) of the buffer allocated for arg1: what
// remains of a line after the command keyword and arg2. MAX_LINE_LEN is not
// defined in this header (presumably it comes from util.h). The trailing 3
// presumably accounts for the two separators and the terminator -- TODO
// confirm.
#define ARG1_MAX_LEN    (MAX_LINE_LEN - CMD_STR_MAX_LEN - ARG2_MAX_LEN - 3)
//#define ARG1_MAX_LEN    128  // temp value for debugging

// Kinds of VM command the parser can report.
//
// The numeric order is significant: parse_arg1() range-checks 'cmd' against
// C_ARITHMETIC (lowest) and C_UNUSED (highest), so new kinds belong between
// those two.
enum vm_command_t {
	C_ARITHMETIC = 0,  // add, sub, neg, eq, gt, lt, and, or, not
	C_PUSH       = 1,  // push
	C_POP        = 2,  // pop
	C_LABEL      = 3,  // label
	C_GOTO       = 4,  // goto
	C_IF         = 5,  // if-goto
	C_FUNCTION   = 6,  // function
	C_RETURN     = 7,  // return
	C_CALL       = 8,  // call
	C_UNUSED     = 9   // value parse_line() assigns before parsing starts
};

// Lookup tables used to classify the first token of a VM instruction.
//
// They are 'static const' because this header defines them: with external
// linkage, including the header from a second translation unit would produce
// duplicate-definition link errors, and nothing should modify the tables or
// the string literals they point to.

// Keywords that all map to C_ARITHMETIC.
static const char *const arith_cmd_lut[] = {
	"add", "sub", "neg", "eq", "gt", "lt", "and", "or", "not"
};

// Remaining keywords. cmd_lut[i] corresponds to cmd_lut_vals[i], so the two
// tables must be kept the same length and in the same order.
static const char *const cmd_lut[] = {
	"push", "pop", "label", "goto", "if-goto", "function", "return", "call"
};
static const enum vm_command_t cmd_lut_vals[] = {
	C_PUSH, C_POP, C_LABEL, C_GOTO, C_IF, C_FUNCTION, C_RETURN, C_CALL
};

// One VM instruction: the source text ('line') plus the fields parse_line()
// derives from it (cmd, arg1, arg2).
struct vm_instruction_t {
	// Command kind; parse_line() resets it to C_UNUSED before parsing.
	enum vm_command_t cmd;
	// Heap-allocated by parse_command_type()/parse_arg1(); stays NULL for
	// 'return'. Release it with cleanup_vm_instr().
	char *arg1;             // if C_ARITHMETIC, cmd_str, else first arg
	uint16_t arg2;          // 2nd arg (for push/pop/call/function)
	// Instruction text read by the parse_* functions. It must be set by the
	// caller before parse_line() and must not start with whitespace.
	const char *line;
	// Not read or written anywhere in this header; presumably the length of
	// 'line' -- TODO confirm.
	// NOTE(review): 'char' may be too narrow for lengths above 127.
	char line_len;
};


// Converts the decimal digit string 's' to a 16-bit unsigned value.
//
// res: receives the result. It is cleared to 0 first, so it reads 0 whenever
//      false is returned.
// s:   NUL-terminated string to convert. An empty string converts to 0.
//
// Returns true on success. Returns false, after reporting through err(), if
// 's' contains a character rejected by is_number() or encodes a value larger
// than UINT16_MAX. The whole string is examined; nothing is silently dropped.
//
// Assumes is_number() (from util.h) accepts exactly '0'..'9' -- the digit
// arithmetic below depends on it.
bool str_to_u16(uint16_t *res, char *s)
{
	// Accumulate in a wider type so a value such as 70000 is detected
	// instead of wrapping around in 16 bits.
	uint32_t value = 0;
	size_t i;

	*res = 0;
	for (i = 0; s[i] != '\0'; ++i) {
		if (!is_number(s[i])) {
			err("error: invalid char '%c' in \"%s\"\n", s[i], s);
			return false;
		}
		value = value * 10 + (uint32_t)(s[i] - '0');
		// Checking on every digit keeps 'value' below 655360, so the
		// 32-bit accumulator itself can never overflow.
		if (value > UINT16_MAX) {
			err("error: \"%s\" is out of range (max 65535)\n", s);
			return false;
		}
	}
	*res = (uint16_t)value;
	return true;
}

// Releases the heap storage behind vm_instr->arg1 and resets the pointer so
// the instruction can be reused or cleaned up again safely.
//
// Always returns true.
bool cleanup_vm_instr(struct vm_instruction_t *vm_instr)
{
	// free(NULL) is defined to do nothing, so an instruction without an
	// arg1 needs no special case.
	free(vm_instr->arg1);
	vm_instr->arg1 = NULL;

	return true;
}

// Writes a dump of 'vm_instr' (cmd, arg1, arg2) to 'fp'. Every output line
// starts with "//".
//
// vm_instr: instruction to print; arg1 may be NULL.
// fp:       open output stream.
void print_vm_instruction(struct vm_instruction_t *vm_instr, FILE *fp)
{
	// arg1 is NULL for instructions without a first argument (e.g.
	// 'return'). Passing NULL to %s is undefined behaviour, so substitute a
	// visible placeholder.
	const char *arg1 = (vm_instr->arg1 != NULL) ? vm_instr->arg1 : "(null)";

	// The enum is cast because %d requires exactly an int.
	fprintf(fp, "//{\n\t//cmd: %d,\n\t//arg1: \"%s\",\n\t//arg2: %hu,\n//}\n",
	        (int)vm_instr->cmd, arg1, vm_instr->arg2);
}

// TODO: May not need parse_arg1(), parse_arg2(), could just have
// 'parse_command()' and manually read vm_instr attributes when needed. This
// makes sense because the entire command will need to be parsed anyway in
// order to set all the vm_instr attributes; no sense in having redundant
// functions.
// Or not. Separate functions could be useful, as each could have its own
// lookup table rather than 3 separate LUTs cluttering and bloating a single
// function. It doesn't matter _really_, but I do want clean code if possible.
// Will need to think on this...
//
// Expects the vm instruction line to _not_ start with whitespace (previously
// trimmed)
static bool parse_command_type(struct vm_instruction_t *vm_instr)
{
	size_t i;
	char cmd_str[CMD_STR_MAX_LEN + 1];

	// TODO check line_len against CMD_STR_MAX_LEN
	for (i = 0; i < CMD_STR_MAX_LEN && vm_instr->line[i] != '\0'
	            && (!is_whitespace(vm_instr->line[i])); ++i) {
		cmd_str[i] = vm_instr->line[i];
		// TODO: check if too large, change loop
	}
	cmd_str[i] = '\0';

	for (i = 0; i < (sizeof(arith_cmd_lut) / sizeof(arith_cmd_lut[0])); ++i) {
		if (!strncmp(cmd_str, arith_cmd_lut[i], CMD_STR_MAX_LEN)) {
			vm_instr->cmd = C_ARITHMETIC;
			vm_instr->arg1 = malloc(CMD_STR_MAX_LEN + 1);
			strncpy(vm_instr->arg1, arith_cmd_lut[i], CMD_STR_MAX_LEN);
			return true;
		}
	}
	for (i = 0; i < (sizeof(cmd_lut) / sizeof(cmd_lut[0])); ++i) {
		if (!strncmp(cmd_str, cmd_lut[i], CMD_STR_MAX_LEN)) {
			vm_instr->cmd = cmd_lut_vals[i];
			return true;
		}
	}

	err("error: illegal instruction in line \"%s\"\n", vm_instr->line);
	return false;
}

static bool parse_arg1(struct vm_instruction_t *vm_instr)
{
	size_t i, k;

	if (vm_instr->cmd == C_ARITHMETIC || vm_instr->cmd == C_RETURN) {
		return true;  // 'return' or arg1 is already parsed and correct
	} else if (C_ARITHMETIC < vm_instr->cmd && vm_instr->cmd < C_UNUSED) {
		vm_instr->arg1 = malloc(ARG1_MAX_LEN + 1);
		// skip first token
		for (i = 0; i < CMD_STR_MAX_LEN && vm_instr->line[i] != '\0'
				    && (!is_whitespace(vm_instr->line[i])); ++i)
			;
		if (vm_instr->line[i] == '\0' || vm_instr->line[i] == '\n'
			                      || vm_instr->line[i] == '\r') {
			err("error: end of line encountered, no first arg\n");
			free(vm_instr->arg1);
			return false;
		}
		// now at second token
		// WARNING possibly an overflow here! TODO fix when I'm not lazy
		for (++i, k = 0; is_symbol_char(vm_instr->line[i])
		                 && i < MAX_LINE_LEN; ++i, ++k) {
			vm_instr->arg1[k] = vm_instr->line[i];
		}
		vm_instr->arg1[k] = '\0';
		return true;
	}

	err("error: illegal first argument in line \"%s\"\n", vm_instr->line);
	return false;
}

static bool parse_arg2(struct vm_instruction_t *vm_instr)
{
	bool in_whitespace = false;
	uint8_t ws_count = 0;
	uint16_t arg2_u16;
	size_t i, k;
	char c, arg2_str[ARG2_MAX_LEN + 1];

	if (vm_instr->cmd == C_PUSH || vm_instr->cmd == C_POP
	    || vm_instr->cmd == C_CALL || vm_instr->cmd == C_FUNCTION) {


		// skip first two tokens
		for (i = 0; i < MAX_LINE_LEN && vm_instr->line[i] != '\0'; ++i) {
			c = vm_instr->line[i];
			if (c == ' ' || c == '\t') {
				if (!in_whitespace) {
					++ws_count;
					in_whitespace = true;
				}
			} else {
				in_whitespace = false;
			}

			if (ws_count >= 2)
				break;  // break after two spaces/tabs found
		}
		if (vm_instr->line[i] == '\0' || vm_instr->line[i] == '\n'
		    || i >= MAX_LINE_LEN) {
			err("error: end of line encountered, no first arg\n");
			free(vm_instr->arg1);
			return false;
		}

		// now at second token
		for (++i, k = 0; k < ARG2_MAX_LEN
		                 && is_number(vm_instr->line[i]); ++i, ++k) {
			arg2_str[k] = vm_instr->line[i];
		}
		arg2_str[k] = '\0';

		if (!str_to_u16(&arg2_u16, arg2_str)) {
			return false;
		}
		// TODO check if > 65535 (maybe?)
		vm_instr->arg2 = arg2_u16;
		return true;
	}

	err("error: can't parse 2nd arg from instruction type\n");
	return false;
}

// Parses the instruction text in vm_instr->line (which the caller must have
// set beforehand) and fills in cmd, arg1 and arg2.
//
// cmd, arg1 and arg2 are first reset to C_UNUSED, NULL and 0. The second
// argument is parsed only for push, pop, function and call.
//
// Returns true if every required part parsed, false as soon as one fails.
bool parse_line(struct vm_instruction_t *vm_instr)
{
	vm_instr->cmd = C_UNUSED;
	vm_instr->arg1 = NULL;
	vm_instr->arg2 = 0;  // 0 is still valid value

	if (!parse_command_type(vm_instr) || !parse_arg1(vm_instr)) {
		return false;
	}

	switch (vm_instr->cmd) {
	case C_PUSH:
	case C_POP:
	case C_FUNCTION:
	case C_CALL:
		// These are the only commands that carry a numeric argument.
		return parse_arg2(vm_instr);
	default:
		return true;
	}
}

#endif // _PARSER_H
all repos — nand2tetris @ 2fd8ddb880bd1d0e07cd42f8204464c6c4885713

my nand2tetris progress