x1phosura git — nand2tetris (dd033e2c73975724c23d2bb1210383941b35798e): projects/06/assembler1/assembler1.c

projects/06/assembler1/assembler1.c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../bindump.h"

// Debug trace: printf-style formatting, written to stdout.
#define DEBUG(...)                printf(__VA_ARGS__)

// Print err_msg through perror() and terminate the process with exit_val.
// Wrapped in do { } while (0) so the macro expands to exactly one statement;
// without the wrapper, `if (cond) die(...);` would call exit() unconditionally.
#define die(err_msg, exit_val)  \
	do { perror(err_msg); exit(exit_val); } while (0)
// Report a diagnostic on stderr using printf-style formatting.
#define alert(...)              fprintf(stderr, __VA_ARGS__)


/*
 * Convert the leading run of ASCII decimal digits in a_field_str to an
 * unsigned value.
 *
 * Parsing stops at the first character that is not '0'..'9' (including the
 * terminating NUL), so an empty string or a string that does not start with
 * a digit yields 0. If the value would not fit in 32 bits, UINT32_MAX is
 * returned instead of wrapping around, so a caller's upper-bound check still
 * rejects the input.
 */
uint32_t myatoi(const char *a_field_str)
{
	uint32_t value = 0;

	DEBUG("a_field_str = %s\n", a_field_str);
	// I know, I could just use atoi(), but where's the fun in that?
	for (size_t i = 0; a_field_str[i] >= '0' && a_field_str[i] <= '9'; ++i) {
		uint32_t digit = (uint32_t) (a_field_str[i] - '0');

		// value * 10 + digit would exceed UINT32_MAX: saturate
		if (value > (UINT32_MAX - digit) / 10)
			return UINT32_MAX;
		value = value * 10 + digit;
	}

	return value;
}

/*
 * Parse an A-type instruction ("@<decimal number>") from `line`.
 *
 * Leading spaces, tabs and '/' characters before the '@' are skipped. The
 * digits following '@' are collected until a space, tab, '/', newline or the
 * end of the string is reached; anything after that point is not examined.
 *
 * On success the numeric value (0..32767) is stored in *instruction and true
 * is returned. On any error a message is written via alert() and false is
 * returned; *instruction is left unmodified in that case.
 */
bool parse_a_type(const char *line, uint16_t *instruction)
{
	enum { MAX_DIGITS = 5 };           // 32767 has five decimal digits
	char a_field_str[MAX_DIGITS + 1];  // digits plus the terminating NUL
	bool at_found = false;
	uint32_t a_field;
	size_t a = 0;
	char c;

	// Stop at NUL as well as newline so a line without a trailing '\n'
	// cannot be overrun.
	for (size_t i = 0; (c = line[i]) != '\n' && c != '\0'; ++i) {
		if (c == ' ' || c == '\t' || c == '/') {
			if (at_found)
				break;     // separator ends the number
			continue;          // skip leading whitespace
		}

		if (!at_found) {
			if (c != '@') {
				alert("syntax error: invalid char '%c' before @\n", c);
				return false;
			}
			at_found = true;
			continue;
		}

		if (c < '0' || c > '9') {
			alert("syntax error: invalid char '%c' found "
			      "after @\n", c);
			return false;
		}
		if (a >= MAX_DIGITS) {
			alert("error: @<number> too long\n");
			return false;
		}
		a_field_str[a++] = c;  // get number
	}

	if (!at_found || a == 0) {
		alert("syntax error: expected @ followed by a number\n");
		return false;
	}

	// Always terminate here: the loop may have ended on '\n' right after
	// the last digit, in which case no separator was ever seen.
	a_field_str[a] = '\0';

	// TODO: extension: support negative numbers
	a_field = myatoi(a_field_str);
	if (a_field > 32767) {
		alert("error: %u > 32767, too large\n", (unsigned) a_field);
		return false;
	}

	*instruction = (uint16_t) a_field;  // A-type MSB == 0 anyway
	return true;
}

/*
 * Placeholder for the C-type instruction parser.
 *
 * `line` is not examined yet: the function logs that it was called, stores
 * a word with only the most significant bit set in *instruction, and
 * always reports success.
 */
bool parse_c_type(const char *line, uint16_t *instruction)
{
	// STUB: only the MSB that marks a C-type instruction is produced
	const uint16_t c_type_marker = 0x8000;

	DEBUG("parse_c_type()\n");
	*instruction = c_type_marker;

	return true;
}

// Does not care about line length; stops at the first newline (or end of
// string) or as soon as the first non-whitespace character has been
// dispatched (allows for syntactically-incorrect lines, I know).
//
// The first non-whitespace character selects the parser: '@' goes to
// parse_a_type(), any other character in the range '!'..'}' goes to
// parse_c_type(), and the chosen parser's result is returned. A line that
// holds nothing but spaces/tabs returns false. Any other character
// terminates the program through die().
bool parse_instruction_w_type(const char *line, uint16_t *instruction)
{
	char c;

	for (size_t i = 0; (c = line[i]) != '\n' && c != '\0'; ++i) {
		if (c == ' ' || c == '\t')
			continue;  // skip whitespace
		if (c == '@')
			return parse_a_type(line, instruction);
		if (c >= '!' && c < '~')
			return parse_c_type(line, instruction);

		die("error: TODO explain...\n", -1);
	}

	// No instruction character was found; report failure explicitly
	// instead of returning an indeterminate value.
	return false;
}

/*
 * Parse the instruction on `line` into *instruction.
 *
 * *instruction is first set to a recognizable placeholder (1337) and then
 * handed to parse_instruction_w_type(), whose result is returned. When that
 * call fails without writing *instruction, the placeholder remains.
 */
bool parse_next_instruction(const char *line, uint16_t *instruction)
{
	*instruction = 1337; // STUB, should be 0x539, 0b0000010100111001
	return parse_instruction_w_type(line, instruction);
}

/*
 * Classify one source line and, if it holds an instruction, parse it.
 *
 * line        - text of the line
 * line_len    - number of characters in `line` to inspect
 * instruction - receives the encoded instruction on success
 *
 * Returns false for empty or whitespace-only lines, for comment lines
 * (first non-whitespace text is "//"), and for invalid input. A single '/'
 * that is not immediately followed by another '/' is reported through
 * alert() as a syntax error. Otherwise returns the result of
 * parse_next_instruction().
 */
bool parse_line(const char *line, size_t line_len, uint16_t *instruction)
{
	bool slash_found = false;
	size_t i;

	if (line_len == 0 || line_len == 1)
		return false;

	// filter out comment and blank lines
	for (i = 0; i < line_len; ++i) {
		char c = line[i];

		if (c == '/') {
			if (slash_found) {
				// second slash means this is a comment
				return false;
			}
			slash_found = true;
			continue;
		}
		if (slash_found) {
			// this char not slash, but previous was: invalid syntax
			// TODO: add line, column numbers
			alert("syntax error: found '/', comments need '//'\n");
			return false;
		}
		if (c == ' ' || c == '\t')
			continue;
		if (c == '\n') {
			// only whitespace before the newline: nothing to parse
			return false;
		}

		// non-whitespace/slash char discovered
		break;
	}

	if (i == line_len) {
		// Ran off the end without finding an instruction character.
		if (slash_found) {
			// line ended right after a lone '/'
			alert("syntax error: found '/', comments need '//'\n");
		}
		return false;
	}

	// comment not found, so attempting to parse instruction
	return parse_next_instruction(line, instruction);
}

// Built-in sample input for the parser: comment lines, a blank line, A-type
// instructions (valid, too long, and containing a non-digit) and C-type
// instructions. The leading number on each entry is its 1-based line number.
const char *test_lines[] = {
	/*  1 */ "// this is a comment\n",
	/*  2 */ "@12345\n",
	/*  3 */ "\n",
	/*  4 */ "M+1\n",
	/*  5 */ "@98 // test comment\n",
	/*  6 */ "// this is another comment\n",
	/*  7 */ "/ this is a broken comment\n",
	/*  8 */ "D=M;JNE\n",
	/*  9 */ " @1337// immediate comment \n",
	/* 10 */ "\t@13371337 // number too long \n",
	/* 11 */ "    @0x1337 // invalid char in num\n",
};
// Number of entries in test_lines.
size_t num_test_lines = sizeof test_lines / sizeof test_lines[0];

int main()
{
	// TODO: read file by lines, parse instructions
	bool result = false; 
	uint16_t instruction;
	size_t test_line_len;

	// New organization allows code to eventually work with lines fetched
	// from a 'fgets()' loop or something

	// for testing
	for (size_t i = 0; i < num_test_lines; ++i) {
		DEBUG("%lu|%s", i+1, test_lines[i]);
		test_line_len = strlen(test_lines[i]);
		result = parse_line(test_lines[i], test_line_len, &instruction);
		if (result) {
			DEBUG("instruction: 0x%x  |  ", instruction);
			bindump_word16(instruction); // output instruction as binary
			putchar('\n');
		}
	}

	return 0;
}
all repos — nand2tetris @ dd033e2c73975724c23d2bb1210383941b35798e

my nand2tetris progress