Brainfuck Interpreter in C

Hello everyone

In the last article about writing a brainfuck interpreter, we saw that it did not work well. So today I will show how I debugged this program. I solved the problem with the "print it" method.

The C language is well known from, for example, the Linux source code, but on the other hand it is also well known for undefined behavior. I admit that I spent a lot of time finding a way to debug this interpreter, but I finally achieved the goal. Now I want to show how I did it.

I added some puts("instruction: <instruction>"); fflush(stdout); statements to the code. These lines are placed under the most important instructions of our interpreter. "What is the fflush(stdout) call for?" - you may ask. This is the answer:

When we print something to stdout, the C standard library first puts each character into a buffer. Later it sends all the buffered characters to stdout at once, and only then do they show up on the screen. For example:

puts("a");
puts("b");

The buffer will look like: [a\nb\n] (puts appends a newline after each string)

This works when no error occurs, but when the program crashes - for example because of undefined behavior, as in our program - the characters still waiting in the buffer are never sent to stdout. But we can say: "Hey, I want to see my characters on the screen and you have to send them to stdout right now!". To force this we can call fflush(stdout).

I assume that you understand this so now take a look at the code with these added debugging instructions:

	#include <stdio.h>
	#include <sysexits.h>
	#include <string.h>
	#include <stdlib.h>

	/* Exit status returned by main() after a successful run. */
	#define EX_SUCCESS 0

	/* Both macros read a `file_extension` string that must be in scope where they
	   are used; at least one of them is true whenever it differs from "bf". */
	#define str1_is_less_than_str2 (strcmp("bf", file_extension) < 0)
	#define str2_is_less_than_str1 (strcmp("bf", file_extension) > 0)

	/* Number of int cells in the tape array declared in main(). */
	#define MEMSIZE 30000
	/* Number of distinct Brainfuck instructions: > < + - , . [ ] */
	#define NUMBER_OF_BF_INSTRUCTIONS 8

	/* Reads a `head_ptr` that must be in scope and dereferenceable to the list head. */
	#define FIRST_ELEMENT_NOT_EXISTS (*head_ptr == NULL)

	/* One node of the singly linked list that holds the parsed program. */
	struct bf_instruction_node
	{
	int bf_instruction; /* instruction character read from the file, e.g. '+' or '[' */
	struct bf_instruction_node *next_element; /* following instruction; NULL on the last node */
	};

	/* One entry of the linked stack that push() and pop() maintain for open loops. */
	struct stack_node
	{
	struct bf_instruction_node *bf_instr_ptr; /* instruction saved by push() */
	struct stack_node *link; /* entry that was on top before this one was pushed */
	};

	/* Move the data pointer one cell to the right. No bounds check is performed here. */
	void inc_ptr(int **values_ptr)
	{
		*values_ptr += 1;
	}

	/* Move the data pointer one cell to the left. No bounds check is performed here. */
	void dec_ptr(int **values_ptr)
	{
		*values_ptr -= 1;
	}

	/*
	 * Keep a cell inside the 8-bit range 0..255.
	 * Any value below 0 becomes 255; any value above 255 becomes 0.
	 * Values already in range are left untouched.
	 */
	void check_if_value_is_correct(int *values_ptr)
	{
		const int cell = *values_ptr;

		if (cell > 255) {
			*values_ptr = 0;
			return;
		}

		if (cell < 0) {
			*values_ptr = 255;
		}
	}

	/* Add one to the current cell, then let check_if_value_is_correct() wrap it into 0..255. */
	void inc_value(int *values_ptr)
	{
		*values_ptr += 1;
		check_if_value_is_correct(values_ptr);
	}

	/* Subtract one from the current cell, then let check_if_value_is_correct() wrap it into 0..255. */
	void dec_value(int *values_ptr)
	{
		*values_ptr -= 1;
		check_if_value_is_correct(values_ptr);
	}

	/* Write the current cell to stdout as a single character. */
	void print_value(int *values_ptr)
	{
		const int cell = *values_ptr;

		putchar(cell);
	}

	/* Read one character from stdin into the current cell; whatever getchar() returns (EOF included) is stored as is. */
	void input_value(int *values_ptr)
	{
		const int typed = getchar();

		*values_ptr = typed;
	}

	/*
	 * Push an instruction onto the loop stack.
	 *
	 * esp_ptr           - address of the stack-top pointer; updated to the new entry.
	 * current_instr_ptr - instruction node to remember (start_loop passes the '[' node).
	 *
	 * Terminates the program if the new entry cannot be allocated.
	 */
	void push(struct stack_node **esp_ptr, struct bf_instruction_node *current_instr_ptr)
	{
		struct stack_node *entry = malloc(sizeof *entry);

		if (entry == NULL) {
			perror("Memory allocation failed");
			exit(EXIT_FAILURE);
		}

		entry->bf_instr_ptr = current_instr_ptr;
		entry->link = *esp_ptr;
		*esp_ptr = entry;
	}

	void pop(struct stack_node **esp_ptr)
	{
	struct stack_node *tmp;

	tmp = *esp_ptr;
	esp_ptr = (esp_ptr)->link;
	free(tmp);
	}

	/*
	 * Handle a '[' instruction.
	 *
	 * values_ptr        - current tape cell.
	 * esp_ptr           - address of the loop-stack top.
	 * current_instr_ptr - address of the instruction cursor, currently on the '['.
	 *
	 * If the cell is non-zero the '[' node is pushed so end_loop() can jump back
	 * to it. Otherwise the cursor is moved to the matching ']'; the caller's
	 * normal "advance by one" then steps past the loop.
	 */
	void start_loop(int *values_ptr, struct stack_node **esp_ptr, struct bf_instruction_node **current_instr_ptr)
	{
		if (*values_ptr != 0) {
			push(esp_ptr, *current_instr_ptr);
			return;
		}

		int balance = 0;

		for (;;) {
			const int instruction = (*current_instr_ptr)->bf_instruction;

			if (instruction == '[') {
				balance++;
			} else if (instruction == ']') {
				balance--;
			}

			if (balance == 0) {
				break; /* cursor rests on the matching ']' */
			}

			if ((*current_instr_ptr)->next_element == NULL) {
				/* Unmatched '[': stay on the last node instead of walking off the list. */
				break;
			}

			*current_instr_ptr = (*current_instr_ptr)->next_element;
		}
	}

	/*
	 * Handle a ']' instruction.
	 *
	 * values_ptr        - current tape cell.
	 * current_instr_ptr - address of the instruction cursor, currently on the ']'.
	 * esp_ptr           - address of the loop-stack top.
	 *
	 * If the cell is non-zero the cursor jumps back to the '[' saved on top of
	 * the stack; the caller's normal "advance by one" then lands on the first
	 * instruction of the loop body. If the cell is zero the loop is finished
	 * and its stack entry is popped.
	 */
	void end_loop(int *values_ptr, struct bf_instruction_node **current_instr_ptr, struct stack_node **esp_ptr)
	{
		if (*esp_ptr == NULL) {
			/* ']' without an open '[': nothing to jump to and nothing to pop. */
			return;
		}

		if (*values_ptr != 0) {
			check_if_value_is_correct(values_ptr);
			*current_instr_ptr = (*esp_ptr)->bf_instr_ptr;
		} else {
			pop(esp_ptr);
		}
	}

	void execute_instructions(int *values_ptr, struct bf_instruction_node head_ptr, struct bf_instruction_node **current_instr_ptr)
	{
	struct stack_node *esp_ptr = NULL;

	char brainfuck_instruction;
	*current_instr_ptr = head_ptr;

	while( *current_instr_ptr != NULL )
	{
	brainfuck_instruction = (*current_instr_ptr)->bf_instruction;

	switch( brainfuck_instruction )
	{
	case '>': inc_ptr(values_ptr); break;
	case '<': dec_ptr(values_ptr); break;
	case '+': inc_value(*values_ptr); break;
	case '-': dec_value(*values_ptr); break;
	case '.': print_value(*values_ptr); break;
	case ',': input_value(*values_ptr); break;
	case '[': start_loop(*values_ptr, &esp_ptr, current_instr_ptr); break;
	case ']': end_loop(*values_ptr, current_instr_ptr, &esp_ptr); break;
	}
	current_instr_ptr = (current_instr_ptr)->next_element;
	}
	}

	/*
	 * Allocate a node for the instruction list and park the cursor on the tail.
	 *
	 * head_ptr          - first node of the list.
	 * current_instr_ptr - address of the cursor; left pointing at the last node
	 *                     (or NULL when the list is empty).
	 * char_from_file    - unused here; the caller fills in the new node.
	 *
	 * Returns the new, uninitialised node, or NULL if allocation failed.
	 * The caller links it into the list and owns it.
	 */
	struct bf_instruction_node *create_new_element(struct bf_instruction_node *head_ptr, struct bf_instruction_node **current_instr_ptr, int char_from_file)
	{
		(void)char_from_file;

		*current_instr_ptr = head_ptr;

		while (*current_instr_ptr != NULL && (*current_instr_ptr)->next_element != NULL) {
			*current_instr_ptr = (*current_instr_ptr)->next_element;
		}

		return malloc(sizeof(struct bf_instruction_node));
	}

	void add_instruction_to_the_list(struct bf_instruction_node head_ptr, struct bf_instruction_node current_instr_ptr, int char_from_file)
	{
	if( FIRST_ELEMENT_NOT_EXISTS )
	{
	head_ptr = (struct bf_instruction_node )malloc(sizeof(struct bf_instruction_node));

	if( *head_ptr == NULL )
	{
	perror("Memory allocation failed");
	exit(EXIT_FAILURE);
	}
	else
	{
	(*head_ptr)->bf_instruction = char_from_file;
	(*head_ptr)->next_element = NULL;
	current_instr_ptr = head_ptr;
	}
	}
	else
	{
	struct bf_instruction_node new_element = create_new_element(head_ptr, current_instr_ptr, char_from_file);

	if( new_element == NULL )
	{
	perror("Memory allocation failed.");
	exit(EXIT_FAILURE);
	}
	else
	{
	new_element->bf_instruction = char_from_file;
	new_element->next_element = NULL;
	(*current_instr_ptr)->next_element = new_element;
	*current_instr_ptr = new_element;
	}
	}
	}

	/*
	 * Print every instruction of the program to stdout, in list order.
	 *
	 * head_ptr          - first node of the list (may be NULL).
	 * current_instr_ptr - address of the cursor; it is NULL when the function returns.
	 */
	void print_instructions(struct bf_instruction_node *head_ptr, struct bf_instruction_node **current_instr_ptr)
	{
		*current_instr_ptr = head_ptr;

		while (*current_instr_ptr != NULL) {
			printf("%c", (*current_instr_ptr)->bf_instruction);
			*current_instr_ptr = (*current_instr_ptr)->next_element;
		}
	}

	/*
	 * Free every node of the instruction list and report it on stdout.
	 *
	 * head_ptr          - first node of the list (may be NULL).
	 * current_instr_ptr - address of the cursor; it is NULL when the function returns.
	 *
	 * The caller's own copy of head_ptr is dangling afterwards and must not be used.
	 */
	void clear_the_memory(struct bf_instruction_node *head_ptr, struct bf_instruction_node **current_instr_ptr)
	{
		*current_instr_ptr = head_ptr;

		while (*current_instr_ptr != NULL) {
			struct bf_instruction_node *earlier_element = *current_instr_ptr;

			/* Step forward before freeing so the link is read from live memory. */
			*current_instr_ptr = earlier_element->next_element;
			free(earlier_element);
		}

		puts("Memory is cleared.");
	}

	/*
	 * Tell whether a character is one of the eight Brainfuck instructions.
	 *
	 * Returns a pointer to the matching entry of the instruction table, or NULL
	 * if char_from_file is not an instruction. The table has static storage, so
	 * the returned pointer stays valid after the call.
	 */
	const char *is_bf_instruction(int char_from_file)
	{
		static const char bf_alphabet[] = {'>', '<', '+', '-', ',', '.', '[', ']'};

		return memchr(bf_alphabet, char_from_file, sizeof bf_alphabet);
	}

	/*
	 * Return the extension of a file name.
	 *
	 * filename - path to inspect; must not be NULL.
	 *
	 * Returns a pointer into filename, just past the last '.' of its final path
	 * component, or NULL if that component has no '.'. Dots in directory names
	 * (such as the leading "./") are ignored.
	 */
	const char *get_file_extension(const char *filename)
	{
		const char *last_slash = strrchr(filename, '/');
		const char *base_name = (last_slash != NULL) ? last_slash + 1 : filename;
		const char *dot = strrchr(base_name, '.');

		if( dot == NULL )
			return NULL;

		return dot + 1;
	}

	int main(int argc, char **argv)
	{
	if( argc != 2 )
	{
	fprintf(stderr, "File not specified.\n");
	puts("usage: ./bf_interpreter <filename.bf>");
	return EX_USAGE;
	}

	const char *filename = argv[1];
	const char *file_extension = get_file_extension(filename);

	if( file_extension == NULL \|\| str1_is_less_than_str2 \|\| str2_is_less_than_str1 )
	{
	fprintf(stderr, "Incorrect file extension.\n");
	puts("usage: ./bf_interpreter <filename.bf>");
	return EX_DATAERR;
	}

	FILE *file_with_bf_code = fopen(filename, "r");

	if( file_with_bf_code == NULL )
	{
	perror(filename);
	return EX_NOINPUT;
	}

	int values[MEMSIZE] = {0};
	int *values_ptr = values;
	int char_from_file;

	struct bf_instruction_node head_ptr = NULL, current_instr_ptr;

	while( (char_from_file = fgetc(file_with_bf_code)) != EOF )
	{
	if( is_bf_instruction(char_from_file) != NULL )
	add_instruction_to_the_list(&head_ptr, &current_instr_ptr, char_from_file);
	}

	execute_instructions(&values_ptr, head_ptr, &current_instr_ptr);
	clear_the_memory(head_ptr, &current_instr_ptr);

	return EX_SUCCESS;
	}

view raw brainfuck_interpreter.c hosted with ❤ by GitHub

Now let's test it and see what we get on the screen.

Now we have something more than only the error. But the most important part of this is the end of all executed instruction:

Well, the last instruction of this set decrements a value. I think this is not what we are looking for. A SEGFAULT is usually raised when a pointer refers to memory that the program is not allowed to access. So we should look for an error with some pointer in our program.

And here it is! These instructions appear when current_instr_ptr is pointing to the ']' instruction and the memory cell from the values array is not equal to zero. What we are going to do is to set our current_instr_ptr to the start of the loop so to the '[' instruction and move it to the next instruction. So we should do it like this:

1) *current_instr_ptr = (*esp_ptr)->bf_instr_ptr;
2) *current_instr_ptr = (*current_instr_ptr)->next_element;

This is the correct way to handle nested loops. After 'print it' debugging we can see that our program does it in a bad way:

1) *current_instr_ptr = (*esp_ptr)->bf_instr_ptr;
2) *current_instr_ptr = (*current_instr_ptr)->next_element;
3) *current_instr_ptr = (*current_instr_ptr)->next_element;

When we already know what is the issue we can look at the end_loop function and correct the mistake.

And now we can take look at the execute_instructions function.

After each instruction is executed, our instruction pointer is moved to the next instruction. So we can remove the *current_instr_ptr = (*current_instr_ptr)->next_element; line from the end_loop function and everything should be fine. Let's try.

Ok! Now we don't get any SEGFAULT error! But we should check what our program prints at the screen and check if it is correct. First of all, we need to comment each puts(<Something>); fflush(stdout); in the code. When we do this, we can compile the interpreter and execute it.

For this example, our program works correctly. But we should check more examples to prove that the interpreter handles all correct brainfuck codes.

This is the code of our example now. You can copy it from https://copy.sh/brainfuck/?file=https://copy.sh/brainfuck/prog/quine505.b

Let's try to execute this code with our interpreter.

Unfortunately, the infinite loop is not a good sign. Where is the problem? We can guess that the problem is on the representation of values because the pointers work correctly after the fix. So we should print the values to look where is the issue.

Let's execute the program and see what we have got here.

Ok, we have the values from the interpretation of the brainfuck code. Now I will show you such a great tool for debugging brainfuck interpreters or brainfuck programs. This tool is brainfuck visualizer - https://fatiherikli.github.io/brainfuck-visualizer/

One click on the step section is enough to reveal the error! Our brainfuck interpreter works on 8-bit memory cells, right? So we should add these checks to the program:

if( *values_ptr < 0 )
*values_ptr = 255;
else if( *values_ptr > 255 )
*values_ptr = 0;

Let's add it to the functions which work with the values. Hmm... This is not a good idea because we don't want to repeat our code. This is not a good practice. We should write a function which will check if the value is less than 0 or more than 255.

And now we can check if our interpreter works correctly with this example.

This is not what we are exactly looking for. :D But we are close to the solution of this issue. The problem is in the implementation of loops. When our pointer is pointing to the '[' instruction and if the memory cell is equal to zero then we have to jump over the whole loop. To do this we need to implement a loop and count the '[' and ']'. We will do it like this:

int balance = 0;
if( (*current_instr_ptr)->bf_instruction == '[' )
balance++;
else if( (*current_instr_ptr)->bf_instruction == ']' )
balance--;
if( balance == 0 )
jump over the whole loop

Look at the code:

Let's compile our code and execute it.

Hurray, it finally works! And this is the end of the road. Thanks for your attention.

I note that I'm not an expert so the code might not be perfect. Constructive criticism is welcome!

The whole code of the program:

	#include <stdio.h>
	#include <sysexits.h>
	#include <string.h>
	#include <stdlib.h>

	/* Exit status returned by main() after a successful run. */
	#define EX_SUCCESS 0

	/* Both macros read a `file_extension` string that must be in scope where they
	   are used; at least one of them is true whenever it differs from "bf". */
	#define str1_is_less_than_str2 (strcmp("bf", file_extension) < 0)
	#define str2_is_less_than_str1 (strcmp("bf", file_extension) > 0)

	/* Number of int cells in the tape array declared in main(). */
	#define MEMSIZE 30000
	/* Number of distinct Brainfuck instructions: > < + - , . [ ] */
	#define NUMBER_OF_BF_INSTRUCTIONS 8

	/* Reads a `head_ptr` that must be in scope and dereferenceable to the list head. */
	#define FIRST_ELEMENT_NOT_EXISTS (*head_ptr == NULL)

	/* One node of the singly linked list that holds the parsed program. */
	struct bf_instruction_node
	{
	int bf_instruction; /* instruction character read from the file, e.g. '+' or '[' */
	struct bf_instruction_node *next_element; /* following instruction; NULL on the last node */
	};

	/* One entry of the linked stack that push() and pop() maintain for open loops. */
	struct stack_node
	{
	struct bf_instruction_node *bf_instr_ptr; /* instruction saved by push() */
	struct stack_node *link; /* entry that was on top before this one was pushed */
	};

	/* Move the data pointer one cell to the right. No bounds check is performed here. */
	void inc_ptr(int **values_ptr)
	{
		*values_ptr += 1;
	}

	/* Move the data pointer one cell to the left. No bounds check is performed here. */
	void dec_ptr(int **values_ptr)
	{
		*values_ptr -= 1;
	}

	/*
	 * Keep a cell inside the 8-bit range 0..255.
	 * Any value below 0 becomes 255; any value above 255 becomes 0.
	 * Values already in range are left untouched.
	 */
	void check_if_value_is_correct(int *values_ptr)
	{
		const int cell = *values_ptr;

		if (cell > 255) {
			*values_ptr = 0;
			return;
		}

		if (cell < 0) {
			*values_ptr = 255;
		}
	}

	/* Add one to the current cell, then let check_if_value_is_correct() wrap it into 0..255. */
	void inc_value(int *values_ptr)
	{
		*values_ptr += 1;
		check_if_value_is_correct(values_ptr);
	}

	/* Subtract one from the current cell, then let check_if_value_is_correct() wrap it into 0..255. */
	void dec_value(int *values_ptr)
	{
		*values_ptr -= 1;
		check_if_value_is_correct(values_ptr);
	}

	/* Write the current cell to stdout as a single character. */
	void print_value(int *values_ptr)
	{
		const int cell = *values_ptr;

		putchar(cell);
	}

	/* Read one character from stdin into the current cell; whatever getchar() returns (EOF included) is stored as is. */
	void input_value(int *values_ptr)
	{
		const int typed = getchar();

		*values_ptr = typed;
	}

	/*
	 * Push an instruction onto the loop stack.
	 *
	 * esp_ptr           - address of the stack-top pointer; updated to the new entry.
	 * current_instr_ptr - instruction node to remember (start_loop passes the '[' node).
	 *
	 * Terminates the program if the new entry cannot be allocated.
	 */
	void push(struct stack_node **esp_ptr, struct bf_instruction_node *current_instr_ptr)
	{
		struct stack_node *entry = malloc(sizeof *entry);

		if (entry == NULL) {
			perror("Memory allocation failed");
			exit(EXIT_FAILURE);
		}

		entry->bf_instr_ptr = current_instr_ptr;
		entry->link = *esp_ptr;
		*esp_ptr = entry;
	}

	void pop(struct stack_node **esp_ptr)
	{
	struct stack_node *tmp;

	tmp = *esp_ptr;
	esp_ptr = (esp_ptr)->link;
	free(tmp);
	}

	/*
	 * Handle a '[' instruction.
	 *
	 * values_ptr        - current tape cell.
	 * esp_ptr           - address of the loop-stack top.
	 * current_instr_ptr - address of the instruction cursor, currently on the '['.
	 *
	 * If the cell is non-zero the '[' node is pushed so end_loop() can jump back
	 * to it. Otherwise the cursor is moved to the matching ']'; the caller's
	 * normal "advance by one" then steps past the loop.
	 */
	void start_loop(int *values_ptr, struct stack_node **esp_ptr, struct bf_instruction_node **current_instr_ptr)
	{
		if (*values_ptr != 0) {
			push(esp_ptr, *current_instr_ptr);
			return;
		}

		int balance = 0;

		for (;;) {
			const int instruction = (*current_instr_ptr)->bf_instruction;

			if (instruction == '[') {
				balance++;
			} else if (instruction == ']') {
				balance--;
			}

			if (balance == 0) {
				break; /* cursor rests on the matching ']' */
			}

			if ((*current_instr_ptr)->next_element == NULL) {
				/* Unmatched '[': stay on the last node instead of walking off the list. */
				break;
			}

			*current_instr_ptr = (*current_instr_ptr)->next_element;
		}
	}

	/*
	 * Handle a ']' instruction.
	 *
	 * values_ptr        - current tape cell.
	 * current_instr_ptr - address of the instruction cursor, currently on the ']'.
	 * esp_ptr           - address of the loop-stack top.
	 *
	 * If the cell is non-zero the cursor jumps back to the '[' saved on top of
	 * the stack; the caller's normal "advance by one" then lands on the first
	 * instruction of the loop body. If the cell is zero the loop is finished
	 * and its stack entry is popped.
	 */
	void end_loop(int *values_ptr, struct bf_instruction_node **current_instr_ptr, struct stack_node **esp_ptr)
	{
		if (*esp_ptr == NULL) {
			/* ']' without an open '[': nothing to jump to and nothing to pop. */
			return;
		}

		if (*values_ptr != 0) {
			check_if_value_is_correct(values_ptr);
			*current_instr_ptr = (*esp_ptr)->bf_instr_ptr;
		} else {
			pop(esp_ptr);
		}
	}

	void execute_instructions(int *values_ptr, struct bf_instruction_node head_ptr, struct bf_instruction_node **current_instr_ptr)
	{
	struct stack_node *esp_ptr = NULL;

	char brainfuck_instruction;
	*current_instr_ptr = head_ptr;

	while( *current_instr_ptr != NULL )
	{
	brainfuck_instruction = (*current_instr_ptr)->bf_instruction;

	switch( brainfuck_instruction )
	{
	case '>': inc_ptr(values_ptr); break;
	case '<': dec_ptr(values_ptr); break;
	case '+': inc_value(*values_ptr); break;
	case '-': dec_value(*values_ptr); break;
	case '.': print_value(*values_ptr); break;
	case ',': input_value(*values_ptr); break;
	case '[': start_loop(*values_ptr, &esp_ptr, current_instr_ptr); break;
	case ']': end_loop(*values_ptr, current_instr_ptr, &esp_ptr); break;
	}
	current_instr_ptr = (current_instr_ptr)->next_element;
	}
	}

	/*
	 * Allocate a node for the instruction list and park the cursor on the tail.
	 *
	 * head_ptr          - first node of the list.
	 * current_instr_ptr - address of the cursor; left pointing at the last node
	 *                     (or NULL when the list is empty).
	 * char_from_file    - unused here; the caller fills in the new node.
	 *
	 * Returns the new, uninitialised node, or NULL if allocation failed.
	 * The caller links it into the list and owns it.
	 */
	struct bf_instruction_node *create_new_element(struct bf_instruction_node *head_ptr, struct bf_instruction_node **current_instr_ptr, int char_from_file)
	{
		(void)char_from_file;

		*current_instr_ptr = head_ptr;

		while (*current_instr_ptr != NULL && (*current_instr_ptr)->next_element != NULL) {
			*current_instr_ptr = (*current_instr_ptr)->next_element;
		}

		return malloc(sizeof(struct bf_instruction_node));
	}

	void add_instruction_to_the_list(struct bf_instruction_node head_ptr, struct bf_instruction_node current_instr_ptr, int char_from_file)
	{
	if( FIRST_ELEMENT_NOT_EXISTS )
	{
	head_ptr = (struct bf_instruction_node )malloc(sizeof(struct bf_instruction_node));

	if( *head_ptr == NULL )
	{
	perror("Memory allocation failed");
	exit(EXIT_FAILURE);
	}
	else
	{
	(*head_ptr)->bf_instruction = char_from_file;
	(*head_ptr)->next_element = NULL;
	current_instr_ptr = head_ptr;
	}
	}
	else
	{
	struct bf_instruction_node new_element = create_new_element(head_ptr, current_instr_ptr, char_from_file);

	if( new_element == NULL )
	{
	perror("Memory allocation failed.");
	exit(EXIT_FAILURE);
	}
	else
	{
	new_element->bf_instruction = char_from_file;
	new_element->next_element = NULL;
	(*current_instr_ptr)->next_element = new_element;
	*current_instr_ptr = new_element;
	}
	}
	}

	/*
	 * Print every instruction of the program to stdout, in list order.
	 *
	 * head_ptr          - first node of the list (may be NULL).
	 * current_instr_ptr - address of the cursor; it is NULL when the function returns.
	 */
	void print_instructions(struct bf_instruction_node *head_ptr, struct bf_instruction_node **current_instr_ptr)
	{
		*current_instr_ptr = head_ptr;

		while (*current_instr_ptr != NULL) {
			printf("%c", (*current_instr_ptr)->bf_instruction);
			*current_instr_ptr = (*current_instr_ptr)->next_element;
		}
	}

	/*
	 * Free every node of the instruction list and report it on stdout.
	 *
	 * head_ptr          - first node of the list (may be NULL).
	 * current_instr_ptr - address of the cursor; it is NULL when the function returns.
	 *
	 * The caller's own copy of head_ptr is dangling afterwards and must not be used.
	 */
	void clear_the_memory(struct bf_instruction_node *head_ptr, struct bf_instruction_node **current_instr_ptr)
	{
		*current_instr_ptr = head_ptr;

		while (*current_instr_ptr != NULL) {
			struct bf_instruction_node *earlier_element = *current_instr_ptr;

			/* Step forward before freeing so the link is read from live memory. */
			*current_instr_ptr = earlier_element->next_element;
			free(earlier_element);
		}

		puts("Memory is cleared.");
	}

	/*
	 * Tell whether a character is one of the eight Brainfuck instructions.
	 *
	 * Returns a pointer to the matching entry of the instruction table, or NULL
	 * if char_from_file is not an instruction. The table has static storage, so
	 * the returned pointer stays valid after the call.
	 */
	const char *is_bf_instruction(int char_from_file)
	{
		static const char bf_alphabet[] = {'>', '<', '+', '-', ',', '.', '[', ']'};

		return memchr(bf_alphabet, char_from_file, sizeof bf_alphabet);
	}

	/*
	 * Return the extension of a file name.
	 *
	 * filename - path to inspect; must not be NULL.
	 *
	 * Returns a pointer into filename, just past the last '.' of its final path
	 * component, or NULL if that component has no '.'. Dots in directory names
	 * (such as the leading "./") are ignored.
	 */
	const char *get_file_extension(const char *filename)
	{
		const char *last_slash = strrchr(filename, '/');
		const char *base_name = (last_slash != NULL) ? last_slash + 1 : filename;
		const char *dot = strrchr(base_name, '.');

		if( dot == NULL )
			return NULL;

		return dot + 1;
	}

	int main(int argc, char **argv)
	{
	if( argc != 2 )
	{
	fprintf(stderr, "File not specified.\n");
	puts("usage: ./bf_interpreter <filename.bf>");
	return EX_USAGE;
	}

	const char *filename = argv[1];
	const char *file_extension = get_file_extension(filename);

	if( file_extension == NULL \|\| str1_is_less_than_str2 \|\| str2_is_less_than_str1 )
	{
	fprintf(stderr, "Incorrect file extension.\n");
	puts("usage: ./bf_interpreter <filename.bf>");
	return EX_DATAERR;
	}

	FILE *file_with_bf_code = fopen(filename, "r");

	if( file_with_bf_code == NULL )
	{
	perror(filename);
	return EX_NOINPUT;
	}

	int values[MEMSIZE] = {0};
	int *values_ptr = values;
	int char_from_file;

	struct bf_instruction_node head_ptr = NULL, current_instr_ptr;

	while( (char_from_file = fgetc(file_with_bf_code)) != EOF )
	{
	if( is_bf_instruction(char_from_file) != NULL )
	add_instruction_to_the_list(&head_ptr, &current_instr_ptr, char_from_file);
	}

	execute_instructions(&values_ptr, head_ptr, &current_instr_ptr);
	clear_the_memory(head_ptr, &current_instr_ptr);

	return EX_SUCCESS;
	}

view raw brainfuck_interpreter.c hosted with ❤ by GitHub

Search This Blog

shizz3r IT tech blog

Brainfuck Interpreter in C - fifth day

Comments

Post a Comment

Popular posts from this blog

Learning of malware analysis. Solving labs from the "Analyzing malicious Windows programs" chapter from the "Practical Malware Anlysis" book

PicoCTF 2018 - Reverse Engineering writeups

Learning of malware analysis. Solving 9-2 lab from the "OllyDbg" chapter. ("Practical Malware Analysis" book)