fix: remove cluttered state machine code

This commit is contained in:
0xdeadbeer 2024-09-03 21:07:52 +02:00
parent 7b06d6c6fc
commit 7b8bb91e2f
7 changed files with 90 additions and 185 deletions

View File

@ -1,6 +1,6 @@
CC=gcc CC=gcc
CFLAGS=-g3 CFLAGS=-g3
CFILES=tinyparser.c headers.c CFILES=tinyparser.c strings.c
CFILES_STREECMP=streecmp/streecmp.c CFILES_STREECMP=streecmp/streecmp.c
OUTPUT=tinyparser OUTPUT=tinyparser

1
README
View File

@ -20,7 +20,6 @@ TODO
* headers table needs length fields * headers table needs length fields
* refactor header parsing * refactor header parsing
* refactor state machine
* implement tests * implement tests
* integrations with tinyproxy..? * integrations with tinyproxy..?

@ -1 +1 @@
Subproject commit 42ed96353d186078e34e4c541e7c92d433cda138 Subproject commit ca1293aa8f123adb91d80f687933ea5c596c114a

View File

@ -1,3 +1,13 @@
char *methods = "GET\n"
"HEAD\n"
"POST\n"
"PUT\n"
"DELETE\n"
"CONNECT\n"
"OPTIONS\n"
"TRACE\n"
"PATCH\n";
char *headers = "A-IM\n" char *headers = "A-IM\n"
"Accept\n" "Accept\n"
"Accept-Charset\n" "Accept-Charset\n"

Binary file not shown.

View File

@ -5,16 +5,16 @@
#include "streecmp/streecmp.h" #include "streecmp/streecmp.h"
extern char *headers; extern char *headers;
extern char *methods;
enum errs err = err_init; int method = 0;
enum states state = state_init;
enum methods method = method_init;
char *uri = NULL; char *uri = NULL;
char *ver = NULL; char *ver = NULL;
int uri_len = 0; int uri_len = 0;
int ver_len = 0; int ver_len = 0;
struct nod *header_tree = NULL; struct nod *header_tree = NULL;
struct nod *method_tree = NULL;
char **header_table = NULL; char **header_table = NULL;
int read_line(char **buffer, char **buffer_limit) { int read_line(char **buffer, char **buffer_limit) {
@ -35,39 +35,6 @@ int read_line(char **buffer, char **buffer_limit) {
return ret; return ret;
} }
/*
 * Compare the bytes starting at ch against the NUL-terminated string orig.
 *
 * Scanning stops at the end of orig, at a NUL byte in ch, or once ch reaches
 * limit (exclusive), whichever happens first.
 *
 * Returns the number of bytes compared when no differing byte was seen, or -1
 * at the first differing byte. Running out of input before orig is exhausted
 * is not reported as a mismatch: the count compared so far is returned.
 */
int cmp_orig(char *ch, char *orig, char *limit) {
    int diff = 0;

    /* Check the bound first so a cursor sitting at limit is never read. */
    for (; ch < limit && *orig != '\0' && *ch != '\0'; orig++, ch++, diff++) {
        if (*orig != *ch) {
            return -1;
        }
    }

    return diff;
}
/* Report whether c is one of the bytes cmp_ascii() accepts. */
static int cmp_ascii_accepts(char c) {
    if ((c >= '0' && c <= '9') ||
        (c >= 'A' && c <= 'Z') ||
        (c >= 'a' && c <= 'z')) {
        return 1;
    }

    /* In ASCII, '#' through '/' covers: # $ % & ' ( ) * + , - . / */
    if (c >= '#' && c <= '/') {
        return 1;
    }

    switch (c) {
    case '!': case ':': case ';': case '=': case '?':
    case '@': case '[': case ']': case '_': case '~':
        return 1;
    default:
        return 0;
    }
}

/*
 * Measure the run of accepted bytes starting at ch.
 *
 * Scanning stops at a space, at a NUL byte, or once ch reaches limit
 * (exclusive), whichever happens first.
 *
 * Returns the number of bytes scanned, or -1 as soon as a byte outside the
 * accepted set (see cmp_ascii_accepts) is encountered.
 */
int cmp_ascii(char *ch, char *limit) {
    int diff = 0;

    /* Check the bound first so a cursor sitting at limit is never read. */
    for (; ch < limit && *ch != '\0' && *ch != ' '; ch++, diff++) {
        if (!cmp_ascii_accepts(*ch)) {
            return -1;
        }
    }

    return diff;
}
int parse_header(char *offset, int len) { int parse_header(char *offset, int len) {
int cursor = 0; int cursor = 0;
int ret = 0; int ret = 0;
@ -94,108 +61,55 @@ int parse_header(char *offset, int len) {
} }
int parse_title(char *offset, int len) { int parse_title(char *offset, int len) {
int cursor = 0;
while (state != state_title_ok && state != state_title_err) {
if (cursor >= len) {
state = state_title_err;
err = err_parse_title;
}
int ret = 0; int ret = 0;
char *ch = offset+cursor; int diff = 0;
char *lim = offset+len-1; char *cursor = offset;
char *cursor_lim = cursor+len;
switch (state) { // method
case state_init: char *method_lim = strchr(offset, ' ');
state = state_title_method; if (!method_lim) {
break;
case state_title_method:
if ((ret = cmp_orig(ch, "GET", lim))) {
cursor += ret;
state = state_title_after_method;
method = method_get;
break;
}
if ((ret = cmp_orig(ch, "POST", lim))) {
cursor += ret;
state = state_title_after_method;
method = method_post;
break;
}
if ((ret = cmp_orig(ch, "HEAD", lim))) {
cursor += ret;
state = state_title_after_method;
method = method_head;
break;
}
state = state_title_err;
err = err_parse_title_method;
break;
case state_title_after_method:
cursor++;
state = state_title_uri;
break;
case state_title_uri:
if ((ret = cmp_ascii(ch, lim)) <= 0) {
state = state_title_err;
err = err_parse_title_uri;
break;
}
if (!(uri = strndup(ch, ret))) {
state = state_title_err;
err = err_memory;
break;
}
uri_len = ret;
cursor += ret;
state = state_title_after_uri;
break;
case state_title_after_uri:
cursor++;
state = state_title_version;
break;
case state_title_version:
if ((ret = cmp_ascii(ch, lim)) <= 0) {
state = state_title_err;
err = err_parse_title_version;
break;
}
if (!(ver = strndup(ch, ret))) {
state = state_title_err;
err = err_memory;
break;
}
ver_len = ret;
cursor += ret;
state = state_title_ok;
break;
case state_title_ok:
for (;ch < lim;ch++) {
if (*ch == ' ') continue;
if (*ch == '\t') continue;
if (*ch == '\0') continue;
if (*ch == '\r') continue;
state = state_title_err;
err = err_parse_title;
break;
}
break;
case state_title_err:
fprintf(stderr, "failed\n");
return -1; return -1;
break;
default:
break;
} }
diff = method_lim-cursor;
ret = streencmp(method_tree, cursor, diff);
if (ret == 0) {
return -1;
} }
method = ret;
cursor += diff;
// white space
while (*cursor == ' ') {
cursor++;
}
// uri
char *uri_lim = strchr(cursor, ' ');
if (!uri_lim) {
return -1;
}
diff = uri_lim-cursor;
uri = cursor;
uri_len = diff;
cursor += diff;
// white space
while (*cursor == ' ') {
cursor++;
}
// ver
diff = cursor_lim-cursor;
ver = cursor;
ver_len = diff;
cursor += diff;
return 0; return 0;
} }
@ -217,7 +131,6 @@ int parse_request(char *buffer) {
// IF END OF MESSAGE // IF END OF MESSAGE
if (!header_len) { if (!header_len) {
state = state_fin;
break; break;
} }
@ -227,9 +140,6 @@ int parse_request(char *buffer) {
} }
} }
free(uri);
free(ver);
return 0; return 0;
} }
@ -249,8 +159,20 @@ int main(void) {
return -1; return -1;
} }
method_tree = allocnod();
if (!method_tree) {
fprintf(stderr, "Not enough dynamic memory\n");
return -1;
}
ret = gentree(header_tree, headers, NULL); ret = gentree(header_tree, headers, NULL);
if (gentree < 0) { if (ret < 0) {
fprintf(stderr, "Failed generating the header comparison tree\n");
return -1;
}
ret = gentree(method_tree, methods, NULL);
if (ret < 0) {
fprintf(stderr, "Failed generating the header comparison tree\n"); fprintf(stderr, "Failed generating the header comparison tree\n");
return -1; return -1;
} }
@ -261,8 +183,14 @@ int main(void) {
return -1; return -1;
} }
fprintf(stderr, "Finished parsing request\n"); fprintf(stderr, "[+] Finished parsing request\n"
" method: %d\n"
" uri : %.*s\n"
" ver : %.*s\n",
method, uri_len, uri, ver_len, ver
);
frenod(method_tree);
frenod(header_tree); frenod(header_tree);
free(header_table); free(header_table);
free(str); free(str);

View File

@ -13,49 +13,17 @@
// ENUMS // ENUMS
// SRC:https://datatracker.ietf.org/doc/html/rfc2616
/* Error codes stored in the global `err` by the request parser. */
enum errs {
    err_init                   = 0, /* initial value of `err` */
    err_generic                = 1,
    err_memory                 = 2, /* an allocation (e.g. strndup) failed */
    err_parse_title            = 3, /* request line ran out or had trailing junk */
    err_parse_title_method     = 4, /* no known method matched */
    err_parse_title_uri        = 5, /* URI token missing or has a rejected byte */
    err_parse_title_version    = 6, /* version token missing or has a rejected byte */
    err_parse_header_not_found = 7
};
// SRC:https://datatracker.ietf.org/doc/html/rfc2616
/*
 * Parser states stored in the global `state`. The values are grouped by the
 * part of the request they belong to: request line ("title"), headers, body.
 */
enum states {
    state_init               = 0,

    /* request line */
    state_title_method       = 1,
    state_title_after_method = 2,
    state_title_uri          = 3,
    state_title_after_uri    = 4,
    state_title_version      = 5,
    state_title_ok           = 6,
    state_title_err          = 7,

    /* headers */
    state_header_init        = 8,
    state_header_alloc       = 9,
    state_header_parse       = 10,
    state_header_not_found   = 11,
    state_header_ok          = 12,
    state_header_err         = 13,

    /* body */
    state_body_init          = 14,
    state_body_ok            = 15,
    state_body_err           = 16,

    /* set when the empty line ending the message is reached */
    state_fin                = 17,
};
// SRC:https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods // SRC:https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods
enum methods { enum methods {
method_init = 0, method_get = 1,
method_get, method_head,
method_post, method_post,
method_head method_put,
method_delete,
method_connect,
method_options,
method_trace,
method_patch
}; };
// SRC:https://en.wikipedia.org/wiki/List_of_HTTP_header_fields // SRC:https://en.wikipedia.org/wiki/List_of_HTTP_header_fields