tinyparser: replace the request-line state machine with a linear parser

Summary of the hunks below:
  * Makefile     - compile strings.c in place of headers.c.
  * README       - drop "refactor state machine" from the TODO list.
  * streecmp     - move the submodule pointer to ca1293a.
  * strings.c    - renamed from headers.c; adds the newline-separated
                   `methods` string ahead of `headers`.
  * tinyparser   - tracked binary changed (no textual diff).
  * tinyparser.c - remove cmp_orig()/cmp_ascii() and the err/state globals;
                   parse_title() now looks the method up in method_tree via
                   streencmp() and points uri/ver into the line it was given
                   (lengths in uri_len/ver_len) instead of making strndup()
                   copies, so parse_request() no longer frees them; main()
                   builds and frees method_tree and prints method/uri/ver.
  * tinyparser.h - remove enum errs and enum states; enum methods now starts
                   at method_get = 1 and lists nine methods.

Review fix carried in this patch: the failure path of
gentree(method_tree, ...) reports "method comparison tree" instead of
repeating the header-tree message.

Note: line breaks and indentation were reconstructed; whitespace-only
differences (for example the header_tree line) may not match the original.

diff --git a/Makefile b/Makefile
index 1d7bc82..0cd406a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 CC=gcc
 CFLAGS=-g3
-CFILES=tinyparser.c headers.c
+CFILES=tinyparser.c strings.c
 CFILES_STREECMP=streecmp/streecmp.c
 OUTPUT=tinyparser
 
diff --git a/README b/README
index c316e63..b2199d8 100644
--- a/README
+++ b/README
@@ -20,7 +20,6 @@ TODO
 
  * headers table needs length fields
  * refactor header parsing
- * refactor state machine
  * implement tests
  * integrations with tinyproxy..?
 
diff --git a/streecmp b/streecmp
index 42ed963..ca1293a 160000
--- a/streecmp
+++ b/streecmp
@@ -1 +1 @@
-Subproject commit 42ed96353d186078e34e4c541e7c92d433cda138
+Subproject commit ca1293aa8f123adb91d80f687933ea5c596c114a
diff --git a/headers.c b/strings.c
similarity index 93%
rename from headers.c
rename to strings.c
index ccb1e79..673cd6a 100644
--- a/headers.c
+++ b/strings.c
@@ -1,3 +1,13 @@
+char *methods = "GET\n"
+                "HEAD\n"
+                "POST\n"
+                "PUT\n"
+                "DELETE\n"
+                "CONNECT\n"
+                "OPTIONS\n"
+                "TRACE\n"
+                "PATCH\n";
+
 char *headers = "A-IM\n"
                 "Accept\n"
                 "Accept-Charset\n"
diff --git a/tinyparser b/tinyparser
index 27b4e40..87609c2 100755
Binary files a/tinyparser and b/tinyparser differ
diff --git a/tinyparser.c b/tinyparser.c
index 19bc727..e11afae 100644
--- a/tinyparser.c
+++ b/tinyparser.c
@@ -5,16 +5,16 @@
 #include "streecmp/streecmp.h"
 
 extern char *headers;
+extern char *methods;
 
-enum errs err = err_init;
-enum states state = state_init;
-enum methods method = method_init;
+int method = 0;
 char *uri = NULL;
 char *ver = NULL;
 int uri_len = 0;
 int ver_len = 0;
 
-struct nod *header_tree = NULL;
+struct nod *header_tree = NULL;
+struct nod *method_tree = NULL;
 char **header_table = NULL;
 
 int read_line(char **buffer, char **buffer_limit) {
@@ -35,39 +35,6 @@ int read_line(char **buffer, char **buffer_limit) {
 	return ret;
 }
 
-int cmp_orig(char *ch, char *orig, char *limit) {
-	int diff = 0;
-	for (; *orig != '\0' && *ch != '\0' && ch < limit; orig++, ch++, diff++)
-		if (*orig != *ch)
-			return -1;
-
-	return diff;
-}
-
-int cmp_ascii(char *ch, char *limit) {
-	int diff = 0;
-	for (; *ch != '\0' && *ch != ' ' && ch < limit; ch++, diff++) {
-		if (*ch >= '0' && *ch <= '9') continue;
-		if (*ch >= 'A' && *ch <= 'Z') continue;
-		if (*ch >= 'a' && *ch <= 'z') continue;
-		if (*ch >= '#' && *ch <= '/') continue;
-		if (*ch == '!') continue;
-		if (*ch == ':') continue;
-		if (*ch == ';') continue;
-		if (*ch == '=') continue;
-		if (*ch == '?') continue;
-		if (*ch == '@') continue;
-		if (*ch == '[') continue;
-		if (*ch == ']') continue;
-		if (*ch == '_') continue;
-		if (*ch == '~') continue;
-
-		return -1;
-	}
-
-	return diff;
-}
-
 int parse_header(char *offset, int len) {
 	int cursor = 0;
 	int ret = 0;
@@ -94,108 +61,55 @@ int parse_header(char *offset, int len) {
 }
 
 int parse_title(char *offset, int len) {
-	int cursor = 0;
+	int ret = 0;
+	int diff = 0;
+	char *cursor = offset;
+	char *cursor_lim = cursor+len;
 
-	while (state != state_title_ok && state != state_title_err) {
-		if (cursor >= len) {
-			state = state_title_err;
-			err = err_parse_title;
-		}
-
-		int ret = 0;
-		char *ch = offset+cursor;
-		char *lim = offset+len-1;
-
-		switch (state) {
-		case state_init:
-			state = state_title_method;
-			break;
-		case state_title_method:
-			if ((ret = cmp_orig(ch, "GET", lim))) {
-				cursor += ret;
-				state = state_title_after_method;
-				method = method_get;
-				break;
-			}
-			if ((ret = cmp_orig(ch, "POST", lim))) {
-				cursor += ret;
-				state = state_title_after_method;
-				method = method_post;
-				break;
-			}
-			if ((ret = cmp_orig(ch, "HEAD", lim))) {
-				cursor += ret;
-				state = state_title_after_method;
-				method = method_head;
-				break;
-			}
-
-			state = state_title_err;
-			err = err_parse_title_method;
-			break;
-		case state_title_after_method:
-			cursor++;
-			state = state_title_uri;
-			break;
-		case state_title_uri:
-			if ((ret = cmp_ascii(ch, lim)) <= 0) {
-				state = state_title_err;
-				err = err_parse_title_uri;
-				break;
-			}
-
-			if (!(uri = strndup(ch, ret))) {
-				state = state_title_err;
-				err = err_memory;
-				break;
-			}
-
-			uri_len = ret;
-			cursor += ret;
-			state = state_title_after_uri;
-			break;
-		case state_title_after_uri:
-			cursor++;
-			state = state_title_version;
-			break;
-		case state_title_version:
-			if ((ret = cmp_ascii(ch, lim)) <= 0) {
-				state = state_title_err;
-				err = err_parse_title_version;
-				break;
-			}
-
-			if (!(ver = strndup(ch, ret))) {
-				state = state_title_err;
-				err = err_memory;
-				break;
-			}
-
-			ver_len = ret;
-			cursor += ret;
-			state = state_title_ok;
-			break;
-		case state_title_ok:
-			for (;ch < lim;ch++) {
-				if (*ch == ' ') continue;
-				if (*ch == '\t') continue;
-				if (*ch == '\0') continue;
-				if (*ch == '\r') continue;
-
-				state = state_title_err;
-				err = err_parse_title;
-				break;
-			}
-			break;
-		case state_title_err:
-			fprintf(stderr, "failed\n");
-			return -1;
-			break;
-		default:
-			break;
-		}
+	// method
+	char *method_lim = strchr(offset, ' ');
+	if (!method_lim) {
+		return -1;
 	}
 
+	diff = method_lim-cursor;
+	ret = streencmp(method_tree, cursor, diff);
+	if (ret == 0) {
+		return -1;
+	}
+
+	method = ret;
+	cursor += diff;
+
+	// white space
+	while (*cursor == ' ') {
+		cursor++;
+	}
+
+	// uri
+	char *uri_lim = strchr(cursor, ' ');
+	if (!uri_lim) {
+		return -1;
+	}
+
+	diff = uri_lim-cursor;
+	uri = cursor;
+	uri_len = diff;
+
+	cursor += diff;
+
+	// white space
+	while (*cursor == ' ') {
+		cursor++;
+	}
+
+	// ver
+	diff = cursor_lim-cursor;
+	ver = cursor;
+	ver_len = diff;
+
+	cursor += diff;
+
 	return 0;
 }
 
@@ -217,7 +131,6 @@ int parse_request(char *buffer) {
 
 		// IF END OF MESSAGE
 		if (!header_len) {
-			state = state_fin;
 			break;
 		}
 
@@ -227,9 +140,6 @@ int parse_request(char *buffer) {
 		}
 	}
 
-	free(uri);
-	free(ver);
-
 	return 0;
 }
 
@@ -249,8 +159,20 @@ int main(void) {
 		return -1;
 	}
 
+	method_tree = allocnod();
+	if (!method_tree) {
+		fprintf(stderr, "Not enough dynamic memory\n");
+		return -1;
+	}
+
 	ret = gentree(header_tree, headers, NULL);
-	if (gentree < 0) {
+	if (ret < 0) {
+		fprintf(stderr, "Failed generating the header comparison tree\n");
+		return -1;
+	}
+
+	ret = gentree(method_tree, methods, NULL);
+	if (ret < 0) {
-		fprintf(stderr, "Failed generating the header comparison tree\n");
+		fprintf(stderr, "Failed generating the method comparison tree\n");
 		return -1;
 	}
@@ -261,8 +183,14 @@ int main(void) {
 		return -1;
 	}
 
-	fprintf(stderr, "Finished parsing request\n");
+	fprintf(stderr, "[+] Finished parsing request\n"
+	                " method: %d\n"
+	                " uri : %.*s\n"
+	                " ver : %.*s\n",
+	                method, uri_len, uri, ver_len, ver
+	);
 
+	frenod(method_tree);
 	frenod(header_tree);
 	free(header_table);
 	free(str);
diff --git a/tinyparser.h b/tinyparser.h
index b6d3002..0e54d21 100644
--- a/tinyparser.h
+++ b/tinyparser.h
@@ -13,49 +13,17 @@
 
 // ENUMS
 
-// SRC:https://datatracker.ietf.org/doc/html/rfc2616
-enum errs {
-	err_init = 0,
-	err_generic,
-	err_memory,
-	err_parse_title,
-	err_parse_title_method,
-	err_parse_title_uri,
-	err_parse_title_version,
-	err_parse_header_not_found
-};
-
-// SRC:https://datatracker.ietf.org/doc/html/rfc2616
-enum states {
-	state_init = 0,
-	state_title_method,
-	state_title_after_method,
-	state_title_uri,
-	state_title_after_uri,
-	state_title_version,
-	state_title_ok,
-	state_title_err,
-
-	state_header_init,
-	state_header_alloc,
-	state_header_parse,
-	state_header_not_found,
-	state_header_ok,
-	state_header_err,
-
-	state_body_init,
-	state_body_ok,
-	state_body_err,
-
-	state_fin,
-};
-
 // SRC:https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods
 enum methods {
-	method_init = 0,
-	method_get,
+	method_get = 1,
+	method_head,
 	method_post,
-	method_head
+	method_put,
+	method_delete,
+	method_connect,
+	method_options,
+	method_trace,
+	method_patch
 };
 
 // SRC:https://en.wikipedia.org/wiki/List_of_HTTP_header_fields