diff --git a/.gitignore b/.gitignore
index d8ac7ef..4fc5420 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 *.o
 compile_commands.json
+tinyparser
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..43538c2
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "streecmp"]
+	path = streecmp
+	url = http://git.0xdeadbeer.xyz/0xdeadbeer/streecmp
diff --git a/Makefile b/Makefile
index 50ca2fa..1d7bc82 100644
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,11 @@
 CC=gcc
 CFLAGS=-g3
-CFILES=tinyparser.c
+CFILES=tinyparser.c headers.c
+CFILES_STREECMP=streecmp/streecmp.c
 OUTPUT=tinyparser
 
 all:
-	$(CC) $(CFLAGS) $(CFILES) -o $(OUTPUT)
+	$(CC) $(CFLAGS) $(CFILES) $(CFILES_STREECMP) -o $(OUTPUT)
 
 run:
 	./$(OUTPUT)
diff --git a/headers.c b/headers.c
new file mode 100644
index 0000000..ccb1e79
--- /dev/null
+++ b/headers.c
@@ -0,0 +1,108 @@
+char *headers = "A-IM\n"
+                "Accept\n"
+                "Accept-Charset\n"
+                "Accept-Datetime\n"
+                "Accept-Encoding\n"
+                "Accept-Language\n"
+                "Access-Control-Request-Method\n"
+                "Access-Control-Request-Headers\n"
+                "Authorization\n"
+                "Cache-Control\n"
+                "Connection\n"
+                "Content-Encoding\n"
+                "Content-Length\n"
+                "Content-MD5\n"
+                "Content-Type\n"
+                "Cookie\n"
+                "Date\n"
+                "Expect\n"
+                "Forwarded\n"
+                "From\n"
+                "Host\n"
+                "HTTP2-Settings\n"
+                "If-Match\n"
+                "If-Modified-Since\n"
+                "If-None-Match\n"
+                "If-Range\n"
+                "If-Unmodified-Since\n"
+                "Max-Forwards\n"
+                "Origin\n"
+                "Pragma\n"
+                "Prefer\n"
+                "Proxy-Authorization\n"
+                "Range\n"
+                "Referer\n"
+                "TE\n"
+                "Trailer\n"
+                "Transfer-Encoding\n"
+                "User-Agent\n"
+                "Upgrade\n"
+                "Via\n"
+                "Warning\n"
+                "Upgrade-Insecure-Requests\n"
+                "X-Requested-With\n"
+                "DNT\n"
+                "X-Forwarded-For\n"
+                "X-Forwarded-Host\n"
+                "X-Forwarded-Proto\n"
+                "Front-End-Https\n"
+                "X-Http-Method-Override\n"
+                "X-ATT-DeviceID\n"
+                "X-Wap-Profile\n"
+                "Proxy-Connection\n"
+                "X-UIDH\n"
+                "X-Csrf-Token\n"
+                "X-Request-ID\n"
+                "X-Correlation-ID\n"
+                "Correlation-ID\n"
+                "Save-Data\n"
+                "Sec-GPC\n"
+                "Accept-CH\n"
+                "Access-Control-Allow-Origin\n"
+                "Access-Control-Allow-Credentials\n"
+                "Access-Control-Expose-Headers\n"
+                "Access-Control-Max-Age\n"
+                "Access-Control-Allow-Methods\n"
+                "Access-Control-Allow-Headers\n"
+                "Accept-Patch\n"
+                "Accept-Ranges\n"
+                "Age\n"
+                "Allow\n"
+                "Alt-Svc\n"
+                "Content-Disposition\n"
+                "Content-Language\n"
+                "Content-Location\n"
+                "Content-Range\n"
+                "Delta-Base\n"
+                "ETag\n"
+                "Expires\n"
+                "IM\n"
+                "Last-Modified\n"
+                "Link\n"
+                "Location\n"
+                "P3P\n"
+                "Preference-Applied\n"
+                "Proxy-Authenticate\n"
+                "Public-Key-Pins\n"
+                "Retry-After\n"
+                "Server\n"
+                "Set-Cookie\n"
+                "Strict-Transport-Security\n"
+                "Tk\n"
+                "Vary\n"
+                "WWW-Authenticate\n"
+                "X-Frame-Options\n"
+                "Content-Security-Policy\n"
+                "Expect-CT\n"
+                "NEL\n"
+                "Permissions-Policy\n"
+                "Refresh\n"
+                "Report-To\n"
+                "Status\n"
+                "Timing-Allow-Origin\n"
+                "X-Content-Duration\n"
+                "X-Content-Type-Options\n"
+                "X-Powered-By\n"
+                "X-Redirect-By\n"
+                "X-UA-Compatible\n"
+                "X-XSS-Protection";
diff --git a/streecmp b/streecmp
new file mode 160000
index 0000000..42ed963
--- /dev/null
+++ b/streecmp
@@ -0,0 +1 @@
+Subproject commit 42ed96353d186078e34e4c541e7c92d433cda138
diff --git a/tinyparser b/tinyparser
index a3ed8f5..27b4e40 100755
Binary files a/tinyparser and b/tinyparser differ
diff --git a/tinyparser.c b/tinyparser.c
index 0fbfd97..19bc727 100644
--- a/tinyparser.c
+++ b/tinyparser.c
@@ -2,6 +2,9 @@
 #include
 #include
 #include "tinyparser.h"
+#include "streecmp/streecmp.h"
+
+extern char *headers;
 
 enum errs err = err_init;
 enum states state = state_init;
@@ -11,17 +14,25 @@ char *ver = NULL;
 int uri_len = 0;
 int ver_len = 0;
 
-int read_line(char **cursor, int cursor_size) {
-    int diff = 0;
-    char *match = strchr(*cursor, '\n');
+struct nod *header_tree = NULL;
+char **header_table = NULL;
 
-    if (!match)
-        match = (*cursor)+cursor_size-1;
-
-    diff = match - (*cursor);
-    *cursor += diff + 1;
-
-    return diff;
+int read_line(char **buffer, char **buffer_limit) {
+    int diff = 0;
+    if ((*buffer) >= (*buffer_limit)) {
+        return diff;
+    }
+
+    int ret = 0;
+    char *match = strstr(*buffer, "\r\n");
+    if (match) {
+        ret = match-(*buffer);
+        *buffer += 2;
+    } else {
+        ret = (*buffer_limit)-(*buffer);
+    }
+    *buffer += ret;
+    return ret;
 }
 
 int cmp_orig(char *ch, char *orig, char *limit) {
@@ -57,18 +68,43 @@ int cmp_ascii(char *ch, char *limit) {
     return diff;
 }
 
-int parse_title(char *title_offset, int title_len) {
-    int title_cursor = 0;
+int parse_header(char *offset, int len) {
+    int cursor = 0;
+    int ret = 0;
+    char *header_limit = offset+len;
+    char *sep = strchr(offset, ':');
+    if (!sep) {
+        return -1;
+    }
+
+    if (sep > header_limit) {
+        return -1;
+    }
+
+    int htitle_len = sep-offset;
+    ret = streencmp(header_tree, offset, htitle_len);
+    if (ret <= 0 || ret >= header_count) {
+        return -1;
+    }
+
+    char *hvalue = sep+1;
+    header_table[ret] = hvalue;
+
+    return 0;
+}
+
+int parse_title(char *offset, int len) {
+    int cursor = 0;
 
     while (state != state_title_ok && state != state_title_err) {
-        if (title_cursor >= title_len) {
+        if (cursor >= len) {
             state = state_title_err;
             err = err_parse_title;
         }
 
         int ret = 0;
-        char *ch = title_offset+title_cursor;
-        char *lim = title_offset+title_len-1;
+        char *ch = offset+cursor;
+        char *lim = offset+len-1;
 
         switch (state) {
             case state_init:
@@ -76,19 +112,19 @@ int parse_title(char *title_offset, int title_len) {
                 break;
             case state_title_method:
                 if ((ret = cmp_orig(ch, "GET", lim))) {
-                    title_cursor += ret;
+                    cursor += ret;
                     state = state_title_after_method;
                     method = method_get;
                     break;
                 }
                 if ((ret = cmp_orig(ch, "POST", lim))) {
-                    title_cursor += ret;
+                    cursor += ret;
                     state = state_title_after_method;
                     method = method_post;
                     break;
                 }
                 if ((ret = cmp_orig(ch, "HEAD", lim))) {
-                    title_cursor += ret;
+                    cursor += ret;
                     state = state_title_after_method;
                     method = method_head;
                     break;
@@ -98,7 +134,7 @@ int parse_title(char *title_offset, int title_len) {
                 err = err_parse_title_method;
                 break;
             case state_title_after_method:
-                title_cursor++;
+                cursor++;
                 state = state_title_uri;
                 break;
             case state_title_uri:
@@ -115,11 +151,11 @@ int parse_title(char *title_offset, int title_len) {
                 }
 
                 uri_len = ret;
-                title_cursor += ret;
+                cursor += ret;
                 state = state_title_after_uri;
                 break;
             case state_title_after_uri:
-                title_cursor++;
+                cursor++;
                 state = state_title_version;
                 break;
             case state_title_version:
@@ -136,7 +172,7 @@ int parse_title(char *title_offset, int title_len) {
                 }
 
                 ver_len = ret;
-                title_cursor += ret;
+                cursor += ret;
                 state = state_title_ok;
                 break;
             case state_title_ok:
@@ -144,6 +180,7 @@ int parse_title(char *title_offset, int title_len) {
                 if (*ch == ' ') continue;
                 if (*ch == '\t') continue;
                 if (*ch == '\0') continue;
+                if (*ch == '\r') continue;
 
                 state = state_title_err;
                 err = err_parse_title;
@@ -164,16 +201,35 @@ int parse_title(char *title_offset, int title_len) {
 
 int parse_request(char *buffer) {
     int ret;
-    int buffer_size = strlen(buffer);
+    char *buffer_limit = buffer+strlen(buffer);
 
     char *title_offset = buffer;
-    int title_len = read_line(&buffer, buffer_size);
-
+    int title_len = read_line(&buffer, &buffer_limit);
     if ((ret = parse_title(title_offset, title_len)) < 0) {
         fprintf(stderr, "Failed parsing title\n");
         return -1;
     }
 
+    for (int bound = 0; bound < MAX_BOUND; bound++) {
+        char *header_offset = buffer;
+        int header_len = read_line(&buffer, &buffer_limit);
+        char *header_limit = header_offset+header_len;
+
+        // IF END OF MESSAGE
+        if (!header_len) {
+            state = state_fin;
+            break;
+        }
+
+        if ((ret = parse_header(header_offset, header_len)) < 0) {
+            fprintf(stderr, "Failed parsing header\n");
+            return -1;
+        }
+    }
+
+    free(uri);
+    free(ver);
+
     return 0;
 }
 
@@ -181,6 +237,24 @@ int main(void) {
     int ret;
     char *str = strdup(TEST_ONE);
 
+    header_table = (char **) calloc(header_count, sizeof(char *));
+    if (!header_table) {
+        fprintf(stderr, "Not enough dynamic memory\n");
+        return -1;
+    }
+
+    header_tree = allocnod();
+    if (!header_tree) {
+        fprintf(stderr, "Not enough dynamic memory\n");
+        return -1;
+    }
+
+    ret = gentree(header_tree, headers, NULL);
+    if (ret < 0) {
+        fprintf(stderr, "Failed generating the header comparison tree\n");
+        return -1;
+    }
+
     ret = parse_request(str);
     if (ret < 0) {
         fprintf(stderr, "Failed parsing request\n");
@@ -189,6 +263,8 @@ int main(void) {
 
     fprintf(stderr, "Finished parsing request\n");
 
+    frenod(header_tree);
+    free(header_table);
     free(str);
 
     return 0;
diff --git a/tinyparser.h b/tinyparser.h
index 406c5ca..b6d3002 100644
--- a/tinyparser.h
+++ b/tinyparser.h
@@ -1,12 +1,19 @@
 #ifndef PARSER_H
 #define PARSER_H
 
+#define CRLF "\r\n"
+#define MAX_BOUND 1024
 #define LINE_BUF 1024
 #define TEST_ONE "GET / HTTP/1.1\r\n"\
                  "Host: archive.0xdeadbeer.xyz\r\n"\
                  "User-Agent: curl/8.9.1\r\n"\
-                 "Accept: */*\r\n"
+                 "Accept: */*\r\n"\
+                 "\r\n"\
+                 "{\"key\": \"kefjoiawejfojgorgjbosejrgo\"}"
 
+// ENUMS
+
+// SRC:https://datatracker.ietf.org/doc/html/rfc2616
 enum errs {
     err_init = 0,
     err_generic,
@@ -14,9 +21,11 @@ enum errs {
     err_parse_title,
     err_parse_title_method,
     err_parse_title_uri,
-    err_parse_title_version
+    err_parse_title_version,
+    err_parse_header_not_found
 };
 
+// SRC:https://datatracker.ietf.org/doc/html/rfc2616
 enum states {
     state_init = 0,
     state_title_method,
@@ -25,9 +34,23 @@ enum states {
     state_title_after_uri,
     state_title_version,
     state_title_ok,
-    state_title_err
+    state_title_err,
+
+    state_header_init,
+    state_header_alloc,
+    state_header_parse,
+    state_header_not_found,
+    state_header_ok,
+    state_header_err,
+
+    state_body_init,
+    state_body_ok,
+    state_body_err,
+
+    state_fin,
 };
 
+// SRC:https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods
 enum methods {
     method_init = 0,
     method_get,
@@ -35,8 +58,9 @@ enum methods {
     method_head
 };
 
+// SRC:https://en.wikipedia.org/wiki/List_of_HTTP_header_fields
 enum headers {
-    header_a_im = 0,
+    header_a_im = 1,
     header_accept,
     header_accept_charset,
     header_accept_datetime,
@@ -77,7 +101,6 @@ enum headers {
     header_upgrade,
     header_via,
     header_warning,
-
     header_upgrade_insecure_requests,
     header_x_requested_with,
     header_dnt,
@@ -96,7 +119,6 @@ enum headers {
     header_correlation_id,
     header_save_data,
     header_sec_gpc,
-
     header_accept_ch,
     header_access_control_allow_origin,
     header_access_control_allow_credentials,
@@ -145,7 +167,9 @@ enum headers {
     header_x_powered_by,
     header_x_redirect_by,
     header_x_ua_compatible,
-    header_x_xss_protection
+    header_x_xss_protection,
+
+    header_count
 };
 
 #endif