commit f428190016540487d39faa9961568f5c4ebbb016
Author: Kevin Jerebica
Date:   Fri Aug 16 01:12:06 2024 +0200

    feat: implement parsing of http title

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d8ac7ef
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.o
+compile_commands.json
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..50ca2fa
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,13 @@
+CC=gcc
+CFLAGS=-g3
+CFILES=tinyparser.c
+OUTPUT=tinyparser
+
+all:
+	$(CC) $(CFLAGS) $(CFILES) -o $(OUTPUT)
+
+run:
+	./$(OUTPUT)
+
+clean:
+	rm -rf $(OUTPUT)
diff --git a/README b/README
new file mode 100644
index 0000000..b6e1d1a
--- /dev/null
+++ b/README
@@ -0,0 +1,6 @@
++================+
+|   tinyparser   |
++================+
+
+HTTP parsing prototype and algorithm development environment for tinyproxy - my
+new proxy.
diff --git a/tinyparser b/tinyparser
new file mode 100755
index 0000000..a3ed8f5
Binary files /dev/null and b/tinyparser differ
diff --git a/tinyparser.c b/tinyparser.c
new file mode 100644
index 0000000..0fbfd97
--- /dev/null
+++ b/tinyparser.c
@@ -0,0 +1,297 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "tinyparser.h"
+
+/*
+ * Parser state shared by the functions below. parse_title() resets all of it
+ * on entry. uri and ver are heap strings owned by this file: they are freed
+ * by the next parse_title() call and by main().
+ */
+enum errs err = err_init;
+enum states state = state_init;
+enum methods method = method_init;
+char *uri = NULL;
+char *ver = NULL;
+int uri_len = 0;
+int ver_len = 0;
+
+/*
+ * Find the end of the line that starts at *cursor.
+ *
+ * Scans at most cursor_size bytes and stops early at a NUL byte. Returns the
+ * number of bytes before the '\n' (a preceding '\r' is counted) and moves
+ * *cursor to the first byte after the '\n'. When no '\n' is found the rest of
+ * the data is the line: its full length is returned and *cursor is moved to
+ * its end. Returns 0 without touching *cursor for a NULL cursor or a
+ * non-positive size.
+ */
+int read_line(char **cursor, int cursor_size) {
+    int diff = 0;
+    char *line = NULL;
+
+    if (!cursor || !*cursor || cursor_size <= 0) {
+        return 0;
+    }
+
+    line = *cursor;
+    while (diff < cursor_size && line[diff] != '\0' && line[diff] != '\n') {
+        diff++;
+    }
+
+    /* Step over the newline itself, but never past the end of the data. */
+    if (diff < cursor_size && line[diff] == '\n') {
+        *cursor = line + diff + 1;
+    } else {
+        *cursor = line + diff;
+    }
+
+    return diff;
+}
+
+/*
+ * Match the literal orig at ch without reading at or beyond limit.
+ *
+ * Returns strlen(orig) when every character of orig is present at ch, and -1
+ * on a mismatch or when the input ends first. Callers must test the result
+ * with "> 0": -1 is non-zero and therefore true in a plain if().
+ */
+int cmp_orig(const char *ch, const char *orig, const char *limit) {
+    int diff = 0;
+
+    for (; *orig != '\0'; orig++, ch++, diff++) {
+        /* Check the bound before dereferencing ch. */
+        if (ch >= limit || *ch != *orig) {
+            return -1;
+        }
+    }
+
+    return diff;
+}
+
+/* Return 1 when c belongs to the character set cmp_ascii() accepts, else 0. */
+static int is_token_char(char c) {
+    if (c >= '0' && c <= '9') {
+        return 1;
+    }
+    if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
+        return 1;
+    }
+    /* '#' through '/' covers # $ % & ' ( ) * + , - . / */
+    if (c >= '#' && c <= '/') {
+        return 1;
+    }
+
+    /* strchr() also matches the terminator, so rule out NUL explicitly. */
+    return c != '\0' && strchr("!:;=?@[]_~", c) != NULL;
+}
+
+/*
+ * Measure the token that starts at ch.
+ *
+ * The token ends at the first space, at a NUL byte, or at limit (exclusive).
+ * Returns its length, which is 0 for an empty token, or -1 when the token
+ * contains a character outside the accepted set.
+ */
+int cmp_ascii(const char *ch, const char *limit) {
+    int diff = 0;
+
+    for (; ch < limit && *ch != '\0' && *ch != ' '; ch++, diff++) {
+        if (!is_token_char(*ch)) {
+            return -1;
+        }
+    }
+
+    return diff;
+}
+
+/* Move the title state machine to its error state and record the reason. */
+static void title_fail(enum errs code) {
+    state = state_title_err;
+    err = code;
+}
+
+/*
+ * Parse a request line of the form "METHOD SP URI SP VERSION".
+ *
+ * title_offset points at the line and title_len is its length without the
+ * '\n'. One trailing '\r' is ignored, so CRLF and bare LF lines both work.
+ * On success the globals method, uri/uri_len and ver/ver_len describe the
+ * line and state is state_title_ok. uri and ver are heap copies that stay
+ * valid until the next call.
+ *
+ * Returns 0 on success. On failure prints "failed" to stderr, leaves the
+ * reason in err and returns -1.
+ */
+int parse_title(char *title_offset, int title_len) {
+    int ret = 0;
+    char *ch = title_offset;
+    char *lim = title_offset;
+
+    /* Start clean so a second call neither leaks nor reuses old results. */
+    free(uri);
+    free(ver);
+    uri = NULL;
+    ver = NULL;
+    uri_len = 0;
+    ver_len = 0;
+    method = method_init;
+    err = err_init;
+    state = state_init;
+
+    if (!title_offset || title_len <= 0) {
+        title_fail(err_parse_title);
+    } else {
+        if (title_offset[title_len-1] == '\r') {
+            title_len--;
+        }
+        lim = title_offset + title_len;
+    }
+
+    while (state != state_title_ok && state != state_title_err) {
+        switch (state) {
+        case state_init:
+            state = state_title_method;
+            break;
+        case state_title_method:
+            /* "> 0" matters: cmp_orig() reports a mismatch as -1. */
+            if ((ret = cmp_orig(ch, "GET", lim)) > 0) {
+                method = method_get;
+            } else if ((ret = cmp_orig(ch, "POST", lim)) > 0) {
+                method = method_post;
+            } else if ((ret = cmp_orig(ch, "HEAD", lim)) > 0) {
+                method = method_head;
+            } else {
+                title_fail(err_parse_title_method);
+                break;
+            }
+
+            ch += ret;
+            state = state_title_after_method;
+            break;
+        case state_title_after_method:
+            /* The method must be followed by a space ("GETX /" fails). */
+            if (ch >= lim || *ch != ' ') {
+                title_fail(err_parse_title_method);
+                break;
+            }
+
+            ch++;
+            state = state_title_uri;
+            break;
+        case state_title_uri:
+            if ((ret = cmp_ascii(ch, lim)) <= 0) {
+                title_fail(err_parse_title_uri);
+                break;
+            }
+
+            if (!(uri = strndup(ch, (size_t)ret))) {
+                title_fail(err_memory);
+                break;
+            }
+
+            uri_len = ret;
+            ch += ret;
+            state = state_title_after_uri;
+            break;
+        case state_title_after_uri:
+            if (ch >= lim || *ch != ' ') {
+                title_fail(err_parse_title_version);
+                break;
+            }
+
+            ch++;
+            state = state_title_version;
+            break;
+        case state_title_version:
+            if ((ret = cmp_ascii(ch, lim)) <= 0) {
+                title_fail(err_parse_title_version);
+                break;
+            }
+
+            if (!(ver = strndup(ch, (size_t)ret))) {
+                title_fail(err_memory);
+                break;
+            }
+
+            ver_len = ret;
+            ch += ret;
+            state = state_title_ok;
+
+            /* Only blanks or NUL padding may follow the version. */
+            for (; ch < lim; ch++) {
+                if (*ch != ' ' && *ch != '\t' && *ch != '\0') {
+                    title_fail(err_parse_title);
+                    break;
+                }
+            }
+            break;
+        default:
+            /* Unknown state: fail instead of looping forever. */
+            title_fail(err_generic);
+            break;
+        }
+    }
+
+    if (state == state_title_err) {
+        fprintf(stderr, "failed\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Parse the request held in the NUL-terminated string buffer.
+ *
+ * Only the first line (the request line) is parsed; the lines after it are
+ * not examined. Returns 0 on success, or -1 after printing a message to
+ * stderr.
+ */
+int parse_request(char *buffer) {
+    int buffer_size = 0;
+    int title_len = 0;
+    char *title_offset = buffer;
+
+    if (!buffer) {
+        fprintf(stderr, "Failed parsing title\n");
+        return -1;
+    }
+
+    buffer_size = (int)strlen(buffer);
+    title_len = read_line(&buffer, buffer_size);
+
+    if (parse_title(title_offset, title_len) < 0) {
+        fprintf(stderr, "Failed parsing title\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Parse the built-in TEST_ONE request and report the outcome on stderr. */
+int main(void) {
+    int ret = 0;
+    char *str = strdup(TEST_ONE);
+
+    if (!str) {
+        fprintf(stderr, "Failed parsing request\n");
+        return -1;
+    }
+
+    ret = parse_request(str);
+    if (ret < 0) {
+        fprintf(stderr, "Failed parsing request\n");
+    } else {
+        fprintf(stderr, "Finished parsing request\n");
+    }
+
+    /* Release everything on both the success and the failure path. */
+    free(uri);
+    free(ver);
+    uri = NULL;
+    ver = NULL;
+    free(str);
+
+    return ret < 0 ? -1 : 0;
+}
diff --git a/tinyparser.h b/tinyparser.h
new file mode 100644
index 0000000..406c5ca
--- /dev/null
+++ b/tinyparser.h
@@ -0,0 +1,160 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#define LINE_BUF 1024
+
+/* Sample request used by main() in tinyparser.c. */
+#define TEST_ONE "GET / HTTP/1.1\r\n"\
+                 "Host: archive.0xdeadbeer.xyz\r\n"\
+                 "User-Agent: curl/8.9.1\r\n"\
+                 "Accept: */*\r\n"
+
+/* Reason for the most recent parse failure (err_init: none recorded). */
+enum errs {
+    err_init = 0,
+    err_generic,
+    err_memory,
+    err_parse_title,
+    err_parse_title_method,
+    err_parse_title_uri,
+    err_parse_title_version
+};
+
+/* States of the request-line state machine in parse_title(). */
+enum states {
+    state_init = 0,
+    state_title_method,
+    state_title_after_method,
+    state_title_uri,
+    state_title_after_uri,
+    state_title_version,
+    state_title_ok,
+    state_title_err
+};
+
+/* Request methods parse_title() recognises (method_init: none yet). */
+enum methods {
+    method_init = 0,
+    method_get,
+    method_post,
+    method_head
+};
+
+/*
+ * Identifiers for HTTP header fields, in three blank-line-separated groups.
+ * Nothing in tinyparser.c uses them yet.
+ */
+enum headers {
+    header_a_im = 0,
+    header_accept,
+    header_accept_charset,
+    header_accept_datetime,
+    header_accept_encoding,
+    header_accept_language,
+    header_access_control_request_method,
+    header_access_control_request_headers,
+    header_authorization,
+    header_cache_control,
+    header_connection,
+    header_content_encoding,
+    header_content_length,
+    header_content_md5,
+    header_content_type,
+    header_cookie,
+    header_date,
+    header_expect,
+    header_forwarded,
+    header_from,
+    header_host,
+    header_http2_settings,
+    header_if_match,
+    header_if_modified_since,
+    header_if_none_match,
+    header_if_range,
+    header_if_unmodified_since,
+    header_max_forwards,
+    header_origin,
+    header_pragma,
+    header_prefer,
+    header_proxy_authorization,
+    header_range,
+    header_referer,
+    header_te,
+    header_trailer,
+    header_transfer_encoding,
+    header_user_agent,
+    header_upgrade,
+    header_via,
+    header_warning,
+
+    header_upgrade_insecure_requests,
+    header_x_requested_with,
+    header_dnt,
+    header_x_forwarded_for,
+    header_x_forwarded_host,
+    header_x_forwarded_proto,
+    header_front_end_https,
+    header_x_http_method_override,
+    header_x_att_deviceid,
+    header_x_wap_profile,
+    header_proxy_connection,
+    header_x_uidh,
+    header_x_csrf_token,
+    header_x_request_id,
+    header_x_correlation_id,
+    header_correlation_id,
+    header_save_data,
+    header_sec_gpc,
+
+    header_accept_ch,
+    header_access_control_allow_origin,
+    header_access_control_allow_credentials,
+    header_access_control_expose_headers,
+    header_access_control_max_age,
+    header_access_control_allow_methods,
+    header_access_control_allow_headers,
+    header_accept_patch,
+    header_accept_ranges,
+    header_age,
+    header_allow,
+    header_alt_svc,
+    header_content_disposition,
+    header_content_language,
+    header_content_location,
+    header_content_range,
+    header_delta_base,
+    header_etag,
+    header_expires,
+    header_im,
+    header_last_modified,
+    header_link,
+    header_location,
+    header_p3p,
+    header_preference_applied,
+    header_proxy_authenticate,
+    header_public_key_pins,
+    header_retry_after,
+    header_server,
+    header_set_cookie,
+    header_strict_transport_security,
+    header_tk,
+    header_vary,
+    header_www_authenticate,
+    header_x_frame_options,
+    header_content_security_policy,
+    header_expect_ct,
+    header_nel,
+    header_permissions_policy,
+    header_refresh,
+    header_report_to,
+    header_status,
+    header_timing_allow_origin,
+    header_x_content_duration,
+    header_x_content_type_options,
+    header_x_powered_by,
+    header_x_redirect_by,
+    header_x_ua_compatible,
+    header_x_xss_protection
+};
+
+#endif