feat: implement parsing of http title

This commit is contained in:
Kevin J. 2024-08-16 01:12:06 +02:00
commit f428190016
6 changed files with 367 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
# Build artefacts
*.o
/tinyparser

# Tooling output
compile_commands.json

13
Makefile Normal file
View File

@ -0,0 +1,13 @@
# Build configuration for the tinyparser prototype.
CC=gcc
CFLAGS=-g3 -Wall -Wextra
CFILES=tinyparser.c
HFILES=tinyparser.h
OUTPUT=tinyparser

# None of these targets name a file they produce.
.PHONY: all run clean

all: $(OUTPUT)

# Relink only when a source or header actually changed.
$(OUTPUT): $(CFILES) $(HFILES)
	$(CC) $(CFLAGS) $(CFILES) -o $(OUTPUT)

# Make sure the binary is up to date before running it.
run: $(OUTPUT)
	./$(OUTPUT)

clean:
	rm -f $(OUTPUT)

6
README Normal file
View File

@ -0,0 +1,6 @@
+================+
| tinyparser |
+================+
HTTP parsing prototype and algorithm development environment for tinyproxy - my
new proxy.

BIN
tinyparser Executable file

Binary file not shown.

195
tinyparser.c Normal file
View File

@ -0,0 +1,195 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "tinyparser.h"
/*
 * Parser state. These are mutable file-scope variables that parse_title()
 * reads and writes directly.
 */
/* Last error code recorded by the parser; starts as err_init. */
enum errs err = err_init;
/* Current position of the request-line state machine in parse_title(). */
enum states state = state_init;
/* Method recognised by parse_title(); stays method_init until one matches. */
enum methods method = method_init;
/* strndup() copy of the URI token, or NULL while nothing has been parsed. */
char *uri = NULL;
/* strndup() copy of the version token, or NULL while nothing has been parsed. */
char *ver = NULL;
/* Length in bytes of the token stored in uri. */
int uri_len = 0;
/* Length in bytes of the token stored in ver. */
int ver_len = 0;
/*
 * Consume one line from the buffer that *cursor points into.
 *
 * cursor      - in/out: start of the line on entry; on return it points just
 *               past the line's '\n', or at the end of the buffer when the
 *               remaining bytes contain no '\n'.
 * cursor_size - number of readable bytes starting at *cursor.
 *
 * Returns the number of bytes that precede the '\n' (a '\r' belonging to a
 * CRLF terminator is included in that count), or cursor_size when there is
 * no '\n' within the first cursor_size bytes. Returns 0 and leaves *cursor
 * untouched when cursor_size is not positive.
 */
int read_line(char **cursor, int cursor_size) {
    if (cursor_size <= 0)
        return 0;

    char *start = *cursor;
    /* Bounded search: never look past the bytes the caller vouched for. */
    char *newline = memchr(start, '\n', (size_t)cursor_size);

    if (!newline) {
        /* Unterminated final line: every remaining byte belongs to it. */
        *cursor = start + cursor_size;
        return cursor_size;
    }

    *cursor = newline + 1;
    return (int)(newline - start);
}
/*
 * Check whether the bytes at ch begin with the NUL-terminated string orig.
 *
 * ch    - input to examine.
 * orig  - expected literal.
 * limit - exclusive end of the readable input; bytes at or beyond it are
 *         never inspected.
 *
 * Returns the length of orig when all of it is present before limit, and -1
 * otherwise. Input that ends (by limit or by NUL) before orig is complete is
 * a mismatch, so "GE" does not match "GET". An empty orig yields 0.
 */
int cmp_orig(char *ch, char *orig, char *limit) {
    int diff = 0;

    while (*orig != '\0') {
        /* Bounds check comes first so *ch is only read inside the input. */
        if (ch >= limit || *ch != *orig)
            return -1;
        orig++;
        ch++;
        diff++;
    }

    return diff;
}
/* Return 1 when c is a byte cmp_ascii() accepts inside a token, else 0. */
static int is_token_byte(char c) {
    if (c >= '0' && c <= '9')
        return 1;
    if (c >= 'A' && c <= 'Z')
        return 1;
    if (c >= 'a' && c <= 'z')
        return 1;
    /* Contiguous ASCII run: # $ % & ' ( ) * + , - . / */
    if (c >= '#' && c <= '/')
        return 1;

    switch (c) {
    case '!':
    case ':':
    case ';':
    case '=':
    case '?':
    case '@':
    case '[':
    case ']':
    case '_':
    case '~':
        return 1;
    default:
        return 0;
    }
}

/*
 * Measure the token that starts at ch.
 *
 * The token ends at the first space, at a NUL byte, or at limit (exclusive),
 * whichever comes first.
 *
 * Returns the token length (0 when ch already sits on a terminator), or -1
 * if a byte outside the accepted set is met before the token ends.
 */
int cmp_ascii(char *ch, char *limit) {
    int diff = 0;

    /* Test the bound before dereferencing so *limit itself is never read. */
    for (; ch < limit && *ch != '\0' && *ch != ' '; ch++, diff++) {
        if (!is_token_byte(*ch))
            return -1;
    }

    return diff;
}
int parse_title(char *title_offset, int title_len) {
int title_cursor = 0;
while (state != state_title_ok && state != state_title_err) {
if (title_cursor >= title_len) {
state = state_title_err;
err = err_parse_title;
}
int ret = 0;
char *ch = title_offset+title_cursor;
char *lim = title_offset+title_len-1;
switch (state) {
case state_init:
state = state_title_method;
break;
case state_title_method:
if ((ret = cmp_orig(ch, "GET", lim))) {
title_cursor += ret;
state = state_title_after_method;
method = method_get;
break;
}
if ((ret = cmp_orig(ch, "POST", lim))) {
title_cursor += ret;
state = state_title_after_method;
method = method_post;
break;
}
if ((ret = cmp_orig(ch, "HEAD", lim))) {
title_cursor += ret;
state = state_title_after_method;
method = method_head;
break;
}
state = state_title_err;
err = err_parse_title_method;
break;
case state_title_after_method:
title_cursor++;
state = state_title_uri;
break;
case state_title_uri:
if ((ret = cmp_ascii(ch, lim)) <= 0) {
state = state_title_err;
err = err_parse_title_uri;
break;
}
if (!(uri = strndup(ch, ret))) {
state = state_title_err;
err = err_memory;
break;
}
uri_len = ret;
title_cursor += ret;
state = state_title_after_uri;
break;
case state_title_after_uri:
title_cursor++;
state = state_title_version;
break;
case state_title_version:
if ((ret = cmp_ascii(ch, lim)) <= 0) {
state = state_title_err;
err = err_parse_title_version;
break;
}
if (!(ver = strndup(ch, ret))) {
state = state_title_err;
err = err_memory;
break;
}
ver_len = ret;
title_cursor += ret;
state = state_title_ok;
break;
case state_title_ok:
for (;ch < lim;ch++) {
if (*ch == ' ') continue;
if (*ch == '\t') continue;
if (*ch == '\0') continue;
state = state_title_err;
err = err_parse_title;
break;
}
break;
case state_title_err:
fprintf(stderr, "failed\n");
return -1;
break;
default:
break;
}
}
return 0;
}
/*
 * Parse the request held in the NUL-terminated string buffer.
 *
 * Only the first line is examined: it is split off with read_line() and
 * handed to parse_title().
 *
 * Returns 0 when parse_title() succeeds; otherwise writes a diagnostic to
 * stderr and returns -1.
 */
int parse_request(char *buffer) {
    char *line_start = buffer;
    int total = strlen(buffer);
    int line_len = read_line(&buffer, total);
    int status = parse_title(line_start, line_len);

    if (status >= 0)
        return 0;

    fprintf(stderr, "Failed parsing title\n");
    return -1;
}
/*
 * Run the parser once over the TEST_ONE sample request.
 *
 * Returns 0 when parsing succeeds and -1 when the working copy cannot be
 * allocated or parse_request() reports an error.
 */
int main(void) {
    /* Work on a heap copy of the sample rather than on the string literal. */
    char *str = strdup(TEST_ONE);
    if (!str) {
        fprintf(stderr, "Failed allocating request buffer\n");
        return -1;
    }

    int ret = parse_request(str);

    /* Release the copy on every path, not just the successful one. */
    free(str);

    if (ret < 0) {
        fprintf(stderr, "Failed parsing request\n");
        return -1;
    }

    fprintf(stderr, "Finished parsing request\n");
    return 0;
}

151
tinyparser.h Normal file
View File

@ -0,0 +1,151 @@
#ifndef PARSER_H
#define PARSER_H
/* Presumably a line buffer size in bytes; not referenced by the code in tinyparser.c. */
#define LINE_BUF 1024
/*
 * Sample HTTP request that main() passes to parse_request().
 * NOTE(review): the header lines are not followed by an empty "\r\n" line,
 * so this is not a complete HTTP message head. Confirm that is intended
 * before header parsing is built on top of it.
 */
#define TEST_ONE "GET / HTTP/1.1\r\n"\
"Host: archive.0xdeadbeer.xyz\r\n"\
"User-Agent: curl/8.9.1\r\n"\
"Accept: */*\r\n"
/* Error codes stored in the global `err` by the parser. */
enum errs {
err_init = 0, /* initial value of `err` */
err_generic, /* not set anywhere in tinyparser.c as shown */
err_memory, /* strndup() failed while copying the URI or version token */
err_parse_title, /* input ran out, or trailing bytes were unexpected */
err_parse_title_method, /* method token did not match a known method */
err_parse_title_uri, /* URI token was empty or held a rejected byte */
err_parse_title_version /* version token was empty or held a rejected byte */
};
/* States of the request-line state machine driven by parse_title(). */
enum states {
state_init = 0, /* nothing parsed yet */
state_title_method, /* expecting the method token */
state_title_after_method, /* skipping the byte that follows the method */
state_title_uri, /* expecting the URI token */
state_title_after_uri, /* skipping the byte that follows the URI */
state_title_version, /* expecting the version token */
state_title_ok, /* terminal: request line accepted */
state_title_err /* terminal: request line rejected, see `err` */
};
enum methods {
method_init = 0,
method_get,
method_post,
method_head
};
/*
 * Identifiers for HTTP header field names. Each enumerator is the field name
 * lower-cased with '-' replaced by '_' (for example header_content_length
 * for "Content-Length"). The values are consecutive, starting at 0.
 *
 * Nothing in tinyparser.c as shown refers to this enum yet.
 * NOTE(review): the list appears to run through request fields first and
 * response fields second (from header_accept_ch onward); confirm before
 * relying on that ordering.
 */
enum headers {
header_a_im = 0,
header_accept,
header_accept_charset,
header_accept_datetime,
header_accept_encoding,
header_accept_language,
header_access_control_request_method,
header_access_control_request_headers,
header_authorization,
header_cache_control,
header_connection,
header_content_encoding,
header_content_length,
header_content_md5,
header_content_type,
header_cookie,
header_date,
header_expect,
header_forwarded,
header_from,
header_host,
header_http2_settings,
header_if_match,
header_if_modified_since,
header_if_none_match,
header_if_range,
header_if_unmodified_since,
header_max_forwards,
header_origin,
header_pragma,
header_prefer,
header_proxy_authorization,
header_range,
header_referer,
header_te,
header_trailer,
header_transfer_encoding,
header_user_agent,
header_upgrade,
header_via,
header_warning,
header_upgrade_insecure_requests,
header_x_requested_with,
header_dnt,
header_x_forwarded_for,
header_x_forwarded_host,
header_x_forwarded_proto,
header_front_end_https,
header_x_http_method_override,
header_x_att_deviceid,
header_x_wap_profile,
header_proxy_connection,
header_x_uidh,
header_x_csrf_token,
header_x_request_id,
header_x_correlation_id,
header_correlation_id,
header_save_data,
header_sec_gpc,
header_accept_ch,
header_access_control_allow_origin,
header_access_control_allow_credentials,
header_access_control_expose_headers,
header_access_control_max_age,
header_access_control_allow_methods,
header_access_control_allow_headers,
header_accept_patch,
header_accept_ranges,
header_age,
header_allow,
header_alt_svc,
header_content_disposition,
header_content_language,
header_content_location,
header_content_range,
header_delta_base,
header_etag,
header_expires,
header_im,
header_last_modified,
header_link,
header_location,
header_p3p,
header_preference_applied,
header_proxy_authenticate,
header_public_key_pins,
header_retry_after,
header_server,
header_set_cookie,
header_strict_transport_security,
header_tk,
header_vary,
header_www_authenticate,
header_x_frame_options,
header_content_security_policy,
header_expect_ct,
header_nel,
header_permissions_policy,
header_refresh,
header_report_to,
header_status,
header_timing_allow_origin,
header_x_content_duration,
header_x_content_type_options,
header_x_powered_by,
header_x_redirect_by,
header_x_ua_compatible,
header_x_xss_protection
};
#endif