From 4d30b297a527259922563e239912bafe49026b71 Mon Sep 17 00:00:00 2001 From: kevinj Date: Wed, 31 Jul 2024 16:14:50 +0200 Subject: [PATCH] logic: use regex for parsing --- Makefile | 7 +- proxy.c | 109 +++++---- proxy_parse.c | 604 -------------------------------------------------- proxy_parse.h | 138 ------------ 4 files changed, 72 insertions(+), 786 deletions(-) delete mode 100644 proxy_parse.c delete mode 100644 proxy_parse.h diff --git a/Makefile b/Makefile index 14a8090..2fff5d3 100644 --- a/Makefile +++ b/Makefile @@ -1,12 +1,11 @@ -CC=bear --append -- gcc -CFLAGS= -g -Wall -Werror +CC=gcc +CFLAGS= -g3 -Wall -Werror all: proxy proxy: proxy.c - $(CC) $(CFLAGS) -o proxy_parse.o -c proxy_parse.c $(CC) $(CFLAGS) -o proxy.o -c proxy.c - $(CC) $(CFLAGS) -o proxy proxy_parse.o proxy.o + $(CC) $(CFLAGS) -o proxy proxy.o clean: rm -f proxy *.o diff --git a/proxy.c b/proxy.c index e3e9337..c6eadbe 100644 --- a/proxy.c +++ b/proxy.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "structs.h" #define PROXY_PORT 2020 @@ -12,54 +13,77 @@ #define PROXY_MAX_MSGLEN 10*1024 #define TITLE_DELIM " " +#define REGEX_MATCHN 4 +#define REGEX_TITLE "^([A-Z]+)[ ]+([a-zA-Z0-9\\:\\/\\_\\-\\.\\,]+)[ ]+([a-zA-Z0-9\\_\\-\\.\\,\\/]+)[ ]*[$\n\r]" +#define REGEX_HEADER "^(.*)[$\\n\\r]" + struct http_msg *child_msg; +regex_t preg; +regmatch_t pmatch[REGEX_MATCHN]; + +void *extractsub(const char *msg, regmatch_t match) { + int buflen = match.rm_eo - match.rm_so; + void *buf = (void *) calloc(1, buflen); + if (buf == NULL) + goto _return; + + sprintf(buf, "%.*s", buflen, &msg[match.rm_so]); + +_return: + return buf; +} + +int parse_header(char *msgbuff) { + return 0; +} int parse_title(char *msgbuff) { - char *title_end = strrchr(msgbuff, '\n'); - if (title_end == NULL) { - return -1; - } + int ret; - const int title_len = title_end - msgbuff; - char title[title_len]; - strncpy(title, msgbuff, title_len); + ret = regcomp(&preg, REGEX_TITLE, REG_EXTENDED); + if (ret != 0) + goto _err; - char *title_sub; - int index; - for (index = 0, title_sub = strtok(title, TITLE_DELIM); - title_sub != NULL; - title_sub = strtok(NULL, TITLE_DELIM), index++) { - - char *destarr = (char *) calloc(1, strlen(title_sub)+1); - if (destarr == NULL) { - goto error_title_props; - } + ret = regexec(&preg, msgbuff, REGEX_MATCHN, pmatch, 0); + if (ret != 0) + goto _err; - if (index == 0) { - child_msg->method = destarr; - strcpy(child_msg->method, title_sub); - } - else if (index == 1) { - child_msg->uri = destarr; - strcpy(child_msg->uri, title_sub); - } - else if (index == 2) { - child_msg->ver = destarr; - strcpy(child_msg->ver, title_sub); - } - else { - goto error_title_props; - } - } + child_msg->method = extractsub(msgbuff, pmatch[1]); + if (child_msg->method == NULL) + goto _err; + child_msg->uri = extractsub(msgbuff, pmatch[2]); + if (child_msg->uri == NULL) + goto _err; + + child_msg->ver = extractsub(msgbuff, pmatch[3]); + if (child_msg->ver == NULL) + goto _err; + + goto _ok; + +_err: + regfree(&preg); + return -1; + +_ok: + regfree(&preg); return 0; -error_title_props: - for (int i = 0; i <= index; i++) { - free(child_msg+i); +} + +int par_line = 0; + +int parse_line(char *line, int line_count) { + int ret = 0; + + if (line_count == 0) { + ret = parse_title(line); + } else { + ret = parse_header(line); } - return -1; + return ret; } void handle_request(int sockfd) { @@ -83,14 +107,19 @@ void handle_request(int sockfd) { goto end_sock; } + for (char *ln = strtok(msgbuff, "\n"); ln != NULL; ln = strtok(NULL, "\n"), par_line++) { + parse_line(ln, par_line); + } + // start parsing - ret = parse_title(msgbuff); + /*ret = parse_title(msgbuff); if (ret < 0) { fprintf(stderr, "[CHILD %d] Failed to parse the title of the request\n", id); goto end_structs; - } + }*/ -end_structs: + +//end_structs: free(child_msg); end_sock: diff --git a/proxy_parse.c b/proxy_parse.c deleted file mode 100644 index 65302da..0000000 --- a/proxy_parse.c +++ /dev/null @@ -1,604 +0,0 @@ -/* - proxy_parse.c -- a HTTP Request Parsing Library. - COS 461 -*/ - -#include "proxy_parse.h" - -#define DEFAULT_NHDRS 8 -#define MAX_REQ_LEN 65535 -#define MIN_REQ_LEN 4 - -static const char *root_abs_path = "/"; - -/* private function declartions */ -int ParsedRequest_printRequestLine(struct ParsedRequest *pr, - char * buf, size_t buflen, - size_t *tmp); -size_t ParsedRequest_requestLineLen(struct ParsedRequest *pr); - -/* - * debug() prints out debugging info if DEBUG is set to 1 - * - * parameter format: same as printf - * - */ -void debug(const char * format, ...) { - va_list args; - if (DEBUG) { - va_start(args, format); - vfprintf(stderr, format, args); - va_end(args); - } -} - - -/* - * ParsedHeader Public Methods - */ - -/* Set a header with key and value */ -int ParsedHeader_set(struct ParsedRequest *pr, - const char * key, const char * value) -{ - struct ParsedHeader *ph; - ParsedHeader_remove (pr, key); - - if (pr->headerslen <= pr->headersused+1) { - pr->headerslen = pr->headerslen * 2; - pr->headers = - (struct ParsedHeader *)realloc(pr->headers, - pr->headerslen * sizeof(struct ParsedHeader)); - if (!pr->headers) - return -1; - } - - ph = pr->headers + pr->headersused; - pr->headersused += 1; - - ph->key = (char *)malloc(strlen(key)+1); - memcpy(ph->key, key, strlen(key)); - ph->key[strlen(key)] = '\0'; - - ph->value = (char *)malloc(strlen(value)+1); - memcpy(ph->value, value, strlen(value)); - ph->value[strlen(value)] = '\0'; - - ph->keylen = strlen(key)+1; - ph->valuelen = strlen(value)+1; - return 0; -} - - -/* get the parsedHeader with the specified key or NULL */ -struct ParsedHeader* ParsedHeader_get(struct ParsedRequest *pr, - const char * key) -{ - size_t i = 0; - struct ParsedHeader * tmp; - while(pr->headersused > i) - { - tmp = pr->headers + i; - if(tmp->key && key && strcmp(tmp->key, key) == 0) - { - return tmp; - } - i++; - } - return NULL; -} - -/* remove the specified key from parsedHeader */ -int ParsedHeader_remove(struct ParsedRequest *pr, const char *key) -{ - struct ParsedHeader *tmp; - tmp = ParsedHeader_get(pr, key); - if(tmp == NULL) - return -1; - - free(tmp->key); - free(tmp->value); - tmp->key = NULL; - return 0; -} - - -/* modify the header with given key, giving it a new value - * return 1 on success and 0 if no such header found - * -int ParsedHeader_modify(struct ParsedRequest *pr, const char * key, - const char *newValue) -{ - struct ParsedHeader *tmp; - tmp = ParsedHeader_get(pr, key); - if(tmp != NULL) - { - if(tmp->valuelen < strlen(newValue)+1) - { - tmp->valuelen = strlen(newValue)+1; - tmp->value = (char *) realloc(tmp->value, - tmp->valuelen * sizeof(char)); - } - strcpy(tmp->value, newValue); - return 1; - } - return 0; -} -*/ - -/* - ParsedHeader Private Methods -*/ - -void ParsedHeader_create(struct ParsedRequest *pr) -{ - pr->headers = - (struct ParsedHeader *)malloc(sizeof(struct ParsedHeader)*DEFAULT_NHDRS); - pr->headerslen = DEFAULT_NHDRS; - pr->headersused = 0; -} - - -size_t ParsedHeader_lineLen(struct ParsedHeader * ph) -{ - if(ph->key != NULL) - { - return strlen(ph->key)+strlen(ph->value)+4; - } - return 0; -} - -size_t ParsedHeader_headersLen(struct ParsedRequest *pr) -{ - if (!pr || !pr->buf) - return 0; - - size_t i = 0; - int len = 0; - while(pr->headersused > i) - { - len += ParsedHeader_lineLen(pr->headers + i); - i++; - } - len += 2; - return len; -} - -int ParsedHeader_printHeaders(struct ParsedRequest * pr, char * buf, - size_t len) -{ - char * current = buf; - struct ParsedHeader * ph; - size_t i = 0; - - if(len < ParsedHeader_headersLen(pr)) - { - debug("buffer for printing headers too small\n"); - return -1; - } - - while(pr->headersused > i) - { - ph = pr->headers+i; - if (ph->key) { - memcpy(current, ph->key, strlen(ph->key)); - memcpy(current+strlen(ph->key), ": ", 2); - memcpy(current+strlen(ph->key) +2 , ph->value, - strlen(ph->value)); - memcpy(current+strlen(ph->key) +2+strlen(ph->value) , - "\r\n", 2); - current += strlen(ph->key)+strlen(ph->value)+4; - } - i++; - } - memcpy(current, "\r\n",2); - return 0; -} - - -void ParsedHeader_destroyOne(struct ParsedHeader * ph) -{ - if(ph->key != NULL) - { - free(ph->key); - ph->key = NULL; - free(ph->value); - ph->value = NULL; - ph->keylen = 0; - ph->valuelen = 0; - } -} - -void ParsedHeader_destroy(struct ParsedRequest * pr) -{ - size_t i = 0; - while(pr->headersused > i) - { - ParsedHeader_destroyOne(pr->headers + i); - i++; - } - pr->headersused = 0; - - free(pr->headers); - pr->headerslen = 0; -} - - -int ParsedHeader_parse(struct ParsedRequest * pr, char * line) -{ - char * key; - char * value; - char * index1; - char * index2; - - index1 = index(line, ':'); - if(index1 == NULL) - { - debug("No colon found\n"); - return -1; - } - key = (char *)malloc((index1-line+1)*sizeof(char)); - memcpy(key, line, index1-line); - key[index1-line]='\0'; - - index1 += 2; - index2 = strstr(index1, "\r\n"); - value = (char *) malloc((index2-index1+1)*sizeof(char)); - memcpy(value, index1, (index2-index1)); - value[index2-index1] = '\0'; - - ParsedHeader_set(pr, key, value); - free(key); - free(value); - return 0; -} - -/* - ParsedRequest Public Methods -*/ - -void ParsedRequest_destroy(struct ParsedRequest *pr) -{ - if(pr->buf != NULL) - { - free(pr->buf); - } - if (pr->path != NULL) { - free(pr->path); - } - if(pr->headerslen > 0) - { - ParsedHeader_destroy(pr); - } - free(pr); -} - -struct ParsedRequest* ParsedRequest_create() -{ - struct ParsedRequest *pr; - pr = (struct ParsedRequest *)malloc(sizeof(struct ParsedRequest)); - if (pr != NULL) - { - ParsedHeader_create(pr); - pr->buf = NULL; - pr->method = NULL; - pr->protocol = NULL; - pr->host = NULL; - pr->path = NULL; - pr->version = NULL; - pr->buf = NULL; - pr->buflen = 0; - } - return pr; -} - -/* - Recreate the entire buffer from a parsed request object. - buf must be allocated -*/ -int ParsedRequest_unparse(struct ParsedRequest *pr, char *buf, - size_t buflen) -{ - if (!pr || !pr->buf) - return -1; - - size_t tmp; - if (ParsedRequest_printRequestLine(pr, buf, buflen, &tmp) < 0) - return -1; - if (ParsedHeader_printHeaders(pr, buf+tmp, buflen-tmp) < 0) - return -1; - return 0; -} - -/* - Recreate the headers from a parsed request object. - buf must be allocated -*/ -int ParsedRequest_unparse_headers(struct ParsedRequest *pr, char *buf, - size_t buflen) -{ - if (!pr || !pr->buf) - return -1; - - if (ParsedHeader_printHeaders(pr, buf, buflen) < 0) - return -1; - return 0; -} - - -/* Size of the headers if unparsed into a string */ -size_t ParsedRequest_totalLen(struct ParsedRequest *pr) -{ - if (!pr || !pr->buf) - return 0; - return ParsedRequest_requestLineLen(pr)+ParsedHeader_headersLen(pr); -} - - -/* - Parse request buffer - - Parameters: - parse: ptr to a newly created ParsedRequest object - buf: ptr to the buffer containing the request (need not be NUL terminated) - and the trailing \r\n\r\n - buflen: length of the buffer including the trailing \r\n\r\n - - Return values: - -1: failure - 0: success -*/ -int -ParsedRequest_parse(struct ParsedRequest * parse, const char *buf, - int buflen) -{ - char *full_addr; - char *saveptr; - char *index; - char *currentHeader; - - if (parse->buf != NULL) { - debug("parse object already assigned to a request\n"); - return -1; - } - - if (buflen < MIN_REQ_LEN || buflen > MAX_REQ_LEN) { - debug("invalid buflen %d", buflen); - return -1; - } - - /* Create NUL terminated tmp buffer */ - char *tmp_buf = (char *)malloc(buflen + 1); /* including NUL */ - memcpy(tmp_buf, buf, buflen); - tmp_buf[buflen] = '\0'; - - index = strstr(tmp_buf, "\r\n\r\n"); - if (index == NULL) { - debug("invalid request line, no end of header\n"); - free(tmp_buf); - return -1; - } - - /* Copy request line into parse->buf */ - index = strstr(tmp_buf, "\r\n"); - if (parse->buf == NULL) { - parse->buf = (char *) malloc((index-tmp_buf)+1); - parse->buflen = (index-tmp_buf)+1; - } - memcpy(parse->buf, tmp_buf, index-tmp_buf); - parse->buf[index-tmp_buf] = '\0'; - - /* Parse request line */ - parse->method = strtok_r(parse->buf, " ", &saveptr); - if (parse->method == NULL) { - debug( "invalid request line, no whitespace\n"); - free(tmp_buf); - free(parse->buf); - parse->buf = NULL; - return -1; - } - if (strcmp (parse->method, "GET")) { - debug( "invalid request line, method not 'GET': %s\n", - parse->method); - free(tmp_buf); - free(parse->buf); - parse->buf = NULL; - return -1; - } - - full_addr = strtok_r(NULL, " ", &saveptr); - - if (full_addr == NULL) { - debug( "invalid request line, no full address\n"); - free(tmp_buf); - free(parse->buf); - parse->buf = NULL; - return -1; - } - - parse->version = full_addr + strlen(full_addr) + 1; - - if (parse->version == NULL) { - debug( "invalid request line, missing version\n"); - free(tmp_buf); - free(parse->buf); - parse->buf = NULL; - return -1; - } - if (strncmp (parse->version, "HTTP/", 5)) { - debug( "invalid request line, unsupported version %s\n", - parse->version); - free(tmp_buf); - free(parse->buf); - parse->buf = NULL; - return -1; - } - - - parse->protocol = strtok_r(full_addr, "://", &saveptr); - if (parse->protocol == NULL) { - debug( "invalid request line, missing host\n"); - free(tmp_buf); - free(parse->buf); - parse->buf = NULL; - return -1; - } - - const char *rem = full_addr + strlen(parse->protocol) + strlen("://"); - size_t abs_uri_len = strlen(rem); - - parse->host = strtok_r(NULL, "/", &saveptr); - if (parse->host == NULL) { - debug( "invalid request line, missing host\n"); - free(tmp_buf); - free(parse->buf); - parse->buf = NULL; - return -1; - } - - if (strlen(parse->host) == abs_uri_len) { - debug("invalid request line, missing absolute path\n"); - free(tmp_buf); - free(parse->buf); - parse->buf = NULL; - return -1; - } - - parse->path = strtok_r(NULL, " ", &saveptr); - if (parse->path == NULL) { // replace empty abs_path with "/" - int rlen = strlen(root_abs_path); - parse->path = (char *)malloc(rlen + 1); - strncpy(parse->path, root_abs_path, rlen + 1); - } else if (strncmp(parse->path, root_abs_path, strlen(root_abs_path)) == 0) { - debug("invalid request line, path cannot begin " - "with two slash characters\n"); - free(tmp_buf); - free(parse->buf); - parse->buf = NULL; - parse->path = NULL; - return -1; - } else { - // copy parse->path, prefix with a slash - char *tmp_path = parse->path; - int rlen = strlen(root_abs_path); - int plen = strlen(parse->path); - parse->path = (char *)malloc(rlen + plen + 1); - strncpy(parse->path, root_abs_path, rlen); - strncpy(parse->path + rlen, tmp_path, plen + 1); - } - - parse->host = strtok_r(parse->host, ":", &saveptr); - parse->port = strtok_r(NULL, "/", &saveptr); - - if (parse->host == NULL) { - debug( "invalid request line, missing host\n"); - free(tmp_buf); - free(parse->buf); - free(parse->path); - parse->buf = NULL; - parse->path = NULL; - return -1; - } - - if (parse->port != NULL) { - int port = strtol (parse->port, (char **)NULL, 10); - if (port == 0 && errno == EINVAL) { - debug("invalid request line, bad port: %s\n", parse->port); - free(tmp_buf); - free(parse->buf); - free(parse->path); - parse->buf = NULL; - parse->path = NULL; - return -1; - } - } - - - /* Parse headers */ - int ret = 0; - currentHeader = strstr(tmp_buf, "\r\n")+2; - while (currentHeader[0] != '\0' && - !(currentHeader[0] == '\r' && currentHeader[1] == '\n')) { - - //debug("line %s %s", parse->version, currentHeader); - - if (ParsedHeader_parse(parse, currentHeader)) { - ret = -1; - break; - } - - currentHeader = strstr(currentHeader, "\r\n"); - if (currentHeader == NULL || strlen (currentHeader) < 2) - break; - - currentHeader += 2; - } - free(tmp_buf); - return ret; -} - -/* - ParsedRequest Private Methods -*/ - -size_t ParsedRequest_requestLineLen(struct ParsedRequest *pr) -{ - if (!pr || !pr->buf) - return 0; - - size_t len = - strlen(pr->method) + 1 + strlen(pr->protocol) + 3 + - strlen(pr->host) + 1 + strlen(pr->version) + 2; - if(pr->port != NULL) - { - len += strlen(pr->port)+1; - } - /* path is at least a slash */ - len += strlen(pr->path); - return len; -} - -int ParsedRequest_printRequestLine(struct ParsedRequest *pr, - char * buf, size_t buflen, - size_t *tmp) -{ - char * current = buf; - - if(buflen < ParsedRequest_requestLineLen(pr)) - { - debug("not enough memory for first line\n"); - return -1; - } - memcpy(current, pr->method, strlen(pr->method)); - current += strlen(pr->method); - current[0] = ' '; - current += 1; - - memcpy(current, pr->protocol, strlen(pr->protocol)); - current += strlen(pr->protocol); - memcpy(current, "://", 3); - current += 3; - memcpy(current, pr->host, strlen(pr->host)); - current += strlen(pr->host); - if(pr->port != NULL) - { - current[0] = ':'; - current += 1; - memcpy(current, pr->port, strlen(pr->port)); - current += strlen(pr->port); - } - /* path is at least a slash */ - memcpy(current, pr->path, strlen(pr->path)); - current += strlen(pr->path); - - current[0] = ' '; - current += 1; - - memcpy(current, pr->version, strlen(pr->version)); - current += strlen(pr->version); - memcpy(current, "\r\n", 2); - current +=2; - *tmp = current-buf; - return 0; -} - diff --git a/proxy_parse.h b/proxy_parse.h deleted file mode 100644 index 6f47347..0000000 --- a/proxy_parse.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * proxy_parse.h -- a HTTP Request Parsing Library. - * - * Written by: Matvey Arye - * For: COS 518 - * - */ - -#include -#include -#include -#include -#include - -#include - -#ifndef PROXY_PARSE -#define PROXY_PARSE - -#define DEBUG 1 - -/* - ParsedRequest objects are created from parsing a buffer containing a HTTP - request. The request buffer consists of a request line followed by a number - of headers. Request line fields such as method, protocol etc. are stored - explicitly. Headers such as 'Content-Length' and their values are maintained - in a linked list. Each node in this list is a ParsedHeader and contains a - key-value pair. - - The buf and buflen fields are used internally to maintain the parsed request - line. - */ -struct ParsedRequest { - char *method; - char *protocol; - char *host; - char *port; - char *path; - char *version; - char *buf; - size_t buflen; - struct ParsedHeader *headers; - size_t headersused; - size_t headerslen; -}; - -/* - ParsedHeader: any header after the request line is a key-value pair with the - format "key:value\r\n" and is maintained in the ParsedHeader linked list - within ParsedRequest -*/ -struct ParsedHeader { - char * key; - size_t keylen; - char * value; - size_t valuelen; -}; - - -/* Create an empty parsing object to be used exactly once for parsing a single - * request buffer */ -struct ParsedRequest* ParsedRequest_create(); - -/* Parse the request buffer in buf given that buf is of length buflen */ -int ParsedRequest_parse(struct ParsedRequest * parse, const char *buf, - int buflen); - -/* Destroy the parsing object. */ -void ParsedRequest_destroy(struct ParsedRequest *pr); - -/* - Retrieve the entire buffer from a parsed request object. buf must be an - allocated buffer of size buflen, with enough space to write the request - line, headers and the trailing \r\n. buf will not be NUL terminated by - unparse(). - */ -int ParsedRequest_unparse(struct ParsedRequest *pr, char *buf, - size_t buflen); - -/* - Retrieve the entire buffer with the exception of request line from a parsed - request object. buf must be an allocated buffer of size buflen, with enough - space to write the headers and the trailing \r\n. buf will not be NUL - terminated by unparse(). If there are no headers, the trailing \r\n is - unparsed. - */ -int ParsedRequest_unparse_headers(struct ParsedRequest *pr, char *buf, - size_t buflen); - -/* Total length including request line, headers and the trailing \r\n*/ -size_t ParsedRequest_totalLen(struct ParsedRequest *pr); - -/* Length including headers, if any, and the trailing \r\n but excluding the - * request line. - */ -size_t ParsedHeader_headersLen(struct ParsedRequest *pr); - -/* Set, get, and remove null-terminated header keys and values */ -int ParsedHeader_set(struct ParsedRequest *pr, const char * key, - const char * value); -struct ParsedHeader* ParsedHeader_get(struct ParsedRequest *pr, - const char * key); -int ParsedHeader_remove (struct ParsedRequest *pr, const char * key); - -/* debug() prints out debugging info if DEBUG is set to 1 */ -void debug(const char * format, ...); - -/* Example usage: - - const char *c = - "GET http://www.google.com:80/index.html/ HTTP/1.0\r\nContent-Length:" - " 80\r\nIf-Modified-Since: Sat, 29 Oct 1994 19:43:31 GMT\r\n\r\n"; - - int len = strlen(c); - if (ParsedRequest_parse(req, c, len) < 0) { - printf("parse failed\n"); - return -1; - } - - printf("Method:%s\n", req->method); - printf("Host:%s\n", req->host); - - int rlen = ParsedRequest_totalLen(req); - char *b = (char *)malloc(rlen+1); - if (ParsedRequest_unparse(req, b, rlen) < 0) { - printf("unparse failed\n"); - return -1; - } - b[rlen]='\0'; - - struct ParsedHeader *r = ParsedHeader_get(req, "If-Modified-Since"); - printf("Modified value: %s\n", r->value); - - ParsedRequest_destroy(req); -*/ - -#endif -