Compare commits

..

3 Commits

Author SHA1 Message Date
f0f11e111b repo: update readme 2024-09-17 18:49:01 +02:00
dbcf0d8ef4 proxy: implement a relaying system
also remove now defunct functions +
rewrite state machine
2024-09-17 18:40:01 +02:00
009913283c feat: cover chunked encoding for body segmentation 2024-09-15 01:52:51 +02:00
5 changed files with 283 additions and 266 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
*.o
proxy
compile_commands.json
.cache

20
README
View File

@ -4,12 +4,17 @@
Proxy C library for HTTP/s protocol
NOTE: I have yet to turn this piece of clutter into a library.
RFC: datatracker.ietf.org/doc/html/rfc1945
RES: cs.princeton.edu/courses/archive/spr13/cos461/assignments-proxy.html
RES: Beej's Guide to Network Programming - Using Internet Sockets
RES: en.wikipedia.org/wiki/Proxy_server
RES: TCP/IP sockets in C - Practical guide for programmers 2nd edition
RES: tinyproxy.github.io/
RES: github.com/nginx/nginx
TECHNICALS
* Parsing is carried out by a dedicated sub-library which I wrote, named parslib.
@ -18,6 +23,10 @@ TECHNICALS
no matter the amount of strings you want to compare it against (I am indeed
aware it is nonetheless worse than hashtables - maybe one day I will implement
those on my own as well).
* After the connection is established with the upstream server, a relay loop
is started in which the client data is relayed to the server's socket and
server data is relayed to the client's socket. This is heavily inspired by
tinyproxy.
TASKS
@ -26,15 +35,16 @@ TASKS
[DONE] Implement client message parsing
[DONE] Implement server message parsing
[DONE] Add loose string checking for headers
[DONE] Relaying mechanism
[DOING] Cover all possible body segmentation standards
[DONE] Cover Content-Length
[DOING] Cover "chunked transfer encoding"
[DOING] Cover "compress transfer encoding"
[DOING] Cover "deflate transfer encoding"
[DOING] Cover "gzip/x-gzip transfer encoding"
[DONE] Cover "chunked transfer encoding"
[TODO] Cover "compress transfer encoding"
[TODO] Cover "deflate transfer encoding"
[TODO] Cover "gzip/x-gzip transfer encoding"
[ACTIVELY DOING] More testing, debugging, fixing
[ACTIVELY DOING] Verify and search for memory leaks
[TODO] Implement HTTPS with OpenSSL, LibreSSL, or BearSSL
[TODO] Implement HTTPS
[TODO] Caching?
COMMITS

BIN
proxlib

Binary file not shown.

491
proxlib.c
View File

@ -7,12 +7,14 @@
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
#include <poll.h>
#include "proxlib.h"
#include "parslib/parslib.h"
int on = 1;
int debug = 3;
int statem;
int debug = 1;
int statem = 0;
int err = 0;
/* Buffer sizing constants, in bytes. */
#define SEGMENT_LEN 512
/* Parenthesized so the product stays a single operand wherever the macro is
 * expanded (e.g. `x / MAX_BUFF_LEN` or `x % MAX_BUFF_LEN`). */
#define MAX_BUFF_LEN (128 * 1024)
@ -92,181 +94,107 @@ int read_line(int fd,
return 0;
}
int parse_line(char *line, int line_count) {
int pull_content_length(int fd, int len, int *msgbuff_len, char **msgbuff) {
int ret = 0;
return ret;
}
/*
 * Report the current state-machine failure on stderr.
 *
 * Prints three integers: the raw `statem` value, `statem` with the
 * STATEM_ERR bit masked out, and the STATEM_ERR bit itself.
 */
void do_err(void) {
    fprintf(stderr, "[%d,%d,%d] Errored out!\n",
            statem,
            statem & (~STATEM_ERR),
            STATEM_ERR);
}
/*
 * Write the whole of conn->srvbuff (conn->srvbuff_len bytes) to conn->cltfd.
 *
 * write() may accept fewer bytes than requested, so the call is repeated on
 * the unsent tail until nothing is left.
 *
 * Returns 0 once every byte has been written, -1 as soon as write() fails.
 */
int do_fwd_clt(struct conn *conn) {
    int sent = 0;

    for (;;) {
        int left = conn->srvbuff_len - sent;
        if (left <= 0) {
            break;
        }

        int n = write(conn->cltfd, conn->srvbuff + sent, left);
        if (n < 0) {
            return -1;
        }
        sent += n;
    }

    return 0;
}
int do_rcv_srv(struct conn *conn) {
int ret = 0;
char *line = NULL;
char *msgbuff = NULL;
int line_len = 0;
int msgbuff_len = 0;
// response line
ret = read_line(conn->srvfd, &line_len, &line, &msgbuff_len, &msgbuff);
if (ret < 0) {
fprintf(stderr, "Failed receiving response line from upstream\n");
return -1;
}
if (debug == 1) {
fprintf(stdout, "debug - [upstream] received line: %s\n", line);
}
ret = parestitl(line, line_len, &(conn->srvres.titl));
if (ret < 0) {
fprintf(stderr, "Failed parsing response line\n");
return -1;
}
if (debug == 1) {
fprintf(stdout, "debug - [upstream] parsed response line\n");
}
free(line);
// headers
int next_header = 1;
while (next_header) {
ret = read_line(conn->srvfd, &line_len, &line, &msgbuff_len, &msgbuff);
if (ret < 0) {
fprintf(stderr, "Failed receiving header line\n");
return -1;
}
if (line_len == 0) {
if (debug == 1) {
fprintf(stdout, "debug - [upstream] reached end of headers\n");
}
next_header = 0;
continue;
}
if (debug == 1) {
fprintf(stdout, "debug - [upstream] received line: %s\n", line);
}
ret = parshfield(line, line_len, conn->srvres.hentries);
if (ret < 0) {
fprintf(stderr, "Failed parsing header field\n");
return -1;
}
if (debug == 1) {
fprintf(stdout, "debug - parsed header field\n");
}
free(line);
}
// body
body:
struct httpares *res = &conn->srvres;
struct point *content_length_entry = &res->hentries[header_content_length];
if (content_length_entry->er == NULL) {
fprintf(stderr, "[upstream] no content length header\n");
return -1;
}
int content_length = 0;
ret = stoin(content_length_entry->er, content_length_entry->len, &content_length);
if (ret < 0) {
fprintf(stderr, "[upstream] failed parsing content length header\n");
return -1;
}
line_len = content_length;
line = (char *) calloc(1, line_len);
int line_len = len;
char *line = (char *) calloc(1, line_len);
if (!line) {
fprintf(stderr, "[upstream] not enough dynamic memory\n");
return -1;
return ERR_MEM;
}
int bytes = 0;
do {
ret = recv(conn->srvfd, line+bytes, line_len-bytes, MSG_WAITALL);
ret = recv(fd, line+bytes, line_len-bytes, MSG_WAITALL);
if (ret < 0) {
fprintf(stderr, "[upstream] failed reading body from response\n");
return -1;
return ERR_MEM;
}
bytes += ret;
} while (bytes < line_len);
msgbuff = (char *) realloc(msgbuff, msgbuff_len+line_len);
if (!msgbuff) {
fprintf(stderr, "[upstream] not enough dynamic memory\n");
return -1;
*msgbuff = (char *) realloc(*msgbuff, *msgbuff_len+line_len);
if (!*msgbuff) {
return ERR_MEM;
}
memcpy(msgbuff+msgbuff_len, line, line_len);
msgbuff_len += line_len;
memcpy(*msgbuff+*msgbuff_len, line, line_len);
*msgbuff_len += line_len;
if (debug <= 2) {
fprintf(stdout, "------------------------------\n");
fprintf(stdout, "debug - [upstream] received body %d: %.*s\n", line_len, line_len, line);
fprintf(stdout, "------------------------------\n");
}
return 0;
}
if (debug <= 2) {
fprintf(stdout, "printing parsed response\n");
printfpares(&conn->srvres);
}
int pull_chunked_encoding(int fd, int *msgbuff_len, char **msgbuff) {
int ret = 0;
char *line = NULL;
int line_len = 0;
conn->srvbuff = msgbuff;
conn->srvbuff_len = msgbuff_len;
while (1) {
ret = read_line(fd, &line_len, &line, msgbuff_len, msgbuff);
if (ret < 0) {
fprintf(stderr, "Failed receiving chunked body from upstream\n");
return -1;
}
return 0;
line_len = strtol(line, (char **) 0, 16);
if (!line_len) {
break;
}
line_len += 2;
free(line);
line = (char *) calloc(1, line_len);
if (!line) {
fprintf(stderr, "Not enough dynamic memory\n");
return -1;
}
int bytes = 0;
do {
ret = recv(fd, line+bytes, line_len-bytes, MSG_WAITALL);
if (ret < 0) {
fprintf(stderr, "Failed reading respones body from server\n");
return -1;
}
bytes += ret;
} while (bytes < line_len);
*msgbuff = (char *) realloc(*msgbuff, *msgbuff_len+line_len);
if (!msgbuff) {
fprintf(stderr, "Not enough dynamic memory\n");
return -1;
}
memcpy(*msgbuff+*msgbuff_len, line, line_len);
*msgbuff_len += line_len;
free(line);
}
return 0;
}
/*
 * Report the most recent failure on stderr.
 *
 * Reads the file-scope `err` variable and prints it as a decimal integer.
 */
void do_err(void) {
    const int code = err;

    fprintf(stderr, "failed with error code %d\n", code);
}
int do_con_srv(struct conn *conn) {
statem = state_con_srv;
int ret = 0;
struct httpareq *req = &conn->cltreq;
struct point *host = &req->hentries[header_host];
if (host->er == NULL) {
if (debug <= 2) {
fprintf(stderr, "debug - request does not have HOST header\n");
}
goto _exit;
return ERR_PARS;
}
struct hostinfo *info = (struct hostinfo *) calloc(1, sizeof(struct hostinfo));
if (!info) {
goto _exit;
return ERR_MEM;
}
ret = pahostinfo(host->er, host->len, info);
if (ret < 0) {
if (debug <= 2) {
fprintf(stderr, "Failed parsing upstream host header\n");
}
goto _exit_hostinfo;
}
if (debug <= 2) {
fprintf(stdout, "Establishing connection with upstream: %.*s : %.*s\n", info->hostname_len, info->hostname, info->service_len, info->service);
return ERR_PARS;
}
struct addrinfo hints;
@ -278,45 +206,37 @@ int do_con_srv(struct conn *conn) {
ret = getaddrinfo(info->hostname, info->service, &hints, &res);
if (ret < 0) {
goto _exit_hostinfo;
free(info->hostname);
free(info->service);
free(info);
return ERR_PARS;
}
ret = conn->srvfd = socket(res->ai_family, res->ai_socktype,
res->ai_protocol);
if (ret < 0) {
goto _exit_getaddrinfo;
freeaddrinfo(res);
free(info->hostname);
free(info->service);
free(info);
return ERR_PARS;
}
ret = connect(conn->srvfd, res->ai_addr, res->ai_addrlen);
if (ret < 0) {
goto _exit_getaddrinfo;
freeaddrinfo(res);
free(info->hostname);
free(info->service);
free(info);
return ERR_PARS;
}
_exit_getaddrinfo:
freeaddrinfo(res);
_exit_hostinfo:
free(info->hostname);
free(info->service);
free(info);
_exit:
return ret;
}
/*
 * Write the whole of conn->cltbuff (conn->cltbuff_len bytes) to conn->srvfd.
 *
 * write() may accept fewer bytes than requested, so the call is repeated on
 * the unsent tail until nothing is left.
 *
 * Returns 0 once every byte has been written, -1 as soon as write() fails.
 */
int do_fwd_srv(struct conn *conn) {
    int sent = 0;

    for (;;) {
        int left = conn->cltbuff_len - sent;
        if (left <= 0) {
            break;
        }

        int n = write(conn->srvfd, conn->cltbuff + sent, left);
        if (n < 0) {
            return -1;
        }
        sent += n;
    }

    return 0;
}
int do_rcv_clt(struct conn *conn) {
statem = state_rcv_clt;
int ret = 0;
char *line = NULL;
char *msgbuff = NULL;
@ -326,22 +246,12 @@ int do_rcv_clt(struct conn *conn) {
// request line
ret = read_line(conn->cltfd, &line_len, &line, &msgbuff_len, &msgbuff);
if (ret < 0) {
fprintf(stderr, "Failed receiving request line\n");
return -1;
}
if (debug == 1) {
fprintf(stdout, "debug - received line: %s\n", line);
return ERR_RECV;
}
ret = pareqtitl(line, line_len, &(conn->cltreq.titl));
if (ret < 0) {
fprintf(stderr, "Failed parsing request line\n");
return -1;
}
if (debug == 1) {
fprintf(stdout, "debug - parsed request line\n");
return ERR_PARSTITLE;
}
free(line);
@ -351,94 +261,170 @@ int do_rcv_clt(struct conn *conn) {
while (next_header) {
ret = read_line(conn->cltfd, &line_len, &line, &msgbuff_len, &msgbuff);
if (ret < 0) {
fprintf(stderr, "Failed receiving header line\n");
return -1;
return ERR_RECV;
}
if (line_len == 0) {
if (debug == 1) {
fprintf(stdout, "debug - reached end of headers\n");
}
next_header = 0;
continue;
}
if (debug == 1) {
fprintf(stdout, "debug - received line: %s\n", line);
}
ret = parshfield(line, line_len, conn->cltreq.hentries);
if (ret < 0) {
fprintf(stderr, "Failed parsing header field\n");
return -1;
}
if (debug == 1) {
fprintf(stdout, "debug - parsed header field\n");
return ERR_PARSHEADER;
}
free(line);
}
// body
// TODO
struct httpareq *req = &conn->cltreq;
struct point *content_length_entry = &req->hentries[header_content_length];
struct point *transfer_encoding_entry = &req->hentries[header_transfer_encoding];
if (content_length_entry->er) {
int content_length = 0;
if (debug <= 2) {
fprintf(stdout, "printing parsed request\n");
printfpareq(&conn->cltreq);
}
ret = stoin(content_length_entry->er, content_length_entry->len, &content_length);
if (ret < 0) {
return ERR_PARS;
}
ret = pull_content_length(conn->srvfd, content_length, &msgbuff_len, &msgbuff);
if (ret < 0) {
return ERR_RECV;
}
} else if (transfer_encoding_entry->er && strcmp(transfer_encoding_entry->er, "chunked") == 0) {
ret = pull_chunked_encoding(conn->srvfd, &msgbuff_len, &msgbuff);
if (ret < 0) {
return ERR_RECV;
}
}
conn->cltbuff = msgbuff;
conn->cltbuff_len = msgbuff_len;
return 0;
}
void do_clear(struct conn *conn) {
statem = STATEM_RCV_CLT;
frepareq(&conn->cltreq);
frepares(&conn->srvres);
free(conn->cltbuff);
free(conn->srvbuff);
}
/*
 * Receive up to RELAY_BUFFER_SIZE bytes from `fd` and append them to the
 * heap buffer `*buff`, which currently holds `*len` bytes.
 *
 * The buffer is grown first and recv() writes straight into the new tail, so
 * no temporary allocation is needed (the previous version leaked its scratch
 * buffer on every successful read).
 *
 * Parameters:
 *   fd   - socket to read from.
 *   buff - in/out: pointer to the heap buffer; may point to NULL when *len
 *          is 0. Updated if realloc() moves the buffer.
 *   len  - in/out: number of valid bytes in *buff; increased by the number
 *          of bytes received.
 *
 * Returns 0 on success, ERR_MEM if the buffer cannot be grown (the existing
 * buffer and *len are left untouched), or ERR_RECV if recv() fails or reports
 * that the peer closed the connection (returns 0).
 */
int read_buffer(int fd, char **buff, int *len) {
    /* Keep the old pointer until realloc() succeeds so it is not lost on
     * failure. */
    char *grown = realloc(*buff, (size_t)*len + (RELAY_BUFFER_SIZE));
    if (!grown) {
        return ERR_MEM;
    }
    *buff = grown;

    ssize_t received = recv(fd, grown + *len, (RELAY_BUFFER_SIZE), 0);
    if (received <= 0) {
        /* *len is unchanged, so the spare capacity is simply ignored. */
        return ERR_RECV;
    }

    *len += (int)received;
    return 0;
}
/*
 * Send as much of `*buff` (holding `*len` bytes) to `fd` as a single send()
 * call accepts, then drop the sent prefix from the buffer.
 *
 * The unsent tail is shifted to the front of the same allocation with
 * memmove(). The previous version allocated a new buffer per call and, when
 * everything had been sent, called malloc(0), which may legally return NULL
 * and was then reported as ERR_MEM.
 *
 * Parameters:
 *   fd   - socket to write to.
 *   buff - in/out: pointer to the heap buffer holding pending data. The
 *          allocation is kept (not shrunk or freed) by this function.
 *   len  - in/out: number of pending bytes; decreased by the number of bytes
 *          sent. A non-positive value is normalised to 0.
 *
 * Returns 0 on success (including when there was nothing to send) or
 * ERR_SEND if send() fails.
 */
int write_buffer(int fd, char **buff, int *len) {
    if (*len <= 0) {
        *len = 0;
        return 0;
    }

    ssize_t written = send(fd, *buff, (size_t)*len, 0);
    if (written < 0) {
        return ERR_SEND;
    }

    int remaining = *len - (int)written;
    if (remaining > 0) {
        /* Source and destination overlap, so memmove() rather than memcpy(). */
        memmove(*buff, *buff + written, (size_t)remaining);
    }
    *len = remaining;

    return 0;
}
void do_statem(struct conn *conn) {
int ret = 0;
int ret = 0;
for (int counter = 0; counter < MAX_BOUND; counter++) {
switch (statem & (~STATEM_ERR)) {
case STATEM_RCV_CLT:
ret = do_rcv_clt(conn);
break;
case STATEM_CON_SRV:
ret = do_con_srv(conn);
break;
case STATEM_FWD_SRV:
ret = do_fwd_srv(conn);
break;
case STATEM_RCV_SRV:
ret = do_rcv_srv(conn);
break;
case STATEM_FWD_CLT:
ret = do_fwd_clt(conn);
break;
}
if (ret < 0)
statem |= STATEM_ERR;
if (statem & STATEM_ERR) {
do_err();
break;
}
if (statem & STATEM_FWD_CLT) {
do_clear(conn);
continue;
}
statem <<= 1;
ret = do_rcv_clt(conn);
if (ret < 0) {
err = ret;
do_err();
}
ret = do_con_srv(conn);
if (ret < 0) {
err = ret;
do_err();
}
// relay the data between the two sockets until the end of time
ssize_t bytes_received;
struct pollfd fds[2];
for (;;) {
memset(fds, 0, 2*sizeof(struct pollfd));
fds[0].fd = conn->cltfd;
fds[1].fd = conn->srvfd;
if (conn->srvbuff_len > 0) {
fds[0].events |= POLLOUT;
}
if (conn->cltbuff_len > 0) {
fds[1].events |= POLLOUT;
}
if (!conn->srvbuff_len) {
fds[1].events |= POLLIN;
}
if (!conn->cltbuff_len) {
fds[0].events |= POLLIN;
}
ret = poll(fds, 2, 1000);
if (fds[1].revents & POLLOUT) {
ret = write_buffer(conn->srvfd, &conn->cltbuff, &conn->cltbuff_len);
}
if (fds[1].revents & POLLIN) {
ret = read_buffer(conn->srvfd, &conn->srvbuff, &conn->srvbuff_len);
}
if (fds[0].revents & POLLIN) {
ret = read_buffer(conn->cltfd, &conn->cltbuff, &conn->cltbuff_len);
}
if (fds[0].revents & POLLOUT) {
ret = write_buffer(conn->cltfd, &conn->srvbuff, &conn->srvbuff_len);
}
if (fds[0].revents & POLLHUP) {
break;
}
if (ret < 0) {
break;
}
}
if (conn->cltbuff_len > 0) {
write_buffer(conn->srvfd, &conn->cltbuff, &conn->cltbuff_len);
}
if (conn->srvbuff_len > 0) {
write_buffer(conn->cltfd, &conn->srvbuff, &conn->srvbuff_len);
}
close(conn->cltfd);
close(conn->srvfd);
exit(0); // child die
}
int do_srv(void) {
@ -476,9 +462,8 @@ int do_srv(void) {
return -1;
}
fprintf(stdout, "Listening on port %d\n", PROXY_PORT);
for (;;) {
fprintf(stdout, "listening for sockets\n");
struct sockaddr_in new_clt_addr;
socklen_t new_clt_addr_len= sizeof(new_clt_addr);
int new_clt_sock;
@ -490,6 +475,7 @@ int do_srv(void) {
"client\n");
return -1;
}
fprintf(stdout, "accepted new client socket\n");
ret = fork();
if (ret < 0) {
@ -499,12 +485,11 @@ int do_srv(void) {
}
if (ret > 0) {
fprintf(stdout, "[PROGRAM] Successfully forked a new child process"
" with PID %d\n", ret);
fprintf(stdout, "+new request process:%d(pid)\n", ret);
continue;
}
// child
// request process
struct conn *conn = (struct conn *) calloc(1, sizeof(struct conn));
if (!conn) {
fprintf(stderr, "Not enough dynamic memory to establish connection\n");
@ -512,14 +497,9 @@ int do_srv(void) {
}
conn->cltfd = new_clt_sock;
statem = STATEM_RCV_CLT;
statem = state_rcv_clt;
do_statem(conn);
free(conn);
if (debug == 1) {
fprintf(stdout, "Finished proxying client\n");
}
return 0;
}
@ -533,7 +513,10 @@ int main(int argc, char *argv[]) {
return -1;
}
return do_srv();
ret = do_srv();
if (ret < 0) {
return -1;
}
fretres();
}

View File

@ -4,18 +4,41 @@
#include "parslib/parslib.h"
#ifndef MAX_BOUND
#define MAX_BOUND 10000
#define MAX_BOUND 100000000000
#endif
/* TCP port printed in the "Listening on port" message at startup. */
#define PROXY_PORT 2020
#define PROXY_CONN 20
/* Maximum number of bytes requested from recv() per relay read.
 * Parenthesized so the product stays a single operand wherever the macro is
 * expanded (e.g. `x / RELAY_BUFFER_SIZE`). */
#define RELAY_BUFFER_SIZE (1024 * 2)
#define RELAY_POLL_TIMEOUT 1000
#define STATEM_RCV_CLT 0b00000001
#define STATEM_CON_SRV 0b00000010
#define STATEM_FWD_SRV 0b00000100
#define STATEM_RCV_SRV 0b00001000
#define STATEM_FWD_CLT 0b00010000
#define STATEM_ERR 0b00100000
/*
 * Identifiers for the proxy's processing states.
 *
 * Values are consecutive starting at 0; they are written out explicitly so
 * the numeric value of each state is visible at a glance.
 */
enum states {
    state_rcv_clt = 0,
    state_con_srv = 1,
    state_fwd_srv = 2,
    state_rcv_srv = 3,
    state_fwd_clt = 4,
    state_ok      = 5
};
/*
 * Error codes returned by the proxy routines. All are negative so callers
 * can test for failure with `ret < 0`.
 */
#define ERR_GENERIC    (-1)
#define ERR_MEM        (-2)  /* dynamic memory allocation failed */
#define ERR_RECV       (-3)  /* receiving from a socket failed */
#define ERR_SEND       (-4)  /* sending to a socket failed */
#define ERR_PARS       (-5)  /* parsing failed */
#define ERR_PARSTITLE  (-6)  /* parsing the request line failed */
#define ERR_PARSHEADER (-7)  /* parsing a header field failed */
#define ERR_SUPPORT    (-8)
#define ERR_TIMEOUT    (-9)
/*
 * Printable names for the processing states, one string per entry, listed in
 * the same order as the state_* enumerators so a state value can be used as
 * an index.
 *
 * NOTE(review): this looks like a header file; if so, this is a definition
 * (not an extern declaration) and will be emitted in every translation unit
 * that includes it — confirm only one .c file includes this header, or move
 * the definition into a .c file.
 */
char *states_str[] = {
"state_rcv_clt",
"state_con_srv",
"state_fwd_srv",
"state_rcv_srv",
"state_fwd_clt",
"state_ok"
};
struct conn {
int cltfd;