Compare commits

...

3 Commits

Author SHA1 Message Date
f0f11e111b repo: update readme 2024-09-17 18:49:01 +02:00
dbcf0d8ef4 proxy: implement a relaying system
also remove now defunct functions +
rewrite state machine
2024-09-17 18:40:01 +02:00
009913283c feat: cover chunked encoding for body segmentation 2024-09-15 01:52:51 +02:00
5 changed files with 283 additions and 266 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
*.o *.o
proxy proxy
compile_commands.json compile_commands.json
.cache

20
README
View File

@ -4,12 +4,17 @@
Proxy C library for HTTP/s protocol Proxy C library for HTTP/s protocol
NOTE: I have yet to turn this piece of clutter into a library.
RFC: datatracker.ietf.org/doc/html/rfc1945 RFC: datatracker.ietf.org/doc/html/rfc1945
RES: cs.princeton.edu/courses/archive/spr13/cos461/assignments-proxy.html RES: cs.princeton.edu/courses/archive/spr13/cos461/assignments-proxy.html
RES: Beej's Guide to Network Programming - Using Internet Sockets RES: Beej's Guide to Network Programming - Using Internet Sockets
RES: en.wikipedia.org/wiki/Proxy_server RES: en.wikipedia.org/wiki/Proxy_server
RES: TCP/IP sockets in C - Practical guide for programmers 2nd edition RES: TCP/IP sockets in C - Practical guide for programmers 2nd edition
RES: tinyproxy.github.io/
RES: github.com/nginx/nginx
TECHNICALS TECHNICALS
* Parsing is carried out by a dedicated sub-library I wrote, named parslib. * Parsing is carried out by a dedicated sub-library I wrote, named parslib.
@ -18,6 +23,10 @@ TECHNICALS
no matter the amount of strings you want to compare it against (I am indeed no matter the amount of strings you want to compare it against (I am indeed
aware it is nonetheless worse than hashtables - maybe one day I will implement aware it is nonetheless worse than hashtables - maybe one day I will implement
those on my own as well). those on my own as well).
* After the connection is established with the upstream server, a relay loop
is started in which client data is relayed to the server's socket and
server data is relayed to the client's socket. This design is heavily
inspired by tinyproxy.
TASKS TASKS
@ -26,15 +35,16 @@ TASKS
[DONE] Implement client message parsing [DONE] Implement client message parsing
[DONE] Implement server message parsing [DONE] Implement server message parsing
[DONE] Add loose string checking for headers [DONE] Add loose string checking for headers
[DONE] Relaying mechanism
[DOING] Cover all possible body segmentation standards [DOING] Cover all possible body segmentation standards
[DONE] Cover Content-Length [DONE] Cover Content-Length
[DOING] Cover "chunked transfer encoding" [DONE] Cover "chunked transfer encoding"
[DOING] Cover "compress transfer encoding" [TODO] Cover "compress transfer encoding"
[DOING] Cover "deflate transfer encoding" [TODO] Cover "deflate transfer encoding"
[DOING] Cover "gzip/x-gzip transfer encoding" [TODO] Cover "gzip/x-gzip transfer encoding"
[ACTIVELY DOING] More testing, debugging, fixing [ACTIVELY DOING] More testing, debugging, fixing
[ACTIVELY DOING] Verify and search for memory leaks [ACTIVELY DOING] Verify and search for memory leaks
[TODO] Implement HTTPS with OpenSSL, LibreSSL, or BearSSL [TODO] Implement HTTPS
[TODO] Caching? [TODO] Caching?
COMMITS COMMITS

BIN
proxlib

Binary file not shown.

491
proxlib.c
View File

@ -7,12 +7,14 @@
#include <arpa/inet.h> #include <arpa/inet.h>
#include <netinet/in.h> #include <netinet/in.h>
#include <netdb.h> #include <netdb.h>
#include <poll.h>
#include "proxlib.h" #include "proxlib.h"
#include "parslib/parslib.h" #include "parslib/parslib.h"
int on = 1; int on = 1;
int debug = 3; int debug = 1;
int statem; int statem = 0;
int err = 0;
#define SEGMENT_LEN 512 #define SEGMENT_LEN 512
#define MAX_BUFF_LEN 128 * 1024 #define MAX_BUFF_LEN 128 * 1024
@ -92,181 +94,107 @@ int read_line(int fd,
return 0; return 0;
} }
int parse_line(char *line, int line_count) { int pull_content_length(int fd, int len, int *msgbuff_len, char **msgbuff) {
int ret = 0; int ret = 0;
int line_len = len;
return ret; char *line = (char *) calloc(1, line_len);
}
void do_err(void) {
int statem_code = statem & (~STATEM_ERR);
fprintf(stderr, "[%d,%d,%d] Errored out!\n", statem, statem_code,
STATEM_ERR);
}
int do_fwd_clt(struct conn *conn) {
int bytes = 0;
int ret = 0;
while (bytes < conn->srvbuff_len) {
ret = write(conn->cltfd, conn->srvbuff+bytes, conn->srvbuff_len-bytes);
if (ret < 0)
return -1;
bytes += ret;
}
return 0;
}
int do_rcv_srv(struct conn *conn) {
int ret = 0;
char *line = NULL;
char *msgbuff = NULL;
int line_len = 0;
int msgbuff_len = 0;
// response line
ret = read_line(conn->srvfd, &line_len, &line, &msgbuff_len, &msgbuff);
if (ret < 0) {
fprintf(stderr, "Failed receiving response line from upstream\n");
return -1;
}
if (debug == 1) {
fprintf(stdout, "debug - [upstream] received line: %s\n", line);
}
ret = parestitl(line, line_len, &(conn->srvres.titl));
if (ret < 0) {
fprintf(stderr, "Failed parsing response line\n");
return -1;
}
if (debug == 1) {
fprintf(stdout, "debug - [upstream] parsed response line\n");
}
free(line);
// headers
int next_header = 1;
while (next_header) {
ret = read_line(conn->srvfd, &line_len, &line, &msgbuff_len, &msgbuff);
if (ret < 0) {
fprintf(stderr, "Failed receiving header line\n");
return -1;
}
if (line_len == 0) {
if (debug == 1) {
fprintf(stdout, "debug - [upstream] reached end of headers\n");
}
next_header = 0;
continue;
}
if (debug == 1) {
fprintf(stdout, "debug - [upstream] received line: %s\n", line);
}
ret = parshfield(line, line_len, conn->srvres.hentries);
if (ret < 0) {
fprintf(stderr, "Failed parsing header field\n");
return -1;
}
if (debug == 1) {
fprintf(stdout, "debug - parsed header field\n");
}
free(line);
}
// body
body:
struct httpares *res = &conn->srvres;
struct point *content_length_entry = &res->hentries[header_content_length];
if (content_length_entry->er == NULL) {
fprintf(stderr, "[upstream] no content length header\n");
return -1;
}
int content_length = 0;
ret = stoin(content_length_entry->er, content_length_entry->len, &content_length);
if (ret < 0) {
fprintf(stderr, "[upstream] failed parsing content length header\n");
return -1;
}
line_len = content_length;
line = (char *) calloc(1, line_len);
if (!line) { if (!line) {
fprintf(stderr, "[upstream] not enough dynamic memory\n"); return ERR_MEM;
return -1;
} }
int bytes = 0; int bytes = 0;
do { do {
ret = recv(conn->srvfd, line+bytes, line_len-bytes, MSG_WAITALL); ret = recv(fd, line+bytes, line_len-bytes, MSG_WAITALL);
if (ret < 0) { if (ret < 0) {
fprintf(stderr, "[upstream] failed reading body from response\n"); return ERR_MEM;
return -1;
} }
bytes += ret; bytes += ret;
} while (bytes < line_len); } while (bytes < line_len);
msgbuff = (char *) realloc(msgbuff, msgbuff_len+line_len); *msgbuff = (char *) realloc(*msgbuff, *msgbuff_len+line_len);
if (!msgbuff) { if (!*msgbuff) {
fprintf(stderr, "[upstream] not enough dynamic memory\n"); return ERR_MEM;
return -1;
} }
memcpy(msgbuff+msgbuff_len, line, line_len); memcpy(*msgbuff+*msgbuff_len, line, line_len);
msgbuff_len += line_len; *msgbuff_len += line_len;
if (debug <= 2) { return 0;
fprintf(stdout, "------------------------------\n"); }
fprintf(stdout, "debug - [upstream] received body %d: %.*s\n", line_len, line_len, line);
fprintf(stdout, "------------------------------\n");
}
if (debug <= 2) { int pull_chunked_encoding(int fd, int *msgbuff_len, char **msgbuff) {
fprintf(stdout, "printing parsed response\n"); int ret = 0;
printfpares(&conn->srvres); char *line = NULL;
} int line_len = 0;
conn->srvbuff = msgbuff; while (1) {
conn->srvbuff_len = msgbuff_len; ret = read_line(fd, &line_len, &line, msgbuff_len, msgbuff);
if (ret < 0) {
fprintf(stderr, "Failed receiving chunked body from upstream\n");
return -1;
}
return 0; line_len = strtol(line, (char **) 0, 16);
if (!line_len) {
break;
}
line_len += 2;
free(line);
line = (char *) calloc(1, line_len);
if (!line) {
fprintf(stderr, "Not enough dynamic memory\n");
return -1;
}
int bytes = 0;
do {
ret = recv(fd, line+bytes, line_len-bytes, MSG_WAITALL);
if (ret < 0) {
fprintf(stderr, "Failed reading respones body from server\n");
return -1;
}
bytes += ret;
} while (bytes < line_len);
*msgbuff = (char *) realloc(*msgbuff, *msgbuff_len+line_len);
if (!msgbuff) {
fprintf(stderr, "Not enough dynamic memory\n");
return -1;
}
memcpy(*msgbuff+*msgbuff_len, line, line_len);
*msgbuff_len += line_len;
free(line);
}
return 0;
}
void do_err(void) {
fprintf(stderr, "failed with error code %d\n", err);
} }
int do_con_srv(struct conn *conn) { int do_con_srv(struct conn *conn) {
statem = state_con_srv;
int ret = 0; int ret = 0;
struct httpareq *req = &conn->cltreq; struct httpareq *req = &conn->cltreq;
struct point *host = &req->hentries[header_host]; struct point *host = &req->hentries[header_host];
if (host->er == NULL) { if (host->er == NULL) {
if (debug <= 2) { return ERR_PARS;
fprintf(stderr, "debug - request does not have HOST header\n");
}
goto _exit;
} }
struct hostinfo *info = (struct hostinfo *) calloc(1, sizeof(struct hostinfo)); struct hostinfo *info = (struct hostinfo *) calloc(1, sizeof(struct hostinfo));
if (!info) { if (!info) {
goto _exit; return ERR_MEM;
} }
ret = pahostinfo(host->er, host->len, info); ret = pahostinfo(host->er, host->len, info);
if (ret < 0) { if (ret < 0) {
if (debug <= 2) { return ERR_PARS;
fprintf(stderr, "Failed parsing upstream host header\n");
}
goto _exit_hostinfo;
}
if (debug <= 2) {
fprintf(stdout, "Establishing connection with upstream: %.*s : %.*s\n", info->hostname_len, info->hostname, info->service_len, info->service);
} }
struct addrinfo hints; struct addrinfo hints;
@ -278,45 +206,37 @@ int do_con_srv(struct conn *conn) {
ret = getaddrinfo(info->hostname, info->service, &hints, &res); ret = getaddrinfo(info->hostname, info->service, &hints, &res);
if (ret < 0) { if (ret < 0) {
goto _exit_hostinfo; free(info->hostname);
free(info->service);
free(info);
return ERR_PARS;
} }
ret = conn->srvfd = socket(res->ai_family, res->ai_socktype, ret = conn->srvfd = socket(res->ai_family, res->ai_socktype,
res->ai_protocol); res->ai_protocol);
if (ret < 0) { if (ret < 0) {
goto _exit_getaddrinfo; freeaddrinfo(res);
free(info->hostname);
free(info->service);
free(info);
return ERR_PARS;
} }
ret = connect(conn->srvfd, res->ai_addr, res->ai_addrlen); ret = connect(conn->srvfd, res->ai_addr, res->ai_addrlen);
if (ret < 0) { if (ret < 0) {
goto _exit_getaddrinfo; freeaddrinfo(res);
free(info->hostname);
free(info->service);
free(info);
return ERR_PARS;
} }
_exit_getaddrinfo:
freeaddrinfo(res);
_exit_hostinfo:
free(info->hostname);
free(info->service);
free(info);
_exit:
return ret; return ret;
} }
int do_fwd_srv(struct conn *conn) {
int bytes = 0;
int ret = 0;
while (bytes < conn->cltbuff_len) {
ret = write(conn->srvfd, conn->cltbuff+bytes, conn->cltbuff_len-bytes);
if (ret < 0)
return -1;
bytes += ret;
}
return 0;
}
int do_rcv_clt(struct conn *conn) { int do_rcv_clt(struct conn *conn) {
statem = state_rcv_clt;
int ret = 0; int ret = 0;
char *line = NULL; char *line = NULL;
char *msgbuff = NULL; char *msgbuff = NULL;
@ -326,22 +246,12 @@ int do_rcv_clt(struct conn *conn) {
// request line // request line
ret = read_line(conn->cltfd, &line_len, &line, &msgbuff_len, &msgbuff); ret = read_line(conn->cltfd, &line_len, &line, &msgbuff_len, &msgbuff);
if (ret < 0) { if (ret < 0) {
fprintf(stderr, "Failed receiving request line\n"); return ERR_RECV;
return -1;
}
if (debug == 1) {
fprintf(stdout, "debug - received line: %s\n", line);
} }
ret = pareqtitl(line, line_len, &(conn->cltreq.titl)); ret = pareqtitl(line, line_len, &(conn->cltreq.titl));
if (ret < 0) { if (ret < 0) {
fprintf(stderr, "Failed parsing request line\n"); return ERR_PARSTITLE;
return -1;
}
if (debug == 1) {
fprintf(stdout, "debug - parsed request line\n");
} }
free(line); free(line);
@ -351,94 +261,170 @@ int do_rcv_clt(struct conn *conn) {
while (next_header) { while (next_header) {
ret = read_line(conn->cltfd, &line_len, &line, &msgbuff_len, &msgbuff); ret = read_line(conn->cltfd, &line_len, &line, &msgbuff_len, &msgbuff);
if (ret < 0) { if (ret < 0) {
fprintf(stderr, "Failed receiving header line\n"); return ERR_RECV;
return -1;
} }
if (line_len == 0) { if (line_len == 0) {
if (debug == 1) {
fprintf(stdout, "debug - reached end of headers\n");
}
next_header = 0; next_header = 0;
continue; continue;
} }
if (debug == 1) {
fprintf(stdout, "debug - received line: %s\n", line);
}
ret = parshfield(line, line_len, conn->cltreq.hentries); ret = parshfield(line, line_len, conn->cltreq.hentries);
if (ret < 0) { if (ret < 0) {
fprintf(stderr, "Failed parsing header field\n"); return ERR_PARSHEADER;
return -1;
}
if (debug == 1) {
fprintf(stdout, "debug - parsed header field\n");
} }
free(line); free(line);
} }
// body // body
// TODO struct httpareq *req = &conn->cltreq;
struct point *content_length_entry = &req->hentries[header_content_length];
struct point *transfer_encoding_entry = &req->hentries[header_transfer_encoding];
if (content_length_entry->er) {
int content_length = 0;
if (debug <= 2) { ret = stoin(content_length_entry->er, content_length_entry->len, &content_length);
fprintf(stdout, "printing parsed request\n"); if (ret < 0) {
printfpareq(&conn->cltreq); return ERR_PARS;
} }
ret = pull_content_length(conn->srvfd, content_length, &msgbuff_len, &msgbuff);
if (ret < 0) {
return ERR_RECV;
}
} else if (transfer_encoding_entry->er && strcmp(transfer_encoding_entry->er, "chunked") == 0) {
ret = pull_chunked_encoding(conn->srvfd, &msgbuff_len, &msgbuff);
if (ret < 0) {
return ERR_RECV;
}
}
conn->cltbuff = msgbuff; conn->cltbuff = msgbuff;
conn->cltbuff_len = msgbuff_len; conn->cltbuff_len = msgbuff_len;
return 0; return 0;
} }
void do_clear(struct conn *conn) { int read_buffer(int fd, char **buff, int *len) {
statem = STATEM_RCV_CLT; char *tmp = (char *) malloc(RELAY_BUFFER_SIZE);
frepareq(&conn->cltreq); if (!tmp) {
frepares(&conn->srvres); return ERR_MEM;
free(conn->cltbuff); }
free(conn->srvbuff);
} memset(tmp, 0, RELAY_BUFFER_SIZE);
int bytes = recv(fd, tmp, RELAY_BUFFER_SIZE, 0);
if (bytes <= 0) {
free(tmp);
return ERR_RECV;
}
*buff = realloc(*buff, *len+bytes);
if (!*buff) {
free(tmp);
return ERR_MEM;
}
memcpy(*buff+*len, tmp, bytes);
*len += bytes;
return 0;
}
/*
 * Send pending bytes from *buff to fd with a single send() call and keep
 * whatever the socket did not accept at the front of the buffer.
 *
 * fd    socket to write to.
 * buff  in/out: heap buffer holding the pending bytes. It is released and set
 *       to NULL once everything has been sent; a later realloc(NULL, n) on it
 *       behaves like malloc(n).
 * len   in/out: number of pending bytes in *buff; reduced by the amount sent.
 *       A non-positive value is normalised to 0.
 *
 * Returns 0 on success (including "nothing to send") and ERR_SEND when send()
 * fails; on failure *buff and *len are left untouched.
 */
int write_buffer(int fd, char **buff, int *len) {
	if (*len <= 0) {
		*len = 0;
		return 0;
	}

	ssize_t written = send(fd, *buff, (size_t) *len, 0);
	if (written < 0) {
		return ERR_SEND;
	}

	int remaining = *len - (int) written;
	if (remaining == 0) {
		/*
		 * Fully drained. Do not allocate a zero-sized replacement:
		 * malloc(0) may legitimately return NULL, which would turn a
		 * successful send into a spurious out-of-memory error.
		 */
		free(*buff);
		*buff = NULL;
	} else {
		/* Partial send: slide the unsent tail to the front, in place. */
		memmove(*buff, *buff + written, (size_t) remaining);
	}
	*len = remaining;
	return 0;
}
void do_statem(struct conn *conn) { void do_statem(struct conn *conn) {
int ret = 0; int ret = 0;
for (int counter = 0; counter < MAX_BOUND; counter++) { ret = do_rcv_clt(conn);
switch (statem & (~STATEM_ERR)) { if (ret < 0) {
case STATEM_RCV_CLT: err = ret;
ret = do_rcv_clt(conn); do_err();
break;
case STATEM_CON_SRV:
ret = do_con_srv(conn);
break;
case STATEM_FWD_SRV:
ret = do_fwd_srv(conn);
break;
case STATEM_RCV_SRV:
ret = do_rcv_srv(conn);
break;
case STATEM_FWD_CLT:
ret = do_fwd_clt(conn);
break;
}
if (ret < 0)
statem |= STATEM_ERR;
if (statem & STATEM_ERR) {
do_err();
break;
}
if (statem & STATEM_FWD_CLT) {
do_clear(conn);
continue;
}
statem <<= 1;
} }
ret = do_con_srv(conn);
if (ret < 0) {
err = ret;
do_err();
}
// relay the data between the two sockets until the end of time
ssize_t bytes_received;
struct pollfd fds[2];
for (;;) {
memset(fds, 0, 2*sizeof(struct pollfd));
fds[0].fd = conn->cltfd;
fds[1].fd = conn->srvfd;
if (conn->srvbuff_len > 0) {
fds[0].events |= POLLOUT;
}
if (conn->cltbuff_len > 0) {
fds[1].events |= POLLOUT;
}
if (!conn->srvbuff_len) {
fds[1].events |= POLLIN;
}
if (!conn->cltbuff_len) {
fds[0].events |= POLLIN;
}
ret = poll(fds, 2, 1000);
if (fds[1].revents & POLLOUT) {
ret = write_buffer(conn->srvfd, &conn->cltbuff, &conn->cltbuff_len);
}
if (fds[1].revents & POLLIN) {
ret = read_buffer(conn->srvfd, &conn->srvbuff, &conn->srvbuff_len);
}
if (fds[0].revents & POLLIN) {
ret = read_buffer(conn->cltfd, &conn->cltbuff, &conn->cltbuff_len);
}
if (fds[0].revents & POLLOUT) {
ret = write_buffer(conn->cltfd, &conn->srvbuff, &conn->srvbuff_len);
}
if (fds[0].revents & POLLHUP) {
break;
}
if (ret < 0) {
break;
}
}
if (conn->cltbuff_len > 0) {
write_buffer(conn->srvfd, &conn->cltbuff, &conn->cltbuff_len);
}
if (conn->srvbuff_len > 0) {
write_buffer(conn->cltfd, &conn->srvbuff, &conn->srvbuff_len);
}
close(conn->cltfd);
close(conn->srvfd);
exit(0); // child die
} }
int do_srv(void) { int do_srv(void) {
@ -476,9 +462,8 @@ int do_srv(void) {
return -1; return -1;
} }
fprintf(stdout, "Listening on port %d\n", PROXY_PORT);
for (;;) { for (;;) {
fprintf(stdout, "listening for sockets\n");
struct sockaddr_in new_clt_addr; struct sockaddr_in new_clt_addr;
socklen_t new_clt_addr_len= sizeof(new_clt_addr); socklen_t new_clt_addr_len= sizeof(new_clt_addr);
int new_clt_sock; int new_clt_sock;
@ -490,6 +475,7 @@ int do_srv(void) {
"client\n"); "client\n");
return -1; return -1;
} }
fprintf(stdout, "accepted new client socket\n");
ret = fork(); ret = fork();
if (ret < 0) { if (ret < 0) {
@ -499,12 +485,11 @@ int do_srv(void) {
} }
if (ret > 0) { if (ret > 0) {
fprintf(stdout, "[PROGRAM] Successfully forked a new child process" fprintf(stdout, "+new request process:%d(pid)\n", ret);
" with PID %d\n", ret);
continue; continue;
} }
// child // request process
struct conn *conn = (struct conn *) calloc(1, sizeof(struct conn)); struct conn *conn = (struct conn *) calloc(1, sizeof(struct conn));
if (!conn) { if (!conn) {
fprintf(stderr, "Not enough dynamic memory to establish connection\n"); fprintf(stderr, "Not enough dynamic memory to establish connection\n");
@ -512,14 +497,9 @@ int do_srv(void) {
} }
conn->cltfd = new_clt_sock; conn->cltfd = new_clt_sock;
statem = STATEM_RCV_CLT; statem = state_rcv_clt;
do_statem(conn); do_statem(conn);
free(conn); free(conn);
if (debug == 1) {
fprintf(stdout, "Finished proxying client\n");
}
return 0; return 0;
} }
@ -533,7 +513,10 @@ int main(int argc, char *argv[]) {
return -1; return -1;
} }
return do_srv(); ret = do_srv();
if (ret < 0) {
return -1;
}
fretres(); fretres();
} }

View File

@ -4,18 +4,41 @@
#include "parslib/parslib.h" #include "parslib/parslib.h"
#ifndef MAX_BOUND #ifndef MAX_BOUND
#define MAX_BOUND 10000 #define MAX_BOUND 100000000000
#endif #endif
#define PROXY_PORT 2020 #define PROXY_PORT 2020
#define PROXY_CONN 20 #define PROXY_CONN 20
#define RELAY_BUFFER_SIZE 1024*2
#define RELAY_POLL_TIMEOUT 1000
#define STATEM_RCV_CLT 0b00000001 enum states {
#define STATEM_CON_SRV 0b00000010 state_rcv_clt = 0,
#define STATEM_FWD_SRV 0b00000100 state_con_srv,
#define STATEM_RCV_SRV 0b00001000 state_fwd_srv,
#define STATEM_FWD_CLT 0b00010000 state_rcv_srv,
#define STATEM_ERR 0b00100000 state_fwd_clt,
state_ok
};
/*
 * Negative status codes returned by the proxy routines; 0 means success and
 * callers test for failure with `ret < 0`.
 * The values are parenthesised so each macro expands to a single operand no
 * matter which expression it is used in.
 */
#define ERR_GENERIC    (-1) /* not used in the visible code */
#define ERR_MEM        (-2) /* dynamic memory allocation failed */
#define ERR_RECV       (-3) /* receiving from a socket failed */
#define ERR_SEND       (-4) /* sending to a socket failed */
#define ERR_PARS       (-5) /* parsing failed (host header, content length, ...) */
#define ERR_PARSTITLE  (-6) /* parsing the request line failed */
#define ERR_PARSHEADER (-7) /* parsing a header field failed */
#define ERR_SUPPORT    (-8) /* not used in the visible code */
#define ERR_TIMEOUT    (-9) /* not used in the visible code */
/*
 * Printable names of the connection states, listed in state order:
 * index 0 is "state_rcv_clt" and index 5 is "state_ok".
 */
char *states_str[] = {
	"state_rcv_clt", "state_con_srv", "state_fwd_srv",
	"state_rcv_srv", "state_fwd_clt", "state_ok",
};
struct conn { struct conn {
int cltfd; int cltfd;