Compare commits

..

No commits in common. "f0f11e111b21063e21bc12325e1d7b3b4c019656" and "f1e2efc9d2ebfa0bd5605293cf9ddaa8179131c6" have entirely different histories.

5 changed files with 261 additions and 278 deletions

1
.gitignore vendored
View File

@ -1,4 +1,3 @@
*.o *.o
proxy proxy
compile_commands.json compile_commands.json
.cache

20
README
View File

@ -4,17 +4,12 @@
Proxy C library for HTTP/s protocol Proxy C library for HTTP/s protocol
NOTE: I have yet to turn this piece of clutter into a library.
RFC: datatracker.ietf.org/doc/html/rfc1945 RFC: datatracker.ietf.org/doc/html/rfc1945
RES: cs.princeton.edu/courses/archive/spr13/cos461/assignments-proxy.html RES: cs.princeton.edu/courses/archive/spr13/cos461/assignments-proxy.html
RES: Beej's Guide to Network Programming - Using Internet Sockets RES: Beej's Guide to Network Programming - Using Internet Sockets
RES: en.wikipedia.org/wiki/Proxy_server RES: en.wikipedia.org/wiki/Proxy_server
RES: TCP/IP sockets in C - Practical guide for programmers 2nd edition RES: TCP/IP sockets in C - Practical guide for programmers 2nd edition
RES: tinyproxy.github.io/
RES: github.com/nginx/nginx
TECHNICALS TECHNICALS
* Parsing is carried out by a dedicated sub-library which I wrote, named parslib. * Parsing is carried out by a dedicated sub-library which I wrote, named parslib.
@ -23,10 +18,6 @@ TECHNICALS
no matter the number of strings you want to compare it against (I am indeed no matter the number of strings you want to compare it against (I am indeed
aware it is nonetheless worse than hashtables - maybe one day I will implement aware it is nonetheless worse than hashtables - maybe one day I will implement
those on my own as well). those on my own as well).
* After the connection is established with the upstream server, a relay loop
is started in which the client data is relayed to the server's socket and
the server data is relayed to the client's socket. This is heavily inspired by
tinyproxy.
TASKS TASKS
@ -35,16 +26,15 @@ TASKS
[DONE] Implement client message parsing [DONE] Implement client message parsing
[DONE] Implement server message parsing [DONE] Implement server message parsing
[DONE] Add loose string checking for headers [DONE] Add loose string checking for headers
[DONE] Relaying mechanism
[DOING] Cover all possible body segmentation standards [DOING] Cover all possible body segmentation standards
[DONE] Cover Content-Length [DONE] Cover Content-Length
[DONE] Cover "chunked transfer encoding" [DOING] Cover "chunked transfer encoding"
[TODO] Cover "compress transfer encoding" [DOING] Cover "compress transfer encoding"
[TODO] Cover "deflate transfer encoding" [DOING] Cover "deflate transfer encoding"
[TODO] Cover "gzip/x-gzip transfer encoding" [DOING] Cover "gzip/x-gzip transfer encoding"
[ACTIVELY DOING] More testing, debugging, fixing [ACTIVELY DOING] More testing, debugging, fixing
[ACTIVELY DOING] Verify and search for memory leaks [ACTIVELY DOING] Verify and search for memory leaks
[TODO] Implement HTTPS [TODO] Implement HTTPS with OpenSSL, LibreSSL, or BearSSL
[TODO] Caching? [TODO] Caching?
COMMITS COMMITS

BIN
proxlib

Binary file not shown.

479
proxlib.c
View File

@ -7,14 +7,12 @@
#include <arpa/inet.h> #include <arpa/inet.h>
#include <netinet/in.h> #include <netinet/in.h>
#include <netdb.h> #include <netdb.h>
#include <poll.h>
#include "proxlib.h" #include "proxlib.h"
#include "parslib/parslib.h" #include "parslib/parslib.h"
int on = 1; int on = 1;
int debug = 1; int debug = 3;
int statem = 0; int statem;
int err = 0;
#define SEGMENT_LEN 512 #define SEGMENT_LEN 512
#define MAX_BUFF_LEN 128 * 1024 #define MAX_BUFF_LEN 128 * 1024
@ -94,107 +92,181 @@ int read_line(int fd,
return 0; return 0;
} }
int pull_content_length(int fd, int len, int *msgbuff_len, char **msgbuff) { int parse_line(char *line, int line_count) {
int ret = 0; int ret = 0;
int line_len = len;
char *line = (char *) calloc(1, line_len); return ret;
}
void do_err(void) {
int statem_code = statem & (~STATEM_ERR);
fprintf(stderr, "[%d,%d,%d] Errored out!\n", statem, statem_code,
STATEM_ERR);
}
int do_fwd_clt(struct conn *conn) {
int bytes = 0;
int ret = 0;
while (bytes < conn->srvbuff_len) {
ret = write(conn->cltfd, conn->srvbuff+bytes, conn->srvbuff_len-bytes);
if (ret < 0)
return -1;
bytes += ret;
}
return 0;
}
int do_rcv_srv(struct conn *conn) {
int ret = 0;
char *line = NULL;
char *msgbuff = NULL;
int line_len = 0;
int msgbuff_len = 0;
// response line
ret = read_line(conn->srvfd, &line_len, &line, &msgbuff_len, &msgbuff);
if (ret < 0) {
fprintf(stderr, "Failed receiving response line from upstream\n");
return -1;
}
if (debug == 1) {
fprintf(stdout, "debug - [upstream] received line: %s\n", line);
}
ret = parestitl(line, line_len, &(conn->srvres.titl));
if (ret < 0) {
fprintf(stderr, "Failed parsing response line\n");
return -1;
}
if (debug == 1) {
fprintf(stdout, "debug - [upstream] parsed response line\n");
}
free(line);
// headers
int next_header = 1;
while (next_header) {
ret = read_line(conn->srvfd, &line_len, &line, &msgbuff_len, &msgbuff);
if (ret < 0) {
fprintf(stderr, "Failed receiving header line\n");
return -1;
}
if (line_len == 0) {
if (debug == 1) {
fprintf(stdout, "debug - [upstream] reached end of headers\n");
}
next_header = 0;
continue;
}
if (debug == 1) {
fprintf(stdout, "debug - [upstream] received line: %s\n", line);
}
ret = parshfield(line, line_len, conn->srvres.hentries);
if (ret < 0) {
fprintf(stderr, "Failed parsing header field\n");
return -1;
}
if (debug == 1) {
fprintf(stdout, "debug - parsed header field\n");
}
free(line);
}
// body
body:
struct httpares *res = &conn->srvres;
struct point *content_length_entry = &res->hentries[header_content_length];
if (content_length_entry->er == NULL) {
fprintf(stderr, "[upstream] no content length header\n");
return -1;
}
int content_length = 0;
ret = stoin(content_length_entry->er, content_length_entry->len, &content_length);
if (ret < 0) {
fprintf(stderr, "[upstream] failed parsing content length header\n");
return -1;
}
line_len = content_length;
line = (char *) calloc(1, line_len);
if (!line) { if (!line) {
return ERR_MEM; fprintf(stderr, "[upstream] not enough dynamic memory\n");
return -1;
} }
int bytes = 0; int bytes = 0;
do { do {
ret = recv(fd, line+bytes, line_len-bytes, MSG_WAITALL); ret = recv(conn->srvfd, line+bytes, line_len-bytes, MSG_WAITALL);
if (ret < 0) { if (ret < 0) {
return ERR_MEM; fprintf(stderr, "[upstream] failed reading body from response\n");
return -1;
} }
bytes += ret; bytes += ret;
} while (bytes < line_len); } while (bytes < line_len);
*msgbuff = (char *) realloc(*msgbuff, *msgbuff_len+line_len); msgbuff = (char *) realloc(msgbuff, msgbuff_len+line_len);
if (!*msgbuff) { if (!msgbuff) {
return ERR_MEM; fprintf(stderr, "[upstream] not enough dynamic memory\n");
return -1;
} }
memcpy(*msgbuff+*msgbuff_len, line, line_len); memcpy(msgbuff+msgbuff_len, line, line_len);
*msgbuff_len += line_len; msgbuff_len += line_len;
if (debug <= 2) {
fprintf(stdout, "------------------------------\n");
fprintf(stdout, "debug - [upstream] received body %d: %.*s\n", line_len, line_len, line);
fprintf(stdout, "------------------------------\n");
}
if (debug <= 2) {
fprintf(stdout, "printing parsed response\n");
printfpares(&conn->srvres);
}
conn->srvbuff = msgbuff;
conn->srvbuff_len = msgbuff_len;
return 0; return 0;
} }
int pull_chunked_encoding(int fd, int *msgbuff_len, char **msgbuff) {
int ret = 0;
char *line = NULL;
int line_len = 0;
while (1) {
ret = read_line(fd, &line_len, &line, msgbuff_len, msgbuff);
if (ret < 0) {
fprintf(stderr, "Failed receiving chunked body from upstream\n");
return -1;
}
line_len = strtol(line, (char **) 0, 16);
if (!line_len) {
break;
}
line_len += 2;
free(line);
line = (char *) calloc(1, line_len);
if (!line) {
fprintf(stderr, "Not enough dynamic memory\n");
return -1;
}
int bytes = 0;
do {
ret = recv(fd, line+bytes, line_len-bytes, MSG_WAITALL);
if (ret < 0) {
fprintf(stderr, "Failed reading respones body from server\n");
return -1;
}
bytes += ret;
} while (bytes < line_len);
*msgbuff = (char *) realloc(*msgbuff, *msgbuff_len+line_len);
if (!msgbuff) {
fprintf(stderr, "Not enough dynamic memory\n");
return -1;
}
memcpy(*msgbuff+*msgbuff_len, line, line_len);
*msgbuff_len += line_len;
free(line);
}
return 0;
}
void do_err(void) {
fprintf(stderr, "failed with error code %d\n", err);
}
int do_con_srv(struct conn *conn) { int do_con_srv(struct conn *conn) {
statem = state_con_srv;
int ret = 0; int ret = 0;
struct httpareq *req = &conn->cltreq; struct httpareq *req = &conn->cltreq;
struct point *host = &req->hentries[header_host]; struct point *host = &req->hentries[header_host];
if (host->er == NULL) { if (host->er == NULL) {
return ERR_PARS; if (debug <= 2) {
fprintf(stderr, "debug - request does not have HOST header\n");
}
goto _exit;
} }
struct hostinfo *info = (struct hostinfo *) calloc(1, sizeof(struct hostinfo)); struct hostinfo *info = (struct hostinfo *) calloc(1, sizeof(struct hostinfo));
if (!info) { if (!info) {
return ERR_MEM; goto _exit;
} }
ret = pahostinfo(host->er, host->len, info); ret = pahostinfo(host->er, host->len, info);
if (ret < 0) { if (ret < 0) {
return ERR_PARS; if (debug <= 2) {
fprintf(stderr, "Failed parsing upstream host header\n");
}
goto _exit_hostinfo;
}
if (debug <= 2) {
fprintf(stdout, "Establishing connection with upstream: %.*s : %.*s\n", info->hostname_len, info->hostname, info->service_len, info->service);
} }
struct addrinfo hints; struct addrinfo hints;
@ -206,37 +278,45 @@ int do_con_srv(struct conn *conn) {
ret = getaddrinfo(info->hostname, info->service, &hints, &res); ret = getaddrinfo(info->hostname, info->service, &hints, &res);
if (ret < 0) { if (ret < 0) {
free(info->hostname); goto _exit_hostinfo;
free(info->service);
free(info);
return ERR_PARS;
} }
ret = conn->srvfd = socket(res->ai_family, res->ai_socktype, ret = conn->srvfd = socket(res->ai_family, res->ai_socktype,
res->ai_protocol); res->ai_protocol);
if (ret < 0) { if (ret < 0) {
freeaddrinfo(res); goto _exit_getaddrinfo;
free(info->hostname);
free(info->service);
free(info);
return ERR_PARS;
} }
ret = connect(conn->srvfd, res->ai_addr, res->ai_addrlen); ret = connect(conn->srvfd, res->ai_addr, res->ai_addrlen);
if (ret < 0) { if (ret < 0) {
freeaddrinfo(res); goto _exit_getaddrinfo;
free(info->hostname);
free(info->service);
free(info);
return ERR_PARS;
} }
_exit_getaddrinfo:
freeaddrinfo(res);
_exit_hostinfo:
free(info->hostname);
free(info->service);
free(info);
_exit:
return ret; return ret;
} }
int do_rcv_clt(struct conn *conn) { int do_fwd_srv(struct conn *conn) {
statem = state_rcv_clt; int bytes = 0;
int ret = 0;
while (bytes < conn->cltbuff_len) {
ret = write(conn->srvfd, conn->cltbuff+bytes, conn->cltbuff_len-bytes);
if (ret < 0)
return -1;
bytes += ret;
}
return 0;
}
int do_rcv_clt(struct conn *conn) {
int ret = 0; int ret = 0;
char *line = NULL; char *line = NULL;
char *msgbuff = NULL; char *msgbuff = NULL;
@ -246,12 +326,22 @@ int do_rcv_clt(struct conn *conn) {
// request line // request line
ret = read_line(conn->cltfd, &line_len, &line, &msgbuff_len, &msgbuff); ret = read_line(conn->cltfd, &line_len, &line, &msgbuff_len, &msgbuff);
if (ret < 0) { if (ret < 0) {
return ERR_RECV; fprintf(stderr, "Failed receiving request line\n");
return -1;
}
if (debug == 1) {
fprintf(stdout, "debug - received line: %s\n", line);
} }
ret = pareqtitl(line, line_len, &(conn->cltreq.titl)); ret = pareqtitl(line, line_len, &(conn->cltreq.titl));
if (ret < 0) { if (ret < 0) {
return ERR_PARSTITLE; fprintf(stderr, "Failed parsing request line\n");
return -1;
}
if (debug == 1) {
fprintf(stdout, "debug - parsed request line\n");
} }
free(line); free(line);
@ -261,43 +351,41 @@ int do_rcv_clt(struct conn *conn) {
while (next_header) { while (next_header) {
ret = read_line(conn->cltfd, &line_len, &line, &msgbuff_len, &msgbuff); ret = read_line(conn->cltfd, &line_len, &line, &msgbuff_len, &msgbuff);
if (ret < 0) { if (ret < 0) {
return ERR_RECV; fprintf(stderr, "Failed receiving header line\n");
return -1;
} }
if (line_len == 0) { if (line_len == 0) {
if (debug == 1) {
fprintf(stdout, "debug - reached end of headers\n");
}
next_header = 0; next_header = 0;
continue; continue;
} }
if (debug == 1) {
fprintf(stdout, "debug - received line: %s\n", line);
}
ret = parshfield(line, line_len, conn->cltreq.hentries); ret = parshfield(line, line_len, conn->cltreq.hentries);
if (ret < 0) { if (ret < 0) {
return ERR_PARSHEADER; fprintf(stderr, "Failed parsing header field\n");
return -1;
}
if (debug == 1) {
fprintf(stdout, "debug - parsed header field\n");
} }
free(line); free(line);
} }
// body // body
struct httpareq *req = &conn->cltreq; // TODO
struct point *content_length_entry = &req->hentries[header_content_length];
struct point *transfer_encoding_entry = &req->hentries[header_transfer_encoding];
if (content_length_entry->er) {
int content_length = 0;
ret = stoin(content_length_entry->er, content_length_entry->len, &content_length); if (debug <= 2) {
if (ret < 0) { fprintf(stdout, "printing parsed request\n");
return ERR_PARS; printfpareq(&conn->cltreq);
}
ret = pull_content_length(conn->srvfd, content_length, &msgbuff_len, &msgbuff);
if (ret < 0) {
return ERR_RECV;
}
} else if (transfer_encoding_entry->er && strcmp(transfer_encoding_entry->er, "chunked") == 0) {
ret = pull_chunked_encoding(conn->srvfd, &msgbuff_len, &msgbuff);
if (ret < 0) {
return ERR_RECV;
}
} }
conn->cltbuff = msgbuff; conn->cltbuff = msgbuff;
@ -306,125 +394,51 @@ int do_rcv_clt(struct conn *conn) {
return 0; return 0;
} }
int read_buffer(int fd, char **buff, int *len) { void do_clear(struct conn *conn) {
char *tmp = (char *) malloc(RELAY_BUFFER_SIZE); statem = STATEM_RCV_CLT;
if (!tmp) { frepareq(&conn->cltreq);
return ERR_MEM; frepares(&conn->srvres);
} free(conn->cltbuff);
free(conn->srvbuff);
memset(tmp, 0, RELAY_BUFFER_SIZE);
int bytes = recv(fd, tmp, RELAY_BUFFER_SIZE, 0);
if (bytes <= 0) {
free(tmp);
return ERR_RECV;
}
*buff = realloc(*buff, *len+bytes);
if (!*buff) {
free(tmp);
return ERR_MEM;
}
memcpy(*buff+*len, tmp, bytes);
*len += bytes;
return 0;
}
int write_buffer(int fd, char **buff, int *len) {
if (*len <= 0) {
*len = 0;
return 0;
}
int writen = send(fd, *buff, *len, 0);
if (writen < 0) {
return ERR_SEND;
}
char *trunc = (char *) malloc(*len-writen);
if (!trunc) {
return ERR_MEM;
}
memcpy(trunc, *buff+writen, *len-writen);
char *tofree = *buff; // FIXME: any better solution?
*buff = trunc;
*len -= writen;
free(tofree);
return 0;
} }
void do_statem(struct conn *conn) { void do_statem(struct conn *conn) {
int ret = 0; int ret = 0;
ret = do_rcv_clt(conn); for (int counter = 0; counter < MAX_BOUND; counter++) {
if (ret < 0) { switch (statem & (~STATEM_ERR)) {
err = ret; case STATEM_RCV_CLT:
do_err(); ret = do_rcv_clt(conn);
} break;
case STATEM_CON_SRV:
ret = do_con_srv(conn); ret = do_con_srv(conn);
if (ret < 0) { break;
err = ret; case STATEM_FWD_SRV:
do_err(); ret = do_fwd_srv(conn);
} break;
case STATEM_RCV_SRV:
// relay the data between the two sockets until the end of time ret = do_rcv_srv(conn);
ssize_t bytes_received; break;
struct pollfd fds[2]; case STATEM_FWD_CLT:
for (;;) { ret = do_fwd_clt(conn);
memset(fds, 0, 2*sizeof(struct pollfd));
fds[0].fd = conn->cltfd;
fds[1].fd = conn->srvfd;
if (conn->srvbuff_len > 0) {
fds[0].events |= POLLOUT;
}
if (conn->cltbuff_len > 0) {
fds[1].events |= POLLOUT;
}
if (!conn->srvbuff_len) {
fds[1].events |= POLLIN;
}
if (!conn->cltbuff_len) {
fds[0].events |= POLLIN;
}
ret = poll(fds, 2, 1000);
if (fds[1].revents & POLLOUT) {
ret = write_buffer(conn->srvfd, &conn->cltbuff, &conn->cltbuff_len);
}
if (fds[1].revents & POLLIN) {
ret = read_buffer(conn->srvfd, &conn->srvbuff, &conn->srvbuff_len);
}
if (fds[0].revents & POLLIN) {
ret = read_buffer(conn->cltfd, &conn->cltbuff, &conn->cltbuff_len);
}
if (fds[0].revents & POLLOUT) {
ret = write_buffer(conn->cltfd, &conn->srvbuff, &conn->srvbuff_len);
}
if (fds[0].revents & POLLHUP) {
break; break;
} }
if (ret < 0) {
if (ret < 0)
statem |= STATEM_ERR;
if (statem & STATEM_ERR) {
do_err();
break; break;
} }
}
if (conn->cltbuff_len > 0) { if (statem & STATEM_FWD_CLT) {
write_buffer(conn->srvfd, &conn->cltbuff, &conn->cltbuff_len); do_clear(conn);
} continue;
if (conn->srvbuff_len > 0) { }
write_buffer(conn->cltfd, &conn->srvbuff, &conn->srvbuff_len);
}
close(conn->cltfd); statem <<= 1;
close(conn->srvfd); }
exit(0); // child die
} }
int do_srv(void) { int do_srv(void) {
@ -462,8 +476,9 @@ int do_srv(void) {
return -1; return -1;
} }
fprintf(stdout, "Listening on port %d\n", PROXY_PORT);
for (;;) { for (;;) {
fprintf(stdout, "listening for sockets\n");
struct sockaddr_in new_clt_addr; struct sockaddr_in new_clt_addr;
socklen_t new_clt_addr_len= sizeof(new_clt_addr); socklen_t new_clt_addr_len= sizeof(new_clt_addr);
int new_clt_sock; int new_clt_sock;
@ -475,7 +490,6 @@ int do_srv(void) {
"client\n"); "client\n");
return -1; return -1;
} }
fprintf(stdout, "accepted new client socket\n");
ret = fork(); ret = fork();
if (ret < 0) { if (ret < 0) {
@ -485,11 +499,12 @@ int do_srv(void) {
} }
if (ret > 0) { if (ret > 0) {
fprintf(stdout, "+new request process:%d(pid)\n", ret); fprintf(stdout, "[PROGRAM] Successfully forked a new child process"
" with PID %d\n", ret);
continue; continue;
} }
// request process // child
struct conn *conn = (struct conn *) calloc(1, sizeof(struct conn)); struct conn *conn = (struct conn *) calloc(1, sizeof(struct conn));
if (!conn) { if (!conn) {
fprintf(stderr, "Not enough dynamic memory to establish connection\n"); fprintf(stderr, "Not enough dynamic memory to establish connection\n");
@ -497,9 +512,14 @@ int do_srv(void) {
} }
conn->cltfd = new_clt_sock; conn->cltfd = new_clt_sock;
statem = state_rcv_clt; statem = STATEM_RCV_CLT;
do_statem(conn); do_statem(conn);
free(conn); free(conn);
if (debug == 1) {
fprintf(stdout, "Finished proxying client\n");
}
return 0; return 0;
} }
@ -513,10 +533,7 @@ int main(int argc, char *argv[]) {
return -1; return -1;
} }
ret = do_srv(); return do_srv();
if (ret < 0) {
return -1;
}
fretres(); fretres();
} }

View File

@ -4,41 +4,18 @@
#include "parslib/parslib.h" #include "parslib/parslib.h"
#ifndef MAX_BOUND #ifndef MAX_BOUND
#define MAX_BOUND 100000000000 #define MAX_BOUND 10000
#endif #endif
#define PROXY_PORT 2020 #define PROXY_PORT 2020
#define PROXY_CONN 20 #define PROXY_CONN 20
#define RELAY_BUFFER_SIZE 1024*2
#define RELAY_POLL_TIMEOUT 1000
enum states { #define STATEM_RCV_CLT 0b00000001
state_rcv_clt = 0, #define STATEM_CON_SRV 0b00000010
state_con_srv, #define STATEM_FWD_SRV 0b00000100
state_fwd_srv, #define STATEM_RCV_SRV 0b00001000
state_rcv_srv, #define STATEM_FWD_CLT 0b00010000
state_fwd_clt, #define STATEM_ERR 0b00100000
state_ok
};
#define ERR_GENERIC -1
#define ERR_MEM -2
#define ERR_RECV -3
#define ERR_SEND -4
#define ERR_PARS -5
#define ERR_PARSTITLE -6
#define ERR_PARSHEADER -7
#define ERR_SUPPORT -8
#define ERR_TIMEOUT -9
char *states_str[] = {
"state_rcv_clt",
"state_con_srv",
"state_fwd_srv",
"state_rcv_srv",
"state_fwd_clt",
"state_ok"
};
struct conn { struct conn {
int cltfd; int cltfd;