CSAPP-proxy lab

Implementing a sequential web proxy

先搞清listen socket 和 connected socket 的区别。

upload successful

一个套接字对标记着一个客户端和服务器的链接。

客户端是发起连接请求的主动实体,而内核会认为socket函数创建的套接字是主动套接字,而服务器就是要调用listen函数告诉内核,该套接字是被服务器而不是客户端使用的,即listen函数将一个主动套接字转化为监听套接字

服务器通过accept函数等待来自客户端的连接请求到达监听套接字,并返回一个已连接套接字,这个connfd可以被用来与客户端进行通讯。

实验过程如下:

upload successful

some def

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
/*
 * Header lines the proxy always sends to the end server.
 * You won't lose style points for including the long User-Agent line.
 */
static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n";
static const char *conn_hdr = "Connection: close\r\n";
static const char *prox_hdr = "Proxy-Connection: close\r\n";
static const char *endof_hdr = "\r\n";

/* printf-style templates for the request line and the Host header. */
static const char *requestHeaderFormat = "GET %s HTTP/1.0\r\n";
static const char *hostFormat = "Host: %s\r\n";

/* Header names used to recognise client header lines by their prefix. */
static const char *hostKey = "Host";
static const char *connection_key = "Connection";
static const char *proxy_connection_key = "Proxy-Connection";
static const char *user_agent_key = "User-Agent";

/* Forward declarations for the proxy's functions. */
void doit(int fd);
void parse_uri(char *uri,char *hostname,char *path,int *port);
void buildHTTPHeader(char *http_header,char *hostname,char *path,int port,rio_t *client_rio);
/* Two parameters: this has to agree with the definition and with the call made in doit. */
int connectEndServer(char *hostname,int port);
void *thread(void *vargp);
void initCache(void);
int reader(int fd,char *uri);
void writer(char *uri,char *buf);
//reference: https://zhuanlan.zhihu.com/p/37902495

/* One cache slot: a stored object together with the key it is looked up by. */
typedef struct{
    char *buf;      /* stored object text */
    char *uri;      /* lookup key */
}cacheLine;

/* The whole cache: an array of slots plus a counter maintained by writer. */
typedef struct{
    cacheLine *objects;
    int count;
}Cache;

Cache cache;
int readCount;
/*
 * Semaphores: the mechanism used in multithreaded code to stop two threads
 * from reading and writing the same shared resource (for example a global
 * variable) at the same time.
 */
sem_t mutex;
sem_t wmutex;

main 函数,参考课本的tiny服务器,注意的是这里pthread_create是传值而不是引用,是为了避免竞争。(传值是传一个独立的副本)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/*
 * Entry point: listen on the port given as argv[1] and start one thread per
 * accepted client connection.
 *
 * Exits with status 1 when the port argument is missing.
 */
int main(int argc,char **argv)
{
    int listenfd,connfd;
    socklen_t clientlen;
    char hostname[MAXLINE];
    char port[MAXLINE];
    struct sockaddr_storage clientaddr;
    pthread_t tid;

    if(argc!=2){
        fprintf(stderr,"usage %s <port>\n",argv[0]);
        exit(1);
    }
    initCache();
    /* Ignore SIGPIPE: its default action would terminate the whole proxy. */
    signal(SIGPIPE, SIG_IGN);
    /* Create the listening socket on the requested port. */
    listenfd = Open_listenfd(argv[1]);
    while(1){
        clientlen = sizeof(clientaddr);
        connfd = Accept(listenfd,(SA*)&clientaddr,&clientlen);
        /* Translate the peer address into host name and port strings for logging. */
        Getnameinfo((SA *)&clientaddr,clientlen,hostname,MAXLINE,port,MAXLINE,0);
        printf("Accepted connection from (%s %s)\n",hostname,port);
        /*
         * Hand the descriptor over by value so the next Accept cannot
         * overwrite it before the thread has read it. Going through long
         * avoids casting an int directly to a pointer of a different width.
         */
        Pthread_create(&tid,NULL,thread,(void*)(long)connfd);
    }
    return 0;
}

多线程并发

1
2
3
4
5
6
7
/*
 * Thread start routine: serve a single client connection and close it.
 *
 * vargp - the connected descriptor, passed by value inside the pointer
 *         (main casts the int to void *).
 * Returns NULL; the thread is detached, so nobody collects the value.
 */
void *thread(void *vargp){
    int connfd = (int)(long)vargp;
    /* Detach the thread so its resources are reclaimed automatically when it
       finishes, which avoids a leak. */
    Pthread_detach(pthread_self());
    doit(connfd);
    Close(connfd);
    return NULL;
}

doit

函数逻辑:

1.得到解析后的请求行和请求头

2.然后去连接对应的服务器,发送请求

3.建立连接后,返回信息会在描述符中,也就是endServerFd

4.再把信息从endServerFd中读取出来,直接写进客户端对应的描述符fd就可以。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67


/*
 * Handle one client request arriving on connfd.
 *
 * Reads the request line (for example "GET http://www.zhihu.com HTTP/1.1"),
 * analyses the URL to find the hostname, port and path of the server to
 * contact, and rewrites the client's headers so that the proxy acts as the
 * client towards that server. The reply is then relayed back to the client
 * and, when it is small enough, handed to the cache.
 *
 * Only GET is supported; any other method is reported and dropped.
 * The caller keeps ownership of connfd and closes it.
 */
void doit(int connfd){

    char buf[MAXLINE],uri[MAXLINE],method[MAXLINE],version[MAXLINE];
    char endServerHTTP[MAXLINE];
    char hostname[MAXLINE],path[MAXLINE];
    char objectBUF[MAX_OBJECT_SIZE];
    int port,endServerFd;

    rio_t rio,serverRio;

    Rio_readinitb(&rio,connfd);
    /* The client may close the connection without sending anything. */
    if(Rio_readlineb(&rio,buf,MAXLINE)<=0){
        return;
    }
    /* Split the request line into method, URI and version. */
    if(sscanf(buf,"%s %s %s",method,uri,version)!=3){
        printf("Malformed request line\n");
        return;
    }

    if(strcasecmp(method,"GET")){
        printf("Proxy does not implement the method");
        return;
    }

    /* Start from empty strings so a field parse_uri does not fill in is
       still a valid string. */
    hostname[0] = '\0';
    path[0] = '\0';

    /* Split the URI into hostname, path and port. */
    parse_uri(uri,hostname,path,&port);

    /* Build the request that will be sent to the end server. */
    buildHTTPHeader(endServerHTTP,hostname,path,port,&rio);

    /* Cache key: the hostname immediately followed by the path. */
    snprintf(uri,sizeof uri,"%s%s",hostname,path);
    if(reader(connfd,uri)){
        fprintf(stdout,"%s from cache\n",uri);
        fflush(stdout);
        return;
    }

    /* Connect to the end server and send the rewritten request. */
    endServerFd = connectEndServer(hostname,port);
    if(endServerFd<0){
        printf("connection failed");
        return;
    }
    Rio_readinitb(&serverRio,endServerFd);
    Rio_writen(endServerFd,endServerHTTP,strlen(endServerHTTP));

    /*
     * Relay the reply to the client while keeping a copy for the cache.
     * The copy stops for good as soon as the object would no longer fit,
     * so objectBUF is never written past its end.
     */
    size_t totalSize = 0;
    int cacheable = 1;
    ssize_t n;
    while((n=Rio_readlineb(&serverRio,buf,MAXLINE))>0){
        printf("proxy received %ld bytes,then send.\n",(long)n);
        Rio_writen(connfd,buf,n);
        if(cacheable&&totalSize+(size_t)n<MAX_OBJECT_SIZE){
            /* memcpy rather than strcpy: a line may contain NUL bytes. */
            memcpy(objectBUF+totalSize,buf,(size_t)n);
            totalSize+=(size_t)n;
        }else{
            cacheable = 0;
        }
    }
    /* writer takes a C string, so terminate the copy before handing it over. */
    if(cacheable&&totalSize>0){
        objectBUF[totalSize] = '\0';
        writer(uri,objectBUF);
    }
    Close(endServerFd);
}

Build the HTTP request that is sent to the end server

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
/*
 * Build the complete request (request line, headers, blank line) that the
 * proxy sends to the end server.
 *
 * http_header - output buffer; at most MAXLINE bytes are written, which is
 *               the size of the buffer doit passes in
 * hostname    - used for the Host header when the client did not send one
 * path        - request target placed in the request line
 * port        - currently unused
 * client_rio  - buffered reader positioned just after the client's request
 *               line; the client's header lines are consumed from it
 *
 * The client's Host header is kept as is. Its Connection, Proxy-Connection
 * and User-Agent headers are replaced by the proxy's fixed versions. Every
 * other header is passed through unchanged.
 */
void buildHTTPHeader(char *http_header,char *hostname,char *path,int port,rio_t *client_rio){

    char buf[MAXLINE],requestHeader[MAXLINE],otherHeader[MAXLINE],hostHeader[MAXLINE];
    size_t otherLen = 0;

    (void)port;
    /* Both accumulators must start out as empty strings. */
    otherHeader[0] = '\0';
    hostHeader[0] = '\0';

    /* Request line: write path into the "GET %s HTTP/1.0" template. */
    snprintf(requestHeader,sizeof requestHeader,requestHeaderFormat,path);

    while(Rio_readlineb(client_rio,buf,MAXLINE)>0){

        /* A blank line ends the header section. */
        if(!strcmp(buf,endof_hdr)||!strcmp(buf,"\n")){
            break;
        }
        /* Host: remember the client's own header. */
        if(!strncasecmp(buf,hostKey,strlen(hostKey))){
            snprintf(hostHeader,sizeof hostHeader,"%s",buf);
            continue;
        }
        /* These three are always replaced by the proxy's fixed headers. */
        if(!strncasecmp(buf,connection_key,strlen(connection_key))
           ||!strncasecmp(buf,proxy_connection_key,strlen(proxy_connection_key))
           ||!strncasecmp(buf,user_agent_key,strlen(user_agent_key))){
            continue;
        }
        /* Any other header is passed through, as long as it still fits. */
        size_t lineLen = strlen(buf);
        if(otherLen+lineLen<sizeof otherHeader){
            memcpy(otherHeader+otherLen,buf,lineLen+1);
            otherLen += lineLen;
        }
    }
    /* No Host header from the client: derive one from the parsed hostname. */
    if(hostHeader[0]=='\0'){
        snprintf(hostHeader,sizeof hostHeader,hostFormat,hostname);
    }

    /* Assemble everything into http_header. */
    int written = snprintf(http_header,MAXLINE,"%s%s%s%s%s%s%s",requestHeader,hostHeader,
                           conn_hdr,prox_hdr,user_agent_hdr,otherHeader,endof_hdr);
    if(written<0||written>=MAXLINE){
        /* Too long: drop the optional pass-through headers rather than send
           a request whose terminating blank line has been cut off. */
        snprintf(http_header,MAXLINE,"%s%s%s%s%s%s",requestHeader,hostHeader,
                 conn_hdr,prox_hdr,user_agent_hdr,endof_hdr);
    }
    return;
}

parseuri

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50

/*
 * Split a request URI into hostname, path and port.
 *
 *   http://www.example.com:8080/forum/questions
 *          \_____________/ \__/\______________/
 *             hostname     port      path
 *
 * uri      - the URI to parse; it is not modified
 * hostname - receives the host part, without any port
 * path     - receives everything from the first '/' after the host, or "/"
 *            when the URI has no path
 * port     - receives the port number, or 80 when none (or an invalid one)
 *            is given
 *
 * hostname and path must each be large enough to hold the whole uri.
 * Bracketed IPv6 literals and userinfo ("user:pw@host") are not handled.
 */
void parse_uri(char *uri,char *hostname,char *path,int *port){

    *port = 80;

    /* Skip the scheme ("http://") when there is one. */
    char *host = strstr(uri,"//");
    host = host ? host+2 : uri;

    /* The authority (host[:port]) ends at the first '/'. */
    size_t authLen = strcspn(host,"/");
    char *pathStart = host+authLen;

    /* Look for the port separator only inside the authority, so that a ':'
       occurring later in the path is not mistaken for one. */
    char *colon = memchr(host,':',authLen);
    size_t hostLen = colon ? (size_t)(colon-host) : authLen;

    memcpy(hostname,host,hostLen);
    hostname[hostLen] = '\0';

    if(colon){
        int parsed;
        if(sscanf(colon+1,"%d",&parsed)==1&&parsed>0&&parsed<=65535){
            *port = parsed;
        }
    }

    /* A URI without a path refers to the root. */
    if(*pathStart){
        strcpy(path,pathStart);
    }else{
        strcpy(path,"/");
    }
    return;
}

connect end server

1
2
3
4
5
6
7
/*
 * Open a client connection to hostname on the given numeric port.
 *
 * Returns the value of Open_clientfd; doit treats a negative result as a
 * failed connection.
 */
int connectEndServer(char *hostname,int port){

    /* Open_clientfd takes the port as a string. */
    char portStr[16];
    snprintf(portStr,sizeof portStr,"%d",port);
    return Open_clientfd(hostname,portStr);

}

cache

采用读者-写者模型,可以让多个线程同时来读。

没有实现LRU,只是简单地把1MiB内存分为十块,每次接受请求并解析之后,先去cache看看有没有对应的web object,如果有直接返回给客户端,没有再从服务端请求。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59


/*
 * Look uri up in the cache and, on a hit, send the cached object to fd.
 *
 * fd  - descriptor the cached object is written to
 * uri - lookup key; doit passes the server's hostname followed by the path
 *
 * Returns 1 when the object was found and sent, 0 otherwise.
 *
 * Reader side of the readers-writers scheme: the first reader to arrive
 * takes wmutex and the last one to leave releases it, so several readers
 * can run at the same time while writer is kept out.
 */
int reader(int fd,char *uri){

    enum { CACHE_SLOTS = 10 };
    int Found = 0;

    P(&mutex);
    readCount++;
    if(readCount==1){
        P(&wmutex);
    }
    V(&mutex);

    /* Only slots that writer has filled hold valid strings. */
    int used = cache.count<CACHE_SLOTS ? cache.count : CACHE_SLOTS;
    for(int i=0;i<used;i++){
        if(!strcmp(cache.objects[i].uri,uri)){
            /* writer stores the object as a C string, so its length is
               strlen and not the full slot capacity. */
            Rio_writen(fd,cache.objects[i].buf,strlen(cache.objects[i].buf));
            Found=1;
            break;
        }
    }

    P(&mutex);
    readCount--;
    if(readCount==0){
        V(&wmutex);
    }
    V(&mutex);
    return Found;
}

void writer(char *uri,char *buf){

P(&wmutex);
strcpy(cache.objects[cache.count].uri,uri);
strcpy(cache.objects[cache.count].buf,buf);
++cache.count;
V(&wmutex);
}

/*
 * A simple cache without LRU: the memory is split into 10 slots, and every
 * lookup walks the slots to see whether the requested uri is present.
 *
 * One benefit of placing a proxy between server and client is that caching
 * becomes possible. The same resource is often requested many times, and
 * fetching it from the server every time puts needless load on the server.
 * If the proxy keeps what clients requested recently, those requests no
 * longer have to go to the server, which makes the server more efficient.
 *
 * initCache sets up the two semaphores, the reader counter and the slots.
 * It must run once before reader or writer is used, and it exits the
 * process when a semaphore or an allocation cannot be set up.
 */
void initCache(){

    enum { CACHE_SLOTS = 10 };

    if(sem_init(&mutex,0,1)<0||sem_init(&wmutex,0,1)<0){
        fprintf(stderr,"initCache: sem_init failed\n");
        exit(1);
    }
    /* calloc: every slot starts out zeroed, so unused keys are empty strings. */
    cache.objects = calloc(CACHE_SLOTS,sizeof *cache.objects);
    if(cache.objects==NULL){
        fprintf(stderr,"initCache: out of memory\n");
        exit(1);
    }
    cache.count=0;
    readCount=0;
    for(int i=0;i<CACHE_SLOTS;i++){
        /* buf holds a whole object and uri holds its key. doit builds keys in
           a MAXLINE buffer and only caches objects below MAX_OBJECT_SIZE. */
        cache.objects[i].buf = calloc(MAX_OBJECT_SIZE,sizeof(char));
        cache.objects[i].uri = calloc(MAXLINE,sizeof(char));
        if(cache.objects[i].buf==NULL||cache.objects[i].uri==NULL){
            fprintf(stderr,"initCache: out of memory\n");
            exit(1);
        }
    }
}

Test

  • use ./free-port.sh to get a free port, like 4501

  • open a terminal, nc -l 4501

    • this is to start netcat as a server listening on port you get
  • open a terminal

1
curl -v --proxy http://localhost:23885/ http://localhost:4501/
  • open a terminal

./proxy 23885

filename already exists, renamed

Here netcat is listening on port 4501 and acts as the end server, while the proxy is listening on port 23885. Type something into the ‘nc -l’ window and exactly the same text is printed in the ‘curl’ window.

Ref

https://blog.csdn.net/u012336567/article/details/52056089

https://zhuanlan.zhihu.com/p/37902495