-
Notifications
You must be signed in to change notification settings - Fork 0
/
url.c
235 lines (197 loc) · 6.27 KB
/
url.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "url.h"
#include "download.h"
/* One link extracted from a downloaded page; entries are filled in by parse_file(). */
struct link_data {
/* Host name for the link: the host part of an "http://" link, otherwise the page's own host. */
char host[HOST_SIZE];
/* Resource identifier formatted into the request together with `host` by download_url(). */
char identifer[HOST_SIZE];
/* Text after the last '/' of the link; copied into the worker's `file` field by download_url(). */
char file[FILE_SIZE];
};
/* Table of links collected so far: parse_file() appends entries, download_url() reads them. */
struct link_data contents[MAX_LINK_URL];
/* Number of entries of `contents` in use; parse_file() increments it and no code visible here resets it. */
int length = 0;
BOOL download_url(struct url_info *url) {
int i, noof = 0;
struct hostent *h;
time_t start_time, finish_time;
struct thread_data mthread;//Main thread
struct thread_data *wthreads;//Worker
char tmpDir[FILE_SIZE], tmp[FILE_SIZE];
#ifdef DEBUG
printf("\t[Url.c ] Start of download_url \n");
#endif
if (!url) {
#ifdef DEBUG
fprintf(stderr,"The url is not defined yet \n");
#endif
return FALSE;
}
if ((h = gethostbyname(url->host)) == NULL) {
#ifdef DEBUG
printf("The hostname is invalid\n");
#endif
return FALSE;
}
#ifdef DEBUG
printf("\t[Url.c ] Got hostname infos\n");
#endif
mthread.sin.sin_family = (sa_family_t) h->h_addrtype;
memcpy((char *) &mthread.sin.sin_addr.s_addr, h->h_addr_list[0], (size_t) h->h_length);
mthread.sin.sin_port = htons(80);
sprintf(mthread.request, GETREQ2, url->identifer, url->host);
#ifdef DEBUG
printf("\t[Url.c ] Creating main thread\n");
#endif
mthread.id = 0;
strcpy(mthread.file, url->file);
strcpy(mthread.dir, "\0");
strcpy(mthread.identifer, url->identifer);
mthread.status = STATUS_STARTED;
mthread.type = TYPE_LINKS;
time(&start_time);
printf("\tTrying to downloa main page [%s]\n", mthread.file);
#ifdef DEBUG
printf("\t[Url.c ] pthread_create\n");
#endif
pthread_create(&(mthread.thread), NULL, downloader, (void *) &mthread);
#ifdef DEBUG
printf("\t[Url.c ] pthread_join\n");
#endif
pthread_join(mthread.thread, NULL);
time(&finish_time);
printf("\tMain page is downloaded in %ld sec[s]\n", (finish_time - start_time));
if (mthread.status != STATUS_SUCCESS) {
printf("\tConnection failed while downloading main page\n");
return FALSE;
}
strncpy(tmpDir, url->file, 3);
strcat(tmpDir, "_files");
strcpy(tmp, "mkdir ");
strcat(tmp, tmpDir);
system(tmp);//Create a new directory
#ifdef DEBUG
printf("\t[Url.c ] url->file: %s\n",url->file);
#endif
noof = parse_file(url);
if (noof < 0) {
printf("\tThere are some problem while parsing links\n");
return FALSE;
} else if (i == 0) {
printf("\tThere are no link");
return TRUE;
}
wthreads = (struct thread_data *) malloc(i * sizeof(struct thread_data));
if (!wthreads) {
#ifdef DEBUG
printf("\tThe memory is full\n");
#endif
return FALSE;
}
#ifdef DEBUG
printf("\t >>>>> The links : total: %d \n",length);
for(i=0;i<length;i++){
printf("\t%d >> Host :%s -- File :%s\n",i,contents[i].host,contents[i].file);
}
sleep(3);
#endif
i = 0;
time(&start_time);//Get start time
while (i < length) {
progress_bar((float) i + 1.0, (float) noof);
wthreads[i].id = i + 1;
memcpy(&(wthreads[i].sin), &(mthread.sin), sizeof(mthread.sin));
sprintf((wthreads[i].request), GETREQ2, contents[length].identifer, contents[length].host);
strcpy(wthreads[i].dir, tmpDir);
wthreads[i].status = STATUS_STARTED;
wthreads[i].type = TYPE_LINKS;
strcpy(wthreads[i].file, contents[length].file);
pthread_create(&(wthreads[i].thread), NULL, downloader, (void *) &(wthreads[i]));
i++;
}
time(&finish_time);//Get finish time
for (i = 0; i < noof; i++) {
pthread_join(wthreads[i].thread, NULL);
//if (wthreads[i].status==STATUS_SUCCESS) finish_file++;
}
printf("\tAll download finished in %ld sec[s] \n", (finish_time - start_time));
return TRUE;
}
int parse_file(struct url_info *url) {
FILE *in;
BOOL isOK;
int ch, state = 0, i = 0;
char buf[BUF_SIZE];
int no = 0;
#ifdef DEBUG
printf("\t[Url.c ] parse_file start File:%s\n",url->file);
#endif
if (!url) return -1;
in = fopen(url->file, "rb+");
if (!in) {
#ifdef DEBUG
fprintf(stderr,"\t[Url.c ] * File is not exist\n");
#endif
return -1;
}
while ((ch = fgetc(in)) != EOF) {
char *retCh = 0;
isOK = FALSE;
switch (ch) {
case '<':
i = 0;
state = 1;
break;
case '>':
isOK = parse_string(retCh, buf);
state = 0;
i = 0;
break;
default:
if (state == 1) {
buf[i++] = (char) ch;
buf[i] = '\0';
}
break;
};
if (isOK) {
if (length >= MAX_LINK_URL) return no;
no++;
if (strncmp(retCh, "http://", 7) == 0) {
char *s;
s = strtok(retCh + 7, "/");
strcpy(contents[length].host, s);
if (s == NULL) strcpy(contents[length].identifer, "/");
else strcpy(contents[length].identifer, retCh + 7 + strlen(s));
reverse_string(retCh);
i = 0;
for (s = retCh; *s != '/'; s++) {
if (*s == '\0') break;
contents[length].file[i++] = *s;
contents[length].file[i] = '\0';
}
reverse_string(contents[length].file);
reverse_string(retCh);
#ifdef DEBUG
printf("host:%s and identifer:%s",contents[length].host,contents[length].identifer);
#endif
} else {
char *s;
strcpy(contents[length].host, url->host);
strcpy(contents[length].identifer, retCh);
reverse_string(retCh);
i = 0;
for (s = retCh; *s != '/'; s++) {
if (*s == '\0') break;
contents[length].file[i++] = *s;
contents[length].file[i] = '\0';
}
reverse_string(contents[length].file);
reverse_string(retCh);
#ifdef DEBUG
printf("host:%s and identifer:%s",contents[length].host,contents[length].identifer);
#endif
}
length++;
}
}
fclose(in);
return no;
}/*end of the parse_file */