httpget.c (10691B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 /* 7 * Author: Wan-Teh Chang 8 * 9 * Given an HTTP URL, httpget uses the GET method to fetch the file. 10 * The fetched file is written to stdout by default, or can be 11 * saved in an output file. 12 * 13 * This is a single-threaded program. 14 */ 15 16 #include "prio.h" 17 #include "prnetdb.h" 18 #include "prlog.h" 19 #include "prerror.h" 20 #include "prprf.h" 21 #include "prinit.h" 22 23 #include <stdio.h> 24 #include <string.h> 25 #include <stdlib.h> /* for atoi */ 26 27 #define FCOPY_BUFFER_SIZE (16 * 1024) 28 #define INPUT_BUFFER_SIZE 1024 29 #define LINE_SIZE 512 30 #define HOST_SIZE 256 31 #define PORT_SIZE 32 32 #define PATH_SIZE 512 33 34 /* 35 * A buffer for storing the excess input data for ReadLine. 36 * The data in the buffer starts from (including) the element pointed to 37 * by inputHead, and ends just before (not including) the element pointed 38 * to by inputTail. The buffer is empty if inputHead == inputTail. 39 */ 40 41 static char inputBuf[INPUT_BUFFER_SIZE]; 42 /* 43 * inputBufEnd points just past the end of inputBuf 44 */ 45 static char* inputBufEnd = inputBuf + sizeof(inputBuf); 46 static char* inputHead = inputBuf; 47 static char* inputTail = inputBuf; 48 49 static PRBool endOfStream = PR_FALSE; 50 51 /* 52 * ReadLine -- 53 * 54 * Read in a line of text, terminated by CRLF or LF, from fd into buf. 55 * The terminating CRLF or LF is included (always as '\n'). The text 56 * in buf is terminated by a null byte. The excess bytes are stored in 57 * inputBuf for use in the next ReadLine call or FetchFile call. 58 * Returns the number of bytes in buf. 0 means end of stream. Returns 59 * -1 if read fails. 60 */ 61 62 PRInt32 ReadLine(PRFileDesc* fd, char* buf, PRUint32 bufSize) { 63 char* dst = buf; 64 char* bufEnd = buf + bufSize; /* just past the end of buf */ 65 PRBool lineFound = PR_FALSE; 66 char* crPtr = NULL; /* points to the CR ('\r') character */ 67 PRInt32 nRead; 68 69 loop: 70 PR_ASSERT(inputBuf <= inputHead && inputHead <= inputTail && 71 inputTail <= inputBufEnd); 72 while (lineFound == PR_FALSE && inputHead != inputTail && dst < bufEnd - 1) { 73 if (*inputHead == '\r') { 74 crPtr = dst; 75 } else if (*inputHead == '\n') { 76 lineFound = PR_TRUE; 77 if (crPtr == dst - 1) { 78 dst--; 79 } 80 } 81 *(dst++) = *(inputHead++); 82 } 83 if (lineFound == PR_TRUE || dst == bufEnd - 1 || endOfStream == PR_TRUE) { 84 *dst = '\0'; 85 return dst - buf; 86 } 87 88 /* 89 * The input buffer should be empty now 90 */ 91 PR_ASSERT(inputHead == inputTail); 92 93 nRead = PR_Read(fd, inputBuf, sizeof(inputBuf)); 94 if (nRead == -1) { 95 *dst = '\0'; 96 return -1; 97 } else if (nRead == 0) { 98 endOfStream = PR_TRUE; 99 *dst = '\0'; 100 return dst - buf; 101 } 102 inputHead = inputBuf; 103 inputTail = inputBuf + nRead; 104 goto loop; 105 } 106 107 PRInt32 DrainInputBuffer(char* buf, PRUint32 bufSize) { 108 PRInt32 nBytes = inputTail - inputHead; 109 110 if (nBytes == 0) { 111 if (endOfStream) { 112 return -1; 113 } else { 114 return 0; 115 } 116 } 117 if ((PRInt32)bufSize < nBytes) { 118 nBytes = bufSize; 119 } 120 memcpy(buf, inputHead, nBytes); 121 inputHead += nBytes; 122 return nBytes; 123 } 124 125 PRStatus FetchFile(PRFileDesc* in, PRFileDesc* out) { 126 char buf[FCOPY_BUFFER_SIZE]; 127 PRInt32 nBytes; 128 129 while ((nBytes = DrainInputBuffer(buf, sizeof(buf))) > 0) { 130 if (PR_Write(out, buf, nBytes) != nBytes) { 131 fprintf(stderr, "httpget: cannot write to file\n"); 132 return PR_FAILURE; 133 } 134 } 135 if (nBytes < 0) { 136 /* Input buffer is empty and end of stream */ 137 return PR_SUCCESS; 138 } 139 while ((nBytes = PR_Read(in, buf, sizeof(buf))) > 0) { 140 if (PR_Write(out, buf, nBytes) != nBytes) { 141 fprintf(stderr, "httpget: cannot write to file\n"); 142 return PR_FAILURE; 143 } 144 } 145 if (nBytes < 0) { 146 fprintf(stderr, "httpget: cannot read from socket\n"); 147 return PR_FAILURE; 148 } 149 return PR_SUCCESS; 150 } 151 152 PRStatus FastFetchFile(PRFileDesc* in, PRFileDesc* out, PRUint32 size) { 153 PRInt32 nBytes; 154 PRFileMap* outfMap; 155 void* addr; 156 char* start; 157 PRUint32 rem; 158 PRUint32 bytesToRead; 159 PRStatus rv; 160 PRInt64 sz64; 161 162 LL_UI2L(sz64, size); 163 outfMap = PR_CreateFileMap(out, sz64, PR_PROT_READWRITE); 164 PR_ASSERT(outfMap); 165 addr = PR_MemMap(outfMap, LL_ZERO, size); 166 if (addr == NULL) { 167 fprintf(stderr, "cannot memory-map file: (%d, %d)\n", PR_GetError(), 168 PR_GetOSError()); 169 170 PR_CloseFileMap(outfMap); 171 return PR_FAILURE; 172 } 173 start = (char*)addr; 174 rem = size; 175 while ((nBytes = DrainInputBuffer(start, rem)) > 0) { 176 start += nBytes; 177 rem -= nBytes; 178 } 179 if (nBytes < 0) { 180 /* Input buffer is empty and end of stream */ 181 return PR_SUCCESS; 182 } 183 bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE; 184 while (rem > 0 && (nBytes = PR_Read(in, start, bytesToRead)) > 0) { 185 start += nBytes; 186 rem -= nBytes; 187 bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE; 188 } 189 if (nBytes < 0) { 190 fprintf(stderr, "httpget: cannot read from socket\n"); 191 return PR_FAILURE; 192 } 193 rv = PR_MemUnmap(addr, size); 194 PR_ASSERT(rv == PR_SUCCESS); 195 rv = PR_CloseFileMap(outfMap); 196 PR_ASSERT(rv == PR_SUCCESS); 197 return PR_SUCCESS; 198 } 199 200 PRStatus ParseURL(char* url, char* host, PRUint32 hostSize, char* port, 201 PRUint32 portSize, char* path, PRUint32 pathSize) { 202 char *start, *end; 203 char* dst; 204 char* hostEnd; 205 char* portEnd; 206 char* pathEnd; 207 208 if (strncmp(url, "http", 4)) { 209 fprintf(stderr, "httpget: the protocol must be http\n"); 210 return PR_FAILURE; 211 } 212 if (strncmp(url + 4, "://", 3) || url[7] == '\0') { 213 fprintf(stderr, "httpget: malformed URL: %s\n", url); 214 return PR_FAILURE; 215 } 216 217 start = end = url + 7; 218 dst = host; 219 hostEnd = host + hostSize; 220 while (*end && *end != ':' && *end != '/') { 221 if (dst == hostEnd - 1) { 222 fprintf(stderr, "httpget: host name too long\n"); 223 return PR_FAILURE; 224 } 225 *(dst++) = *(end++); 226 } 227 *dst = '\0'; 228 229 if (*end == '\0') { 230 PR_snprintf(port, portSize, "%d", 80); 231 PR_snprintf(path, pathSize, "%s", "/"); 232 return PR_SUCCESS; 233 } 234 235 if (*end == ':') { 236 end++; 237 dst = port; 238 portEnd = port + portSize; 239 while (*end && *end != '/') { 240 if (dst == portEnd - 1) { 241 fprintf(stderr, "httpget: port number too long\n"); 242 return PR_FAILURE; 243 } 244 *(dst++) = *(end++); 245 } 246 *dst = '\0'; 247 if (*end == '\0') { 248 PR_snprintf(path, pathSize, "%s", "/"); 249 return PR_SUCCESS; 250 } 251 } else { 252 PR_snprintf(port, portSize, "%d", 80); 253 } 254 255 dst = path; 256 pathEnd = path + pathSize; 257 while (*end) { 258 if (dst == pathEnd - 1) { 259 fprintf(stderr, "httpget: file pathname too long\n"); 260 return PR_FAILURE; 261 } 262 *(dst++) = *(end++); 263 } 264 *dst = '\0'; 265 return PR_SUCCESS; 266 } 267 268 void PrintUsage(void) { 269 fprintf(stderr, 270 "usage: httpget url\n" 271 " httpget -o outputfile url\n" 272 " httpget url -o outputfile\n"); 273 } 274 275 int main(int argc, char** argv) { 276 PRHostEnt hostentry; 277 char buf[PR_NETDB_BUF_SIZE]; 278 PRNetAddr addr; 279 PRFileDesc *socket = NULL, *file = NULL; 280 PRIntn cmdSize; 281 char host[HOST_SIZE]; 282 char port[PORT_SIZE]; 283 char path[PATH_SIZE]; 284 char line[LINE_SIZE]; 285 int exitStatus = 0; 286 PRBool endOfHeader = PR_FALSE; 287 char* url; 288 char* fileName = NULL; 289 PRUint32 fileSize; 290 291 if (argc != 2 && argc != 4) { 292 PrintUsage(); 293 exit(1); 294 } 295 296 if (argc == 2) { 297 /* 298 * case 1: httpget url 299 */ 300 url = argv[1]; 301 } else { 302 if (strcmp(argv[1], "-o") == 0) { 303 /* 304 * case 2: httpget -o outputfile url 305 */ 306 fileName = argv[2]; 307 url = argv[3]; 308 } else { 309 /* 310 * case 3: httpget url -o outputfile 311 */ 312 url = argv[1]; 313 if (strcmp(argv[2], "-o") != 0) { 314 PrintUsage(); 315 exit(1); 316 } 317 fileName = argv[3]; 318 } 319 } 320 321 if (ParseURL(url, host, sizeof(host), port, sizeof(port), path, 322 sizeof(path)) == PR_FAILURE) { 323 exit(1); 324 } 325 326 if (PR_GetHostByName(host, buf, sizeof(buf), &hostentry) == PR_FAILURE) { 327 fprintf(stderr, "httpget: unknown host name: %s\n", host); 328 exit(1); 329 } 330 331 addr.inet.family = PR_AF_INET; 332 addr.inet.port = PR_htons((short)atoi(port)); 333 addr.inet.ip = *((PRUint32*)hostentry.h_addr_list[0]); 334 335 socket = PR_NewTCPSocket(); 336 if (socket == NULL) { 337 fprintf(stderr, "httpget: cannot create new tcp socket\n"); 338 exit(1); 339 } 340 341 if (PR_Connect(socket, &addr, PR_INTERVAL_NO_TIMEOUT) == PR_FAILURE) { 342 fprintf(stderr, "httpget: cannot connect to http server\n"); 343 exitStatus = 1; 344 goto done; 345 } 346 347 if (fileName == NULL) { 348 file = PR_STDOUT; 349 } else { 350 file = PR_Open(fileName, PR_RDWR | PR_CREATE_FILE | PR_TRUNCATE, 00777); 351 if (file == NULL) { 352 fprintf(stderr, "httpget: cannot open file %s: (%d, %d)\n", fileName, 353 PR_GetError(), PR_GetOSError()); 354 exitStatus = 1; 355 goto done; 356 } 357 } 358 359 cmdSize = PR_snprintf(buf, sizeof(buf), "GET %s HTTP/1.0\r\n\r\n", path); 360 PR_ASSERT(cmdSize == 361 (PRIntn)strlen("GET HTTP/1.0\r\n\r\n") + (PRIntn)strlen(path)); 362 if (PR_Write(socket, buf, cmdSize) != cmdSize) { 363 fprintf(stderr, "httpget: cannot write to http server\n"); 364 exitStatus = 1; 365 goto done; 366 } 367 368 if (ReadLine(socket, line, sizeof(line)) <= 0) { 369 fprintf(stderr, "httpget: cannot read line from http server\n"); 370 exitStatus = 1; 371 goto done; 372 } 373 374 /* HTTP response: 200 == OK */ 375 if (strstr(line, "200") == NULL) { 376 fprintf(stderr, "httpget: %s\n", line); 377 exitStatus = 1; 378 goto done; 379 } 380 381 while (ReadLine(socket, line, sizeof(line)) > 0) { 382 if (line[0] == '\n') { 383 endOfHeader = PR_TRUE; 384 break; 385 } 386 if (strncmp(line, "Content-Length", 14) == 0 || 387 strncmp(line, "Content-length", 14) == 0) { 388 char* p = line + 14; 389 390 while (*p == ' ' || *p == '\t') { 391 p++; 392 } 393 if (*p != ':') { 394 continue; 395 } 396 p++; 397 while (*p == ' ' || *p == '\t') { 398 p++; 399 } 400 fileSize = 0; 401 while ('0' <= *p && *p <= '9') { 402 fileSize = 10 * fileSize + (*p - '0'); 403 p++; 404 } 405 } 406 } 407 if (endOfHeader == PR_FALSE) { 408 fprintf(stderr, "httpget: cannot read line from http server\n"); 409 exitStatus = 1; 410 goto done; 411 } 412 413 if (fileName == NULL || fileSize == 0) { 414 FetchFile(socket, file); 415 } else { 416 FastFetchFile(socket, file, fileSize); 417 } 418 419 done: 420 if (socket) { 421 PR_Close(socket); 422 } 423 if (file) { 424 PR_Close(file); 425 } 426 PR_Cleanup(); 427 return exitStatus; 428 }