tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

httpget.c (10691B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 /*
      7 * Author: Wan-Teh Chang
      8 *
      9 * Given an HTTP URL, httpget uses the GET method to fetch the file.
     10 * The fetched file is written to stdout by default, or can be
     11 * saved in an output file.
     12 *
     13 * This is a single-threaded program.
     14 */
     15 
     16 #include "prio.h"
     17 #include "prnetdb.h"
     18 #include "prlog.h"
     19 #include "prerror.h"
     20 #include "prprf.h"
     21 #include "prinit.h"
     22 
     23 #include <stdio.h>
     24 #include <string.h>
     25 #include <stdlib.h> /* for atoi */
     26 
     27 #define FCOPY_BUFFER_SIZE (16 * 1024)
     28 #define INPUT_BUFFER_SIZE 1024
     29 #define LINE_SIZE 512
     30 #define HOST_SIZE 256
     31 #define PORT_SIZE 32
     32 #define PATH_SIZE 512
     33 
     34 /*
     35 * A buffer for storing the excess input data for ReadLine.
     36 * The data in the buffer starts from (including) the element pointed to
     37 * by inputHead, and ends just before (not including) the element pointed
     38 * to by inputTail.  The buffer is empty if inputHead == inputTail.
     39 */
     40 
     41 static char inputBuf[INPUT_BUFFER_SIZE];
     42 /*
     43 * inputBufEnd points just past the end of inputBuf
     44 */
     45 static char* inputBufEnd = inputBuf + sizeof(inputBuf);
     46 static char* inputHead = inputBuf;
     47 static char* inputTail = inputBuf;
     48 
/*
 * Set to PR_TRUE by ReadLine once PR_Read returns 0.  DrainInputBuffer
 * returns -1 when this flag is set and the input buffer is empty.
 */
static PRBool endOfStream = PR_FALSE;
     50 
     51 /*
     52 * ReadLine --
     53 *
     54 * Read in a line of text, terminated by CRLF or LF, from fd into buf.
     55 * The terminating CRLF or LF is included (always as '\n').  The text
     56 * in buf is terminated by a null byte.  The excess bytes are stored in
     57 * inputBuf for use in the next ReadLine call or FetchFile call.
     58 * Returns the number of bytes in buf.  0 means end of stream.  Returns
     59 * -1 if read fails.
     60 */
     61 
     62 PRInt32 ReadLine(PRFileDesc* fd, char* buf, PRUint32 bufSize) {
     63  char* dst = buf;
     64  char* bufEnd = buf + bufSize; /* just past the end of buf */
     65  PRBool lineFound = PR_FALSE;
     66  char* crPtr = NULL; /* points to the CR ('\r') character */
     67  PRInt32 nRead;
     68 
     69 loop:
     70  PR_ASSERT(inputBuf <= inputHead && inputHead <= inputTail &&
     71            inputTail <= inputBufEnd);
     72  while (lineFound == PR_FALSE && inputHead != inputTail && dst < bufEnd - 1) {
     73    if (*inputHead == '\r') {
     74      crPtr = dst;
     75    } else if (*inputHead == '\n') {
     76      lineFound = PR_TRUE;
     77      if (crPtr == dst - 1) {
     78        dst--;
     79      }
     80    }
     81    *(dst++) = *(inputHead++);
     82  }
     83  if (lineFound == PR_TRUE || dst == bufEnd - 1 || endOfStream == PR_TRUE) {
     84    *dst = '\0';
     85    return dst - buf;
     86  }
     87 
     88  /*
     89   * The input buffer should be empty now
     90   */
     91  PR_ASSERT(inputHead == inputTail);
     92 
     93  nRead = PR_Read(fd, inputBuf, sizeof(inputBuf));
     94  if (nRead == -1) {
     95    *dst = '\0';
     96    return -1;
     97  } else if (nRead == 0) {
     98    endOfStream = PR_TRUE;
     99    *dst = '\0';
    100    return dst - buf;
    101  }
    102  inputHead = inputBuf;
    103  inputTail = inputBuf + nRead;
    104  goto loop;
    105 }
    106 
    107 PRInt32 DrainInputBuffer(char* buf, PRUint32 bufSize) {
    108  PRInt32 nBytes = inputTail - inputHead;
    109 
    110  if (nBytes == 0) {
    111    if (endOfStream) {
    112      return -1;
    113    } else {
    114      return 0;
    115    }
    116  }
    117  if ((PRInt32)bufSize < nBytes) {
    118    nBytes = bufSize;
    119  }
    120  memcpy(buf, inputHead, nBytes);
    121  inputHead += nBytes;
    122  return nBytes;
    123 }
    124 
    125 PRStatus FetchFile(PRFileDesc* in, PRFileDesc* out) {
    126  char buf[FCOPY_BUFFER_SIZE];
    127  PRInt32 nBytes;
    128 
    129  while ((nBytes = DrainInputBuffer(buf, sizeof(buf))) > 0) {
    130    if (PR_Write(out, buf, nBytes) != nBytes) {
    131      fprintf(stderr, "httpget: cannot write to file\n");
    132      return PR_FAILURE;
    133    }
    134  }
    135  if (nBytes < 0) {
    136    /* Input buffer is empty and end of stream */
    137    return PR_SUCCESS;
    138  }
    139  while ((nBytes = PR_Read(in, buf, sizeof(buf))) > 0) {
    140    if (PR_Write(out, buf, nBytes) != nBytes) {
    141      fprintf(stderr, "httpget: cannot write to file\n");
    142      return PR_FAILURE;
    143    }
    144  }
    145  if (nBytes < 0) {
    146    fprintf(stderr, "httpget: cannot read from socket\n");
    147    return PR_FAILURE;
    148  }
    149  return PR_SUCCESS;
    150 }
    151 
    152 PRStatus FastFetchFile(PRFileDesc* in, PRFileDesc* out, PRUint32 size) {
    153  PRInt32 nBytes;
    154  PRFileMap* outfMap;
    155  void* addr;
    156  char* start;
    157  PRUint32 rem;
    158  PRUint32 bytesToRead;
    159  PRStatus rv;
    160  PRInt64 sz64;
    161 
    162  LL_UI2L(sz64, size);
    163  outfMap = PR_CreateFileMap(out, sz64, PR_PROT_READWRITE);
    164  PR_ASSERT(outfMap);
    165  addr = PR_MemMap(outfMap, LL_ZERO, size);
    166  if (addr == NULL) {
    167    fprintf(stderr, "cannot memory-map file: (%d, %d)\n", PR_GetError(),
    168            PR_GetOSError());
    169 
    170    PR_CloseFileMap(outfMap);
    171    return PR_FAILURE;
    172  }
    173  start = (char*)addr;
    174  rem = size;
    175  while ((nBytes = DrainInputBuffer(start, rem)) > 0) {
    176    start += nBytes;
    177    rem -= nBytes;
    178  }
    179  if (nBytes < 0) {
    180    /* Input buffer is empty and end of stream */
    181    return PR_SUCCESS;
    182  }
    183  bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE;
    184  while (rem > 0 && (nBytes = PR_Read(in, start, bytesToRead)) > 0) {
    185    start += nBytes;
    186    rem -= nBytes;
    187    bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE;
    188  }
    189  if (nBytes < 0) {
    190    fprintf(stderr, "httpget: cannot read from socket\n");
    191    return PR_FAILURE;
    192  }
    193  rv = PR_MemUnmap(addr, size);
    194  PR_ASSERT(rv == PR_SUCCESS);
    195  rv = PR_CloseFileMap(outfMap);
    196  PR_ASSERT(rv == PR_SUCCESS);
    197  return PR_SUCCESS;
    198 }
    199 
    200 PRStatus ParseURL(char* url, char* host, PRUint32 hostSize, char* port,
    201                  PRUint32 portSize, char* path, PRUint32 pathSize) {
    202  char *start, *end;
    203  char* dst;
    204  char* hostEnd;
    205  char* portEnd;
    206  char* pathEnd;
    207 
    208  if (strncmp(url, "http", 4)) {
    209    fprintf(stderr, "httpget: the protocol must be http\n");
    210    return PR_FAILURE;
    211  }
    212  if (strncmp(url + 4, "://", 3) || url[7] == '\0') {
    213    fprintf(stderr, "httpget: malformed URL: %s\n", url);
    214    return PR_FAILURE;
    215  }
    216 
    217  start = end = url + 7;
    218  dst = host;
    219  hostEnd = host + hostSize;
    220  while (*end && *end != ':' && *end != '/') {
    221    if (dst == hostEnd - 1) {
    222      fprintf(stderr, "httpget: host name too long\n");
    223      return PR_FAILURE;
    224    }
    225    *(dst++) = *(end++);
    226  }
    227  *dst = '\0';
    228 
    229  if (*end == '\0') {
    230    PR_snprintf(port, portSize, "%d", 80);
    231    PR_snprintf(path, pathSize, "%s", "/");
    232    return PR_SUCCESS;
    233  }
    234 
    235  if (*end == ':') {
    236    end++;
    237    dst = port;
    238    portEnd = port + portSize;
    239    while (*end && *end != '/') {
    240      if (dst == portEnd - 1) {
    241        fprintf(stderr, "httpget: port number too long\n");
    242        return PR_FAILURE;
    243      }
    244      *(dst++) = *(end++);
    245    }
    246    *dst = '\0';
    247    if (*end == '\0') {
    248      PR_snprintf(path, pathSize, "%s", "/");
    249      return PR_SUCCESS;
    250    }
    251  } else {
    252    PR_snprintf(port, portSize, "%d", 80);
    253  }
    254 
    255  dst = path;
    256  pathEnd = path + pathSize;
    257  while (*end) {
    258    if (dst == pathEnd - 1) {
    259      fprintf(stderr, "httpget: file pathname too long\n");
    260      return PR_FAILURE;
    261    }
    262    *(dst++) = *(end++);
    263  }
    264  *dst = '\0';
    265  return PR_SUCCESS;
    266 }
    267 
    268 void PrintUsage(void) {
    269  fprintf(stderr,
    270          "usage: httpget url\n"
    271          "       httpget -o outputfile url\n"
    272          "       httpget url -o outputfile\n");
    273 }
    274 
    275 int main(int argc, char** argv) {
    276  PRHostEnt hostentry;
    277  char buf[PR_NETDB_BUF_SIZE];
    278  PRNetAddr addr;
    279  PRFileDesc *socket = NULL, *file = NULL;
    280  PRIntn cmdSize;
    281  char host[HOST_SIZE];
    282  char port[PORT_SIZE];
    283  char path[PATH_SIZE];
    284  char line[LINE_SIZE];
    285  int exitStatus = 0;
    286  PRBool endOfHeader = PR_FALSE;
    287  char* url;
    288  char* fileName = NULL;
    289  PRUint32 fileSize;
    290 
    291  if (argc != 2 && argc != 4) {
    292    PrintUsage();
    293    exit(1);
    294  }
    295 
    296  if (argc == 2) {
    297    /*
    298     * case 1: httpget url
    299     */
    300    url = argv[1];
    301  } else {
    302    if (strcmp(argv[1], "-o") == 0) {
    303      /*
    304       * case 2: httpget -o outputfile url
    305       */
    306      fileName = argv[2];
    307      url = argv[3];
    308    } else {
    309      /*
    310       * case 3: httpget url -o outputfile
    311       */
    312      url = argv[1];
    313      if (strcmp(argv[2], "-o") != 0) {
    314        PrintUsage();
    315        exit(1);
    316      }
    317      fileName = argv[3];
    318    }
    319  }
    320 
    321  if (ParseURL(url, host, sizeof(host), port, sizeof(port), path,
    322               sizeof(path)) == PR_FAILURE) {
    323    exit(1);
    324  }
    325 
    326  if (PR_GetHostByName(host, buf, sizeof(buf), &hostentry) == PR_FAILURE) {
    327    fprintf(stderr, "httpget: unknown host name: %s\n", host);
    328    exit(1);
    329  }
    330 
    331  addr.inet.family = PR_AF_INET;
    332  addr.inet.port = PR_htons((short)atoi(port));
    333  addr.inet.ip = *((PRUint32*)hostentry.h_addr_list[0]);
    334 
    335  socket = PR_NewTCPSocket();
    336  if (socket == NULL) {
    337    fprintf(stderr, "httpget: cannot create new tcp socket\n");
    338    exit(1);
    339  }
    340 
    341  if (PR_Connect(socket, &addr, PR_INTERVAL_NO_TIMEOUT) == PR_FAILURE) {
    342    fprintf(stderr, "httpget: cannot connect to http server\n");
    343    exitStatus = 1;
    344    goto done;
    345  }
    346 
    347  if (fileName == NULL) {
    348    file = PR_STDOUT;
    349  } else {
    350    file = PR_Open(fileName, PR_RDWR | PR_CREATE_FILE | PR_TRUNCATE, 00777);
    351    if (file == NULL) {
    352      fprintf(stderr, "httpget: cannot open file %s: (%d, %d)\n", fileName,
    353              PR_GetError(), PR_GetOSError());
    354      exitStatus = 1;
    355      goto done;
    356    }
    357  }
    358 
    359  cmdSize = PR_snprintf(buf, sizeof(buf), "GET %s HTTP/1.0\r\n\r\n", path);
    360  PR_ASSERT(cmdSize ==
    361            (PRIntn)strlen("GET  HTTP/1.0\r\n\r\n") + (PRIntn)strlen(path));
    362  if (PR_Write(socket, buf, cmdSize) != cmdSize) {
    363    fprintf(stderr, "httpget: cannot write to http server\n");
    364    exitStatus = 1;
    365    goto done;
    366  }
    367 
    368  if (ReadLine(socket, line, sizeof(line)) <= 0) {
    369    fprintf(stderr, "httpget: cannot read line from http server\n");
    370    exitStatus = 1;
    371    goto done;
    372  }
    373 
    374  /* HTTP response: 200 == OK */
    375  if (strstr(line, "200") == NULL) {
    376    fprintf(stderr, "httpget: %s\n", line);
    377    exitStatus = 1;
    378    goto done;
    379  }
    380 
    381  while (ReadLine(socket, line, sizeof(line)) > 0) {
    382    if (line[0] == '\n') {
    383      endOfHeader = PR_TRUE;
    384      break;
    385    }
    386    if (strncmp(line, "Content-Length", 14) == 0 ||
    387        strncmp(line, "Content-length", 14) == 0) {
    388      char* p = line + 14;
    389 
    390      while (*p == ' ' || *p == '\t') {
    391        p++;
    392      }
    393      if (*p != ':') {
    394        continue;
    395      }
    396      p++;
    397      while (*p == ' ' || *p == '\t') {
    398        p++;
    399      }
    400      fileSize = 0;
    401      while ('0' <= *p && *p <= '9') {
    402        fileSize = 10 * fileSize + (*p - '0');
    403        p++;
    404      }
    405    }
    406  }
    407  if (endOfHeader == PR_FALSE) {
    408    fprintf(stderr, "httpget: cannot read line from http server\n");
    409    exitStatus = 1;
    410    goto done;
    411  }
    412 
    413  if (fileName == NULL || fileSize == 0) {
    414    FetchFile(socket, file);
    415  } else {
    416    FastFetchFile(socket, file, fileSize);
    417  }
    418 
    419 done:
    420  if (socket) {
    421    PR_Close(socket);
    422  }
    423  if (file) {
    424    PR_Close(file);
    425  }
    426  PR_Cleanup();
    427  return exitStatus;
    428 }