tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

javascript.c (57966B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 #include "signtool.h"
      6 #include <prmem.h>
      7 #include <prio.h>
      8 #include <prenv.h>
      9 
/* Forward declarations of file-local helpers. */
static int javascript_fn(char *relpath, char *basedir, char *reldir,
                         char *filename, void *arg);
static int extract_js(char *filename);
static int copyinto(char *from, char *to);
static PRStatus ensureExists(char *base, char *path);
static int make_dirs(char *path, PRInt32 file_perms);

/* Directory most recently passed to InlineJavaScript(). */
static char *jartree = NULL;
/* NOTE(review): not referenced in this part of the file; presumably a
 * running counter used when generating script ids -- confirm at its users. */
static int idOrdinal;
/* Switched to PR_TRUE by InlineJavaScript() when SIGNTOOL_DUMP_PARSE is
 * present in the environment; enables extra diagnostic output. */
static PRBool dumpParse = PR_FALSE;
     20 
     21 static char *event_handlers[] = {
     22    "onAbort",
     23    "onBlur",
     24    "onChange",
     25    "onClick",
     26    "onDblClick",
     27    "onDragDrop",
     28    "onError",
     29    "onFocus",
     30    "onKeyDown",
     31    "onKeyPress",
     32    "onKeyUp",
     33    "onLoad",
     34    "onMouseDown",
     35    "onMouseMove",
     36    "onMouseOut",
     37    "onMouseOver",
     38    "onMouseUp",
     39    "onMove",
     40    "onReset",
     41    "onResize",
     42    "onSelect",
     43    "onSubmit",
     44    "onUnload"
     45 };
     46 
     47 static int num_handlers = 23;
     48 
     49 /*
     50 *  I n l i n e J a v a S c r i p t
     51 *
     52 *  Javascript signing. Instead of passing an archive to signtool,
     53 *  a directory containing html files is given. Archives are created
     54 *  from the archive= and src= tag attributes inside the html,
     55 *  as appropriate. Then the archives are signed.
     56 *
     57 */
     58 int
     59 InlineJavaScript(char *dir, PRBool recurse)
     60 {
     61    jartree = dir;
     62    if (verbosity >= 0) {
     63        PR_fprintf(outputFD, "\nGenerating inline signatures from HTML files in: %s\n",
     64                   dir);
     65    }
     66    if (PR_GetEnvSecure("SIGNTOOL_DUMP_PARSE")) {
     67        dumpParse = PR_TRUE;
     68    }
     69 
     70    return foreach (dir, "", javascript_fn, recurse, PR_FALSE /*include dirs*/,
     71                    (void *)NULL);
     72 }
     73 
     74 /************************************************************************
     75 *
     76 * j a v a s c r i p t _ f n
     77 */
     78 static int
     79 javascript_fn(char *relpath, char *basedir, char *reldir, char *filename, void *arg)
     80 {
     81    char fullname[FNSIZE];
     82 
     83    /* only process inline scripts from .htm, .html, and .shtml*/
     84 
     85    if (!(PL_strcaserstr(filename, ".htm") == filename + strlen(filename) - 4) &&
     86        !(PL_strcaserstr(filename, ".html") == filename + strlen(filename) - 5) &&
     87        !(PL_strcaserstr(filename, ".shtml") == filename + strlen(filename) - 6)) {
     88        return 0;
     89    }
     90 
     91    /* don't process scripts that signtool has already
     92     extracted (those that are inside .arc directories) */
     93 
     94    if (PL_strcaserstr(filename, ".arc") == filename + strlen(filename) - 4)
     95        return 0;
     96 
     97    if (verbosity >= 0) {
     98        PR_fprintf(outputFD, "Processing HTML file: %s\n", relpath);
     99    }
    100 
    101    /* reset firstArchive at top of each HTML file */
    102 
    103    /* skip directories that contain extracted scripts */
    104 
    105    if (PL_strcaserstr(reldir, ".arc") == reldir + strlen(reldir) - 4)
    106        return 0;
    107 
    108    snprintf(fullname, sizeof(fullname), "%s/%s", basedir, relpath);
    109    return extract_js(fullname);
    110 }
    111 
    112 /*===========================================================================
    113 =
    114 = D A T A   S T R U C T U R E S
    115 =
    116 */
    117 typedef enum {
    118    TEXT_HTML_STATE = 0,
    119    SCRIPT_HTML_STATE
    120 }
    121 
    122 HTML_STATE;
    123 
    124 typedef enum {
    125    /* we start in the start state */
    126    START_STATE,
    127 
    128    /* We are looking for or reading in an attribute */
    129    GET_ATT_STATE,
    130 
    131    /* We're burning ws before finding an attribute */
    132    PRE_ATT_WS_STATE,
    133 
    134    /* We're burning ws after an attribute.  Looking for an '='. */
    135    POST_ATT_WS_STATE,
    136 
    137    /* We're burning ws after an '=', waiting for a value */
    138    PRE_VAL_WS_STATE,
    139 
    140    /* We're reading in a value */
    141    GET_VALUE_STATE,
    142 
    143    /* We're reading in a value that's inside quotes */
    144    GET_QUOTED_VAL_STATE,
    145 
    146    /* We've encountered the closing '>' */
    147    DONE_STATE,
    148 
    149    /* Error state */
    150    ERR_STATE
    151 }
    152 
    153 TAG_STATE;
    154 
    155 typedef struct AVPair_Str {
    156    char *attribute;
    157    char *value;
    158    unsigned int valueLine; /* the line that the value ends on */
    159    struct AVPair_Str *next;
    160 } AVPair;
    161 
    162 typedef enum {
    163    APPLET_TAG,
    164    SCRIPT_TAG,
    165    LINK_TAG,
    166    STYLE_TAG,
    167    COMMENT_TAG,
    168    OTHER_TAG
    169 }
    170 
    171 TAG_TYPE;
    172 
    173 typedef struct {
    174    TAG_TYPE type;
    175    AVPair *attList;
    176    AVPair *attListTail;
    177    char *text;
    178 } TagItem;
    179 
    180 typedef enum {
    181    TAG_ITEM,
    182    TEXT_ITEM
    183 }
    184 
    185 ITEM_TYPE;
    186 
    187 typedef struct HTMLItem_Str {
    188    unsigned int startLine;
    189    unsigned int endLine;
    190    ITEM_TYPE type;
    191    union {
    192        TagItem *tag;
    193        char *text;
    194    } item;
    195    struct HTMLItem_Str *next;
    196 } HTMLItem;
    197 
    198 typedef struct {
    199    PRFileDesc *fd;
    200    PRInt32 curIndex;
    201    PRBool IsEOF;
    202 #define FILE_BUFFER_BUFSIZE 512
    203    char buf[FILE_BUFFER_BUFSIZE];
    204    PRInt32 startOffset;
    205    PRInt32 maxIndex;
    206    unsigned int lineNum;
    207 } FileBuffer;
    208 
    209 /*===========================================================================
    210 =
    211 = F U N C T I O N S
    212 =
    213 */
/* Construction, parsing and destruction of HTML stream items. */
static HTMLItem *CreateTextItem(char *text, unsigned int startline,
                                unsigned int endline);
static HTMLItem *CreateTagItem(TagItem *ti, unsigned int startline,
                               unsigned int endline);
static TagItem *ProcessTag(FileBuffer *fb, char **errStr);
static void DestroyHTMLItem(HTMLItem *item);
static void DestroyTagItem(TagItem *ti);
static TAG_TYPE GetTagType(char *att);
/* Buffered file reader (FileBuffer) operations. */
static FileBuffer *FB_Create(PRFileDesc *fd);
static int FB_GetChar(FileBuffer *fb);
static PRInt32 FB_GetPointer(FileBuffer *fb);
static PRInt32 FB_GetRange(FileBuffer *fb, PRInt32 start, PRInt32 end,
                           char **buf);
static unsigned int FB_GetLineNum(FileBuffer *fb);
static void FB_Destroy(FileBuffer *fb);
/* Diagnostic printers for the parsed stream. */
static void PrintTagItem(PRFileDesc *fd, TagItem *ti);
static void PrintHTMLStream(PRFileDesc *fd, HTMLItem *head);
    231 
    232 /************************************************************************
    233 *
    234 * C r e a t e T e x t I t e m
    235 */
    236 static HTMLItem *
    237 CreateTextItem(char *text, unsigned int startline, unsigned int endline)
    238 {
    239    HTMLItem *item;
    240 
    241    item = PR_Malloc(sizeof(HTMLItem));
    242    if (!item) {
    243        return NULL;
    244    }
    245 
    246    item->type = TEXT_ITEM;
    247    item->item.text = text;
    248    item->next = NULL;
    249    item->startLine = startline;
    250    item->endLine = endline;
    251 
    252    return item;
    253 }
    254 
    255 /************************************************************************
    256 *
    257 * C r e a t e T a g I t e m
    258 */
    259 static HTMLItem *
    260 CreateTagItem(TagItem *ti, unsigned int startline, unsigned int endline)
    261 {
    262    HTMLItem *item;
    263 
    264    item = PR_Malloc(sizeof(HTMLItem));
    265    if (!item) {
    266        return NULL;
    267    }
    268 
    269    item->type = TAG_ITEM;
    270    item->item.tag = ti;
    271    item->next = NULL;
    272    item->startLine = startline;
    273    item->endLine = endline;
    274 
    275    return item;
    276 }
    277 
    278 static PRBool
    279 isAttChar(int c)
    280 {
    281    return (isalnum(c) || c == '/' || c == '-');
    282 }
    283 
    284 /************************************************************************
    285 *
    286 * P r o c e s s T a g
    287 */
    288 static TagItem *
    289 ProcessTag(FileBuffer *fb, char **errStr)
    290 {
    291    TAG_STATE state;
    292    PRInt32 startText, startID, curPos;
    293    PRBool firstAtt;
    294    int curchar;
    295    TagItem *ti = NULL;
    296    AVPair *curPair = NULL;
    297    char quotechar = '\0';
    298    unsigned int linenum;
    299    unsigned int startline;
    300 
    301    state = START_STATE;
    302 
    303    startID = FB_GetPointer(fb);
    304    startText = startID;
    305    firstAtt = PR_TRUE;
    306 
    307    ti = (TagItem *)PR_Malloc(sizeof(TagItem));
    308    if (!ti)
    309        out_of_memory();
    310    ti->type = OTHER_TAG;
    311    ti->attList = NULL;
    312    ti->attListTail = NULL;
    313    ti->text = NULL;
    314 
    315    startline = FB_GetLineNum(fb);
    316 
    317    while (state != DONE_STATE && state != ERR_STATE) {
    318        linenum = FB_GetLineNum(fb);
    319        curchar = FB_GetChar(fb);
    320        if (curchar == EOF) {
    321            *errStr = PR_smprintf(
    322                "line %d: Unexpected end-of-file while parsing tag starting at line %d.\n",
    323                linenum, startline);
    324            state = ERR_STATE;
    325            continue;
    326        }
    327 
    328        switch (state) {
    329            case START_STATE:
    330                if (curchar == '!') {
    331                    /*
    332                     * SGML tag or comment
    333                     * Here's the general rule for SGML tags.  Everything from
    334                     * <! to > is the tag.  Inside the tag, comments are
    335                     * delimited with --.  So we are looking for the first '>'
    336                     * that is not commented out, that is, not inside a pair
    337                     * of --: <!DOCTYPE --this is a comment >(psyche!)   -->
    338                     */
    339 
    340                    PRBool inComment = PR_FALSE;
    341                    short hyphenCount = 0; /* number of consecutive hyphens */
    342 
    343                    while (1) {
    344                        linenum = FB_GetLineNum(fb);
    345                        curchar = FB_GetChar(fb);
    346                        if (curchar == EOF) {
    347                            /* Uh oh, EOF inside comment */
    348                            *errStr = PR_smprintf(
    349                                "line %d: Unexpected end-of-file inside comment starting at line %d.\n",
    350                                linenum, startline);
    351                            state = ERR_STATE;
    352                            break;
    353                        }
    354                        if (curchar == '-') {
    355                            if (hyphenCount == 1) {
    356                                /* This is a comment delimiter */
    357                                inComment = !inComment;
    358                                hyphenCount = 0;
    359                            } else {
    360                                /* beginning of a comment delimiter? */
    361                                hyphenCount = 1;
    362                            }
    363                        } else if (curchar == '>') {
    364                            if (!inComment) {
    365                                /* This is the end of the tag */
    366                                state = DONE_STATE;
    367                                break;
    368                            } else {
    369                                /* The > is inside a comment, so it's not
    370                                 * really the end of the tag */
    371                                hyphenCount = 0;
    372                            }
    373                        } else {
    374                            hyphenCount = 0;
    375                        }
    376                    }
    377                    ti->type = COMMENT_TAG;
    378                    break;
    379                }
    380            /* fall through */
    381            case GET_ATT_STATE:
    382                if (isspace(curchar) || curchar == '=' || curchar == '>') {
    383                    /* end of the current attribute */
    384                    curPos = FB_GetPointer(fb) - 2;
    385                    if (curPos >= startID) {
    386                        /* We have an attribute */
    387                        curPair = (AVPair *)PR_Malloc(sizeof(AVPair));
    388                        if (!curPair)
    389                            out_of_memory();
    390                        curPair->value = NULL;
    391                        curPair->next = NULL;
    392                        FB_GetRange(fb, startID, curPos,
    393                                    &curPair->attribute);
    394 
    395                        /* Stick this attribute on the list */
    396                        if (ti->attListTail) {
    397                            ti->attListTail->next = curPair;
    398                            ti->attListTail = curPair;
    399                        } else {
    400                            ti->attList = ti->attListTail =
    401                                curPair;
    402                        }
    403 
    404                        /* If this is the first attribute, find the type of tag
    405                         * based on it. Also, start saving the text of the tag. */
    406                        if (firstAtt) {
    407                            ti->type = GetTagType(curPair->attribute);
    408                            startText = FB_GetPointer(fb) -
    409                                        1;
    410                            firstAtt = PR_FALSE;
    411                        }
    412                    } else {
    413                        if (curchar == '=') {
    414                            /* If we don't have any attribute but we do have an
    415                             * equal sign, that's an error */
    416                            *errStr = PR_smprintf("line %d: Malformed tag starting at line %d.\n",
    417                                                  linenum, startline);
    418                            state = ERR_STATE;
    419                            break;
    420                        }
    421                    }
    422 
    423                    /* Compute next state */
    424                    if (curchar == '=') {
    425                        startID = FB_GetPointer(fb);
    426                        state = PRE_VAL_WS_STATE;
    427                    } else if (curchar == '>') {
    428                        state = DONE_STATE;
    429                    } else if (curPair) {
    430                        state = POST_ATT_WS_STATE;
    431                    } else {
    432                        state = PRE_ATT_WS_STATE;
    433                    }
    434                } else if (isAttChar(curchar)) {
    435                    /* Just another char in the attribute. Do nothing */
    436                    state = GET_ATT_STATE;
    437                } else {
    438                    /* bogus char */
    439                    *errStr = PR_smprintf("line %d: Bogus chararacter '%c' in tag.\n",
    440                                          linenum, curchar);
    441                    state = ERR_STATE;
    442                    break;
    443                }
    444                break;
    445            case PRE_ATT_WS_STATE:
    446                if (curchar == '>') {
    447                    state = DONE_STATE;
    448                } else if (isspace(curchar)) {
    449                    /* more whitespace, do nothing */
    450                } else if (isAttChar(curchar)) {
    451                    /* starting another attribute */
    452                    startID = FB_GetPointer(fb) - 1;
    453                    state = GET_ATT_STATE;
    454                } else {
    455                    /* bogus char */
    456                    *errStr = PR_smprintf("line %d: Bogus character '%c' in tag.\n",
    457                                          linenum, curchar);
    458                    state = ERR_STATE;
    459                    break;
    460                }
    461                break;
    462            case POST_ATT_WS_STATE:
    463                if (curchar == '>') {
    464                    state = DONE_STATE;
    465                } else if (isspace(curchar)) {
    466                    /* more whitespace, do nothing */
    467                } else if (isAttChar(curchar)) {
    468                    /* starting another attribute */
    469                    startID = FB_GetPointer(fb) - 1;
    470                    state = GET_ATT_STATE;
    471                } else if (curchar == '=') {
    472                    /* there was whitespace between the attribute and its equal
    473                     * sign, which means there's a value coming up */
    474                    state = PRE_VAL_WS_STATE;
    475                } else {
    476                    /* bogus char */
    477                    *errStr = PR_smprintf("line %d: Bogus character '%c' in tag.\n",
    478                                          linenum, curchar);
    479                    state = ERR_STATE;
    480                    break;
    481                }
    482                break;
    483            case PRE_VAL_WS_STATE:
    484                if (curchar == '>') {
    485                    /* premature end-of-tag (sounds like a personal problem). */
    486                    *errStr = PR_smprintf(
    487                        "line %d: End of tag while waiting for value.\n",
    488                        linenum);
    489                    state = ERR_STATE;
    490                    break;
    491                } else if (isspace(curchar)) {
    492                    /* more whitespace, do nothing */
    493                    break;
    494                } else {
    495                    /* this must be some sort of value. Fall through
    496                     * to GET_VALUE_STATE */
    497                    startID = FB_GetPointer(fb) - 1;
    498                    state = GET_VALUE_STATE;
    499                }
    500            /* Fall through if we didn't break on '>' or whitespace */
    501            case GET_VALUE_STATE:
    502                if (isspace(curchar) || curchar == '>') {
    503                    /* end of value */
    504                    curPos = FB_GetPointer(fb) - 2;
    505                    if (curPos >= startID) {
    506                        /* Grab the value */
    507                        FB_GetRange(fb, startID, curPos,
    508                                    &curPair->value);
    509                        curPair->valueLine = linenum;
    510                    } else {
    511                        /* empty value, leave as NULL */
    512                    }
    513                    if (isspace(curchar)) {
    514                        state = PRE_ATT_WS_STATE;
    515                    } else {
    516                        state = DONE_STATE;
    517                    }
    518                } else if (curchar == '\"' || curchar == '\'') {
    519                    /* quoted value.  Start recording the value inside the quote*/
    520                    startID = FB_GetPointer(fb);
    521                    state = GET_QUOTED_VAL_STATE;
    522                    PORT_Assert(quotechar == '\0');
    523                    quotechar = curchar; /* look for matching quote type */
    524                } else {
    525                    /* just more value */
    526                }
    527                break;
    528            case GET_QUOTED_VAL_STATE:
    529                PORT_Assert(quotechar != '\0');
    530                if (curchar == quotechar) {
    531                    /* end of quoted value */
    532                    curPos = FB_GetPointer(fb) - 2;
    533                    if (curPos >= startID) {
    534                        /* Grab the value */
    535                        FB_GetRange(fb, startID, curPos,
    536                                    &curPair->value);
    537                        curPair->valueLine = linenum;
    538                    } else {
    539                        /* empty value, leave it as NULL */
    540                    }
    541                    state = GET_ATT_STATE;
    542                    quotechar = '\0';
    543                    startID = FB_GetPointer(fb);
    544                } else {
    545                    /* more quoted value, continue */
    546                }
    547                break;
    548            case DONE_STATE:
    549            case ERR_STATE:
    550            default:; /* should never get here */
    551        }
    552    }
    553 
    554    if (state == DONE_STATE) {
    555        /* Get the text of the tag */
    556        curPos = FB_GetPointer(fb) - 1;
    557        FB_GetRange(fb, startText, curPos, &ti->text);
    558 
    559        /* Return the tag */
    560        return ti;
    561    }
    562 
    563    /* Uh oh, an error.  Kill the tag item*/
    564    DestroyTagItem(ti);
    565    return NULL;
    566 }
    567 
    568 /************************************************************************
    569 *
    570 * D e s t r o y H T M L I t e m
    571 */
    572 static void
    573 DestroyHTMLItem(HTMLItem *item)
    574 {
    575    if (item->type == TAG_ITEM) {
    576        DestroyTagItem(item->item.tag);
    577    } else {
    578        if (item->item.text) {
    579            PR_Free(item->item.text);
    580        }
    581    }
    582 }
    583 
    584 /************************************************************************
    585 *
    586 * D e s t r o y T a g I t e m
    587 */
    588 static void
    589 DestroyTagItem(TagItem *ti)
    590 {
    591    AVPair *temp;
    592 
    593    if (ti->text) {
    594        PR_Free(ti->text);
    595        ti->text = NULL;
    596    }
    597 
    598    while (ti->attList) {
    599        temp = ti->attList;
    600        ti->attList = ti->attList->next;
    601 
    602        if (temp->attribute) {
    603            PR_Free(temp->attribute);
    604            temp->attribute = NULL;
    605        }
    606        if (temp->value) {
    607            PR_Free(temp->value);
    608            temp->value = NULL;
    609        }
    610        PR_Free(temp);
    611    }
    612 
    613    PR_Free(ti);
    614 }
    615 
    616 /************************************************************************
    617 *
    618 * G e t T a g T y p e
    619 */
    620 static TAG_TYPE
    621 GetTagType(char *att)
    622 {
    623    if (!PORT_Strcasecmp(att, "APPLET")) {
    624        return APPLET_TAG;
    625    }
    626    if (!PORT_Strcasecmp(att, "SCRIPT")) {
    627        return SCRIPT_TAG;
    628    }
    629    if (!PORT_Strcasecmp(att, "LINK")) {
    630        return LINK_TAG;
    631    }
    632    if (!PORT_Strcasecmp(att, "STYLE")) {
    633        return STYLE_TAG;
    634    }
    635    return OTHER_TAG;
    636 }
    637 
    638 /************************************************************************
    639 *
    640 * F B _ C r e a t e
    641 */
    642 static FileBuffer *
    643 FB_Create(PRFileDesc *fd)
    644 {
    645    FileBuffer *fb;
    646    PRInt32 amountRead;
    647    PRInt32 storedOffset;
    648 
    649    fb = (FileBuffer *)PR_Malloc(sizeof(FileBuffer));
    650    fb->fd = fd;
    651    storedOffset = PR_Seek(fd, 0, PR_SEEK_CUR);
    652    PR_Seek(fd, 0, PR_SEEK_SET);
    653    fb->startOffset = 0;
    654    amountRead = PR_Read(fd, fb->buf, FILE_BUFFER_BUFSIZE);
    655    if (amountRead == -1)
    656        goto loser;
    657    fb->maxIndex = amountRead - 1;
    658    fb->curIndex = 0;
    659    fb->IsEOF = (fb->curIndex > fb->maxIndex) ? PR_TRUE : PR_FALSE;
    660    fb->lineNum = 1;
    661 
    662    PR_Seek(fd, storedOffset, PR_SEEK_SET);
    663    return fb;
    664 loser:
    665    PR_Seek(fd, storedOffset, PR_SEEK_SET);
    666    PR_Free(fb);
    667    return NULL;
    668 }
    669 
    670 /************************************************************************
    671 *
    672 * F B _ G e t C h a r
    673 */
    674 static int
    675 FB_GetChar(FileBuffer *fb)
    676 {
    677    PRInt32 storedOffset;
    678    PRInt32 amountRead;
    679    int retval = -1;
    680 
    681    if (fb->IsEOF) {
    682        return EOF;
    683    }
    684 
    685    storedOffset = PR_Seek(fb->fd, 0, PR_SEEK_CUR);
    686 
    687    retval = (unsigned char)fb->buf[fb->curIndex++];
    688    if (retval == '\n')
    689        fb->lineNum++;
    690 
    691    if (fb->curIndex > fb->maxIndex) {
    692        /* We're at the end of the buffer. Try to get some new data from the
    693         * file */
    694        fb->startOffset += fb->maxIndex + 1;
    695        PR_Seek(fb->fd, fb->startOffset, PR_SEEK_SET);
    696        amountRead = PR_Read(fb->fd, fb->buf, FILE_BUFFER_BUFSIZE);
    697        if (amountRead == -1)
    698            goto loser;
    699        fb->maxIndex = amountRead - 1;
    700        fb->curIndex = 0;
    701    }
    702 
    703    fb->IsEOF = (fb->curIndex > fb->maxIndex) ? PR_TRUE : PR_FALSE;
    704 
    705 loser:
    706    PR_Seek(fb->fd, storedOffset, PR_SEEK_SET);
    707    return retval;
    708 }
    709 
    710 /************************************************************************
    711 *
    712 * F B _ G e t L i n e N u m
    713 *
    714 */
    715 static unsigned int
    716 FB_GetLineNum(FileBuffer *fb)
    717 {
    718    return fb->lineNum;
    719 }
    720 
    721 /************************************************************************
    722 *
    723 * F B _ G e t P o i n t e r
    724 *
    725 */
    726 static PRInt32
    727 FB_GetPointer(FileBuffer *fb)
    728 {
    729    return fb->startOffset + fb->curIndex;
    730 }
    731 
    732 /************************************************************************
    733 *
    734 * F B _ G e t R a n g e
    735 *
    736 */
    737 static PRInt32
    738 FB_GetRange(FileBuffer *fb, PRInt32 start, PRInt32 end, char **buf)
    739 {
    740    PRInt32 amountRead;
    741    PRInt32 storedOffset;
    742 
    743    *buf = PR_Malloc(end - start + 2);
    744    if (*buf == NULL) {
    745        return 0;
    746    }
    747 
    748    storedOffset = PR_Seek(fb->fd, 0, PR_SEEK_CUR);
    749    PR_Seek(fb->fd, start, PR_SEEK_SET);
    750    amountRead = PR_Read(fb->fd, *buf, end - start + 1);
    751    PR_Seek(fb->fd, storedOffset, PR_SEEK_SET);
    752    if (amountRead == -1) {
    753        PR_Free(*buf);
    754        *buf = NULL;
    755        return 0;
    756    }
    757 
    758    (*buf)[end - start + 1] = '\0';
    759    return amountRead;
    760 }
    761 
    762 /************************************************************************
    763 *
    764 * F B _ D e s t r o y
    765 *
    766 */
    767 static void
    768 FB_Destroy(FileBuffer *fb)
    769 {
    770    if (fb) {
    771        PR_Free(fb);
    772    }
    773 }
    774 
    775 /************************************************************************
    776 *
    777 * P r i n t T a g I t e m
    778 *
    779 */
    780 static void
    781 PrintTagItem(PRFileDesc *fd, TagItem *ti)
    782 {
    783    AVPair *pair;
    784 
    785    PR_fprintf(fd, "TAG:\n----\nType: ");
    786    switch (ti->type) {
    787        case APPLET_TAG:
    788            PR_fprintf(fd, "applet\n");
    789            break;
    790        case SCRIPT_TAG:
    791            PR_fprintf(fd, "script\n");
    792            break;
    793        case LINK_TAG:
    794            PR_fprintf(fd, "link\n");
    795            break;
    796        case STYLE_TAG:
    797            PR_fprintf(fd, "style\n");
    798            break;
    799        case COMMENT_TAG:
    800            PR_fprintf(fd, "comment\n");
    801            break;
    802        case OTHER_TAG:
    803        default:
    804            PR_fprintf(fd, "other\n");
    805            break;
    806    }
    807 
    808    PR_fprintf(fd, "Attributes:\n");
    809    for (pair = ti->attList; pair; pair = pair->next) {
    810        PR_fprintf(fd, "\t%s=%s\n", pair->attribute,
    811                   pair->value ? pair->value : "");
    812    }
    813    PR_fprintf(fd, "Text:%s\n", ti->text ? ti->text : "");
    814 
    815    PR_fprintf(fd, "---End of tag---\n");
    816 }
    817 
    818 /************************************************************************
    819 *
    820 * P r i n t H T M L S t r e a m
    821 *
    822 */
    823 static void
    824 PrintHTMLStream(PRFileDesc *fd, HTMLItem *head)
    825 {
    826    while (head) {
    827        if (head->type == TAG_ITEM) {
    828            PrintTagItem(fd, head->item.tag);
    829        } else {
    830            PR_fprintf(fd, "\nTEXT:\n-----\n%s\n-----\n\n", head->item.text);
    831        }
    832        head = head->next;
    833    }
    834 }
    835 
    836 /************************************************************************
    837 *
    838 * S a v e I n l i n e S c r i p t
    839 *
    840 */
    841 static int
    842 SaveInlineScript(char *text, char *id, char *basedir, char *archiveDir)
    843 {
    844    char *filename = NULL;
    845    PRFileDesc *fd = NULL;
    846    int retval = -1;
    847    PRInt32 writeLen;
    848    char *ilDir = NULL;
    849 
    850    if (!text || !id || !archiveDir) {
    851        return -1;
    852    }
    853 
    854    if (dumpParse) {
    855        PR_fprintf(outputFD, "SaveInlineScript: text=%s, id=%s, \n"
    856                             "basedir=%s, archiveDir=%s\n",
    857                   text, id, basedir, archiveDir);
    858    }
    859 
    860    /* Make sure the archive directory is around */
    861    if (ensureExists(basedir, archiveDir) != PR_SUCCESS) {
    862        PR_fprintf(errorFD,
    863                   "ERROR: Unable to create archive directory %s.\n", archiveDir);
    864        errorCount++;
    865        return -1;
    866    }
    867 
    868    /* Make sure the inline script directory is around */
    869    ilDir = PR_smprintf("%s/inlineScripts", archiveDir);
    870    scriptdir = "inlineScripts";
    871    if (ensureExists(basedir, ilDir) != PR_SUCCESS) {
    872        PR_fprintf(errorFD,
    873                   "ERROR: Unable to create directory %s.\n", ilDir);
    874        errorCount++;
    875        return -1;
    876    }
    877 
    878    filename = PR_smprintf("%s/%s/%s", basedir, ilDir, id);
    879 
    880    /* If the file already exists, give a warning, then blow it away */
    881    if (PR_Access(filename, PR_ACCESS_EXISTS) == PR_SUCCESS) {
    882        PR_fprintf(errorFD,
    883                   "warning: file \"%s\" already exists--will overwrite.\n",
    884                   filename);
    885        warningCount++;
    886        if (rm_dash_r(filename)) {
    887            PR_fprintf(errorFD, "ERROR: Unable to delete %s.\n", filename);
    888            errorCount++;
    889            goto finish;
    890        }
    891    }
    892 
    893    /* Write text into file with name id */
    894    fd = PR_Open(filename, PR_WRONLY | PR_CREATE_FILE | PR_TRUNCATE, 0777);
    895    if (!fd) {
    896        PR_fprintf(errorFD, "ERROR: Unable to create file \"%s\".\n",
    897                   filename);
    898        errorCount++;
    899        goto finish;
    900    }
    901    writeLen = strlen(text);
    902    if (PR_Write(fd, text, writeLen) != writeLen) {
    903        PR_fprintf(errorFD, "ERROR: Unable to write to file \"%s\".\n",
    904                   filename);
    905        errorCount++;
    906        goto finish;
    907    }
    908 
    909    retval = 0;
    910 finish:
    911    if (filename) {
    912        PR_smprintf_free(filename);
    913    }
    914    if (ilDir) {
    915        PR_smprintf_free(ilDir);
    916    }
    917    if (fd) {
    918        PR_Close(fd);
    919    }
    920    return retval;
    921 }
    922 
    923 /************************************************************************
    924 *
    925 * S a v e U n n a m a b l e S c r i p t
    926 *
    927 */
    928 static int
    929 SaveUnnamableScript(char *text, char *basedir, char *archiveDir,
    930                    char *HTMLfilename)
    931 {
    932    char *id = NULL;
    933    char *ext = NULL;
    934    char *start = NULL;
    935    int retval = -1;
    936 
    937    if (!text || !archiveDir || !HTMLfilename) {
    938        return -1;
    939    }
    940 
    941    if (dumpParse) {
    942        PR_fprintf(outputFD, "SaveUnnamableScript: text=%s, basedir=%s,\n"
    943                             "archiveDir=%s, filename=%s\n",
    944                   text, basedir, archiveDir,
    945                   HTMLfilename);
    946    }
    947 
    948    /* Construct the filename */
    949    ext = PL_strrchr(HTMLfilename, '.');
    950    if (ext) {
    951        *ext = '\0';
    952    }
    953    for (start = HTMLfilename; strpbrk(start, "/\\");
    954         start = strpbrk(start, "/\\") + 1)
    955        /* do nothing */;
    956    if (*start == '\0')
    957        start = HTMLfilename;
    958    id = PR_smprintf("_%s%d", start, idOrdinal++);
    959    if (ext) {
    960        *ext = '.';
    961    }
    962 
    963    /* Now call SaveInlineScript to do the work */
    964    retval = SaveInlineScript(text, id, basedir, archiveDir);
    965 
    966    PR_Free(id);
    967 
    968    return retval;
    969 }
    970 
    971 /************************************************************************
    972 *
    973 * S a v e S o u r c e
    974 *
    975 */
    976 static int
    977 SaveSource(char *src, char *codebase, char *basedir, char *archiveDir)
    978 {
    979    char *from = NULL, *to = NULL;
    980    int retval = -1;
    981    char *arcDir = NULL;
    982 
    983    if (!src || !archiveDir) {
    984        return -1;
    985    }
    986 
    987    if (dumpParse) {
    988        PR_fprintf(outputFD, "SaveSource: src=%s, codebase=%s, basedir=%s,\n"
    989                             "archiveDir=%s\n",
    990                   src, codebase, basedir, archiveDir);
    991    }
    992 
    993    if (codebase) {
    994        arcDir = PR_smprintf("%s/%s/%s/", basedir, codebase, archiveDir);
    995    } else {
    996        arcDir = PR_smprintf("%s/%s/", basedir, archiveDir);
    997    }
    998 
    999    if (codebase) {
   1000        from = PR_smprintf("%s/%s/%s", basedir, codebase, src);
   1001        to = PR_smprintf("%s%s", arcDir, src);
   1002    } else {
   1003        from = PR_smprintf("%s/%s", basedir, src);
   1004        to = PR_smprintf("%s%s", arcDir, src);
   1005    }
   1006 
   1007    if (make_dirs(to, 0777)) {
   1008        PR_fprintf(errorFD,
   1009                   "ERROR: Unable to create archive directory %s.\n", archiveDir);
   1010        errorCount++;
   1011        goto finish;
   1012    }
   1013 
   1014    retval = copyinto(from, to);
   1015 finish:
   1016    if (from)
   1017        PR_Free(from);
   1018    if (to)
   1019        PR_Free(to);
   1020    if (arcDir)
   1021        PR_Free(arcDir);
   1022    return retval;
   1023 }
   1024 
   1025 /************************************************************************
   1026 *
   1027 * T a g T y p e T o S t r i n g
   1028 *
   1029 */
   1030 char *
   1031 TagTypeToString(TAG_TYPE type)
   1032 {
   1033    switch (type) {
   1034        case APPLET_TAG:
   1035            return "APPLET";
   1036        case SCRIPT_TAG:
   1037            return "SCRIPT";
   1038        case LINK_TAG:
   1039            return "LINK";
   1040        case STYLE_TAG:
   1041            return "STYLE";
   1042        default:
   1043            break;
   1044    }
   1045    return "unknown";
   1046 }
   1047 
   1048 /************************************************************************
   1049 *
   1050 * e x t r a c t _ j s
   1051 *
   1052 */
   1053 static int
   1054 extract_js(char *filename)
   1055 {
   1056    PRFileDesc *fd = NULL;
   1057    FileBuffer *fb = NULL;
   1058    HTMLItem *head = NULL;
   1059    HTMLItem *tail = NULL;
   1060    HTMLItem *curitem = NULL;
   1061    HTMLItem *styleList = NULL;
   1062    HTMLItem *styleListTail = NULL;
   1063    HTMLItem *entityList = NULL;
   1064    HTMLItem *entityListTail = NULL;
   1065    TagItem *tagp = NULL;
   1066    char *text = NULL;
   1067    char *tagerr = NULL;
   1068    char *archiveDir = NULL;
   1069    char *firstArchiveDir = NULL;
   1070    char *basedir = NULL;
   1071    PRInt32 textStart;
   1072    PRInt32 curOffset;
   1073    HTML_STATE state;
   1074    int curchar;
   1075    int retval = -1;
   1076    unsigned int linenum, startLine;
   1077 
   1078    /* Initialize the implicit ID counter for each file */
   1079    idOrdinal = 0;
   1080 
   1081    /*
   1082     * First, parse the HTML into a stream of tags and text.
   1083     */
   1084 
   1085    fd = PR_Open(filename, PR_RDONLY, 0);
   1086    if (!fd) {
   1087        PR_fprintf(errorFD, "Unable to open %s for reading.\n", filename);
   1088        errorCount++;
   1089        return -1;
   1090    }
   1091 
   1092    /* Construct base directory of filename. */
   1093    {
   1094        char *cp;
   1095 
   1096        basedir = PL_strdup(filename);
   1097 
   1098        /* Remove trailing slashes */
   1099        while ((cp = PL_strprbrk(basedir, "/\\")) ==
   1100               (basedir + strlen(basedir) - 1)) {
   1101            *cp = '\0';
   1102        }
   1103 
   1104        /* Now remove everything from the last slash (which will be followed
   1105         * by a filename) to the end */
   1106        cp = PL_strprbrk(basedir, "/\\");
   1107        if (cp) {
   1108            *cp = '\0';
   1109        }
   1110    }
   1111 
   1112    state = TEXT_HTML_STATE;
   1113 
   1114    fb = FB_Create(fd);
   1115 
   1116    textStart = 0;
   1117    startLine = 0;
   1118    while (linenum = FB_GetLineNum(fb), (curchar = FB_GetChar(fb)) != EOF) {
   1119        switch (state) {
   1120            case TEXT_HTML_STATE:
   1121                if (curchar == '<') {
   1122                    /*
   1123                     * Found a tag
   1124                     */
   1125                    /* Save the text so far to a new text item */
   1126                    curOffset = FB_GetPointer(fb) - 2;
   1127                    if (curOffset >= textStart) {
   1128                        if (FB_GetRange(fb, textStart, curOffset,
   1129                                        &text) !=
   1130                            curOffset - textStart + 1) {
   1131                            PR_fprintf(errorFD,
   1132                                       "Unable to read from %s.\n",
   1133                                       filename);
   1134                            errorCount++;
   1135                            goto loser;
   1136                        }
   1137                        /* little fudge here.  If the first character on a line
   1138                         * is '<', meaning a new tag, the preceding text item
   1139                         * actually ends on the previous line.  In this case
   1140                         * we will be saying that the text segment ends on the
   1141                         * next line. I don't think this matters for text items. */
   1142                        curitem = CreateTextItem(text, startLine,
   1143                                                 linenum);
   1144                        text = NULL;
   1145                        if (tail == NULL) {
   1146                            head = tail = curitem;
   1147                        } else {
   1148                            tail->next = curitem;
   1149                            tail = curitem;
   1150                        }
   1151                    }
   1152 
   1153                    /* Process the tag */
   1154                    tagp = ProcessTag(fb, &tagerr);
   1155                    if (!tagp) {
   1156                        if (tagerr) {
   1157                            PR_fprintf(errorFD, "Error in file %s: %s\n",
   1158                                       filename, tagerr);
   1159                            errorCount++;
   1160                        } else {
   1161                            PR_fprintf(errorFD,
   1162                                       "Error in file %s, in tag starting at line %d\n",
   1163                                       filename, linenum);
   1164                            errorCount++;
   1165                        }
   1166                        goto loser;
   1167                    }
   1168                    /* Add the tag to the list */
   1169                    curitem = CreateTagItem(tagp, linenum, FB_GetLineNum(fb));
   1170                    if (tail == NULL) {
   1171                        head = tail = curitem;
   1172                    } else {
   1173                        tail->next = curitem;
   1174                        tail = curitem;
   1175                    }
   1176 
   1177                    /* What's the next state */
   1178                    if (tagp->type == SCRIPT_TAG) {
   1179                        state = SCRIPT_HTML_STATE;
   1180                    }
   1181 
   1182                    /* Start recording text from the new offset */
   1183                    textStart = FB_GetPointer(fb);
   1184                    startLine = FB_GetLineNum(fb);
   1185                } else {
   1186                    /* regular character.  Next! */
   1187                }
   1188                break;
   1189            case SCRIPT_HTML_STATE:
   1190                if (curchar == '<') {
   1191                    char *cp;
   1192                    /*
   1193                     * If this is a </script> tag, then we're at the end of the
   1194                     * script.  Otherwise, ignore
   1195                     */
   1196                    curOffset = FB_GetPointer(fb) - 1;
   1197                    cp = NULL;
   1198                    if (FB_GetRange(fb, curOffset, curOffset + 8, &cp) != 9) {
   1199                        if (cp) {
   1200                            PR_Free(cp);
   1201                            cp = NULL;
   1202                        }
   1203                    } else {
   1204                        /* compare the strings */
   1205                        if (!PORT_Strncasecmp(cp, "</script>", 9)) {
   1206                            /* This is the end of the script. Record the text. */
   1207                            curOffset--;
   1208                            if (curOffset >= textStart) {
   1209                                if (FB_GetRange(fb, textStart, curOffset, &text) !=
   1210                                    curOffset - textStart + 1) {
   1211                                    PR_fprintf(errorFD, "Unable to read from %s.\n",
   1212                                               filename);
   1213                                    errorCount++;
   1214                                    goto loser;
   1215                                }
   1216                                curitem = CreateTextItem(text, startLine, linenum);
   1217                                text = NULL;
   1218                                if (tail == NULL) {
   1219                                    head = tail = curitem;
   1220                                } else {
   1221                                    tail->next = curitem;
   1222                                    tail = curitem;
   1223                                }
   1224                            }
   1225 
   1226                            /* Now parse the /script tag and put it on the list */
   1227                            tagp = ProcessTag(fb, &tagerr);
   1228                            if (!tagp) {
   1229                                if (tagerr) {
   1230                                    PR_fprintf(errorFD, "Error in file %s: %s\n",
   1231                                               filename, tagerr);
   1232                                } else {
   1233                                    PR_fprintf(errorFD,
   1234                                               "Error in file %s, in tag starting at"
   1235                                               " line %d\n",
   1236                                               filename, linenum);
   1237                                }
   1238                                errorCount++;
   1239                                goto loser;
   1240                            }
   1241                            curitem = CreateTagItem(tagp, linenum,
   1242                                                    FB_GetLineNum(fb));
   1243                            if (tail == NULL) {
   1244                                head = tail = curitem;
   1245                            } else {
   1246                                tail->next = curitem;
   1247                                tail = curitem;
   1248                            }
   1249 
   1250                            /* go back to text state */
   1251                            state = TEXT_HTML_STATE;
   1252 
   1253                            textStart = FB_GetPointer(fb);
   1254                            startLine = FB_GetLineNum(fb);
   1255                        }
   1256                    }
   1257                }
   1258                break;
   1259        }
   1260    }
   1261 
   1262    /* End of the file.  Wrap up any remaining text */
   1263    if (state == SCRIPT_HTML_STATE) {
   1264        if (tail && tail->type == TAG_ITEM) {
   1265            PR_fprintf(errorFD, "ERROR: <SCRIPT> tag at %s:%d is not followed "
   1266                                "by a </SCRIPT> tag.\n",
   1267                       filename, tail->startLine);
   1268        } else {
   1269            PR_fprintf(errorFD, "ERROR: <SCRIPT> tag in file %s is not followed"
   1270                                " by a </SCRIPT tag.\n",
   1271                       filename);
   1272        }
   1273        errorCount++;
   1274        goto loser;
   1275    }
   1276    curOffset = FB_GetPointer(fb) - 1;
   1277    if (curOffset >= textStart) {
   1278        text = NULL;
   1279        if (FB_GetRange(fb, textStart, curOffset, &text) !=
   1280            curOffset - textStart + 1) {
   1281            PR_fprintf(errorFD, "Unable to read from %s.\n", filename);
   1282            errorCount++;
   1283            goto loser;
   1284        }
   1285        curitem = CreateTextItem(text, startLine, linenum);
   1286        text = NULL;
   1287        if (tail == NULL) {
   1288            head = tail = curitem;
   1289        } else {
   1290            tail->next = curitem;
   1291            tail = curitem;
   1292        }
   1293    }
   1294 
   1295    if (dumpParse) {
   1296        PrintHTMLStream(outputFD, head);
   1297    }
   1298 
   1299    /*
   1300     * Now we have a stream of tags and text.  Go through and deal with each.
   1301     */
   1302    for (curitem = head; curitem; curitem = curitem->next) {
   1303        AVPair *pairp = NULL;
   1304        char *src = NULL, *id = NULL, *codebase = NULL;
   1305        PRBool hasEventHandler = PR_FALSE;
   1306        int i;
   1307 
   1308        /* Reset archive directory for each tag */
   1309        if (archiveDir) {
   1310            PR_Free(archiveDir);
   1311            archiveDir = NULL;
   1312        }
   1313 
   1314        /* We only analyze tags */
   1315        if (curitem->type != TAG_ITEM) {
   1316            continue;
   1317        }
   1318 
   1319        tagp = curitem->item.tag;
   1320 
   1321        /* go through the attributes to get information */
   1322        for (pairp = tagp->attList; pairp; pairp = pairp->next) {
   1323 
   1324            /* ARCHIVE= */
   1325            if (!PL_strcasecmp(pairp->attribute, "archive")) {
   1326                if (archiveDir) {
   1327                    /* Duplicate attribute.  Print warning */
   1328                    PR_fprintf(errorFD,
   1329                               "warning: \"%s\" attribute overwrites previous attribute"
   1330                               " in tag starting at %s:%d.\n",
   1331                               pairp->attribute, filename, curitem->startLine);
   1332                    warningCount++;
   1333                    PR_Free(archiveDir);
   1334                }
   1335                archiveDir = PL_strdup(pairp->value);
   1336 
   1337                /* Substiture ".arc" for ".jar" */
   1338                if ((PL_strlen(archiveDir) < 4) ||
   1339                    PL_strcasecmp((archiveDir + strlen(archiveDir) - 4),
   1340                                  ".jar")) {
   1341                    char *newArchiveDir = NULL;
   1342                    PR_fprintf(errorFD,
   1343                               "warning: ARCHIVE attribute should end in \".jar\" in tag"
   1344                               " starting on %s:%d.\n",
   1345                               filename, curitem->startLine);
   1346                    warningCount++;
   1347                    newArchiveDir = PR_smprintf("%s.arc", archiveDir);
   1348                    PR_Free(archiveDir);
   1349                    archiveDir = newArchiveDir;
   1350                } else {
   1351                    PL_strcpy(archiveDir + strlen(archiveDir) - 4, ".arc");
   1352                }
   1353 
   1354                /* Record the first archive.  This will be used later if
   1355                 * the archive is not specified */
   1356                if (firstArchiveDir == NULL) {
   1357                    firstArchiveDir = PL_strdup(archiveDir);
   1358                }
   1359            }
   1360            /* CODEBASE= */
   1361            else if (!PL_strcasecmp(pairp->attribute, "codebase")) {
   1362                if (codebase) {
   1363                    /* Duplicate attribute.  Print warning */
   1364                    PR_fprintf(errorFD,
   1365                               "warning: \"%s\" attribute overwrites previous attribute"
   1366                               " in tag staring at %s:%d.\n",
   1367                               pairp->attribute, filename, curitem->startLine);
   1368                    warningCount++;
   1369                }
   1370                codebase = pairp->value;
   1371            }
   1372            /* SRC= and HREF= */
   1373            else if (!PORT_Strcasecmp(pairp->attribute, "src") ||
   1374                     !PORT_Strcasecmp(pairp->attribute, "href")) {
   1375                if (src) {
   1376                    /* Duplicate attribute.  Print warning */
   1377                    PR_fprintf(errorFD,
   1378                               "warning: \"%s\" attribute overwrites previous attribute"
   1379                               " in tag staring at %s:%d.\n",
   1380                               pairp->attribute, filename, curitem->startLine);
   1381                    warningCount++;
   1382                }
   1383                src = pairp->value;
   1384            }
   1385            /* CODE= */
   1386            else if (!PORT_Strcasecmp(pairp->attribute, "code")) {
   1387                /*!!!XXX Change PORT to PL all over this code !!! */
   1388                if (src) {
   1389                    /* Duplicate attribute.  Print warning */
   1390                    PR_fprintf(errorFD,
   1391                               "warning: \"%s\" attribute overwrites previous attribute"
   1392                               " ,in tag staring at %s:%d.\n",
   1393                               pairp->attribute, filename, curitem->startLine);
   1394                    warningCount++;
   1395                }
   1396                src = pairp->value;
   1397 
   1398                /* Append a .class if one is not already present */
   1399                if ((PL_strlen(src) < 6) ||
   1400                    PL_strcasecmp((src + PL_strlen(src) - 6), ".class")) {
   1401                    src = PR_smprintf("%s.class", src);
   1402                    /* Put this string back into the data structure so it
   1403                     * will be deallocated properly */
   1404                    PR_Free(pairp->value);
   1405                    pairp->value = src;
   1406                }
   1407            }
   1408            /* ID= */
   1409            else if (!PL_strcasecmp(pairp->attribute, "id")) {
   1410                if (id) {
   1411                    /* Duplicate attribute.  Print warning */
   1412                    PR_fprintf(errorFD,
   1413                               "warning: \"%s\" attribute overwrites previous attribute"
   1414                               " in tag staring at %s:%d.\n",
   1415                               pairp->attribute, filename, curitem->startLine);
   1416                    warningCount++;
   1417                }
   1418                id = pairp->value;
   1419            }
   1420 
   1421            /* STYLE= */
   1422            /* style= attributes, along with JS entities, are stored into
   1423             * files with dynamically generated names. The filenames are
   1424             * based on the order in which the text is found in the file.
   1425             * All JS entities on all lines up to and including the line
   1426             * containing the end of the tag that has this style= attribute
   1427             * will be processed before this style=attribute.  So we need
   1428             * to record the line that this _tag_ (not the attribute) ends on.
   1429             */
   1430            else if (!PL_strcasecmp(pairp->attribute, "style") && pairp->value) {
   1431                HTMLItem *styleItem;
   1432                /* Put this item on the style list */
   1433                styleItem = CreateTextItem(PL_strdup(pairp->value),
   1434                                           curitem->startLine, curitem->endLine);
   1435                if (styleListTail == NULL) {
   1436                    styleList = styleListTail = styleItem;
   1437                } else {
   1438                    styleListTail->next = styleItem;
   1439                    styleListTail = styleItem;
   1440                }
   1441            }
   1442            /* Event handlers */
   1443            else {
   1444                for (i = 0; i < num_handlers; i++) {
   1445                    if (!PL_strcasecmp(event_handlers[i], pairp->attribute)) {
   1446                        hasEventHandler = PR_TRUE;
   1447                        break;
   1448                    }
   1449                }
   1450            }
   1451 
   1452            /* JS Entity */
   1453            {
   1454                char *entityStart, *entityEnd;
   1455                HTMLItem *entityItem;
   1456 
   1457                /* go through each JavaScript entity ( &{...}; ) and store it
   1458                 * in the entityList.  The important thing is to record what
   1459                 * line number it's on, so we can get it in the right order
   1460                 * in relation to style= attributes.
   1461                 * Apparently, these can't flow across lines, so the start and
   1462                 * end line will be the same.  That helps matters.
   1463                 */
   1464                entityEnd = pairp->value;
   1465                while (entityEnd &&
   1466                       (entityStart = PL_strstr(entityEnd, "&{")) /*}*/ != NULL) {
   1467                    entityStart += 2; /* point at beginning of actual entity */
   1468                    entityEnd = PL_strchr(entityStart, '}');
   1469                    if (entityEnd) {
   1470                        /* Put this item on the entity list */
   1471                        *entityEnd = '\0';
   1472                        entityItem = CreateTextItem(PL_strdup(entityStart),
   1473                                                    pairp->valueLine, pairp->valueLine);
   1474                        *entityEnd = /* { */ '}';
   1475                        if (entityListTail) {
   1476                            entityListTail->next = entityItem;
   1477                            entityListTail = entityItem;
   1478                        } else {
   1479                            entityList = entityListTail = entityItem;
   1480                        }
   1481                    }
   1482                }
   1483            }
   1484        }
   1485 
   1486        /* If no archive was supplied, we use the first one of the file */
   1487        if (!archiveDir && firstArchiveDir) {
   1488            archiveDir = PL_strdup(firstArchiveDir);
   1489        }
   1490 
   1491        /* If we have an event handler, we need to archive this tag */
   1492        if (hasEventHandler) {
   1493            if (!id) {
   1494                PR_fprintf(errorFD,
   1495                           "warning: tag starting at %s:%d has event handler but"
   1496                           " no ID attribute.  The tag will not be signed.\n",
   1497                           filename, curitem->startLine);
   1498                warningCount++;
   1499            } else if (!archiveDir) {
   1500                PR_fprintf(errorFD,
   1501                           "warning: tag starting at %s:%d has event handler but"
   1502                           " no ARCHIVE attribute.  The tag will not be signed.\n",
   1503                           filename, curitem->startLine);
   1504                warningCount++;
   1505            } else {
   1506                if (SaveInlineScript(tagp->text, id, basedir, archiveDir)) {
   1507                    goto loser;
   1508                }
   1509            }
   1510        }
   1511 
   1512        switch (tagp->type) {
   1513            case APPLET_TAG:
   1514                if (!src) {
   1515                    PR_fprintf(errorFD,
   1516                               "error: APPLET tag starting on %s:%d has no CODE "
   1517                               "attribute.\n",
   1518                               filename, curitem->startLine);
   1519                    errorCount++;
   1520                    goto loser;
   1521                } else if (!archiveDir) {
   1522                    PR_fprintf(errorFD,
   1523                               "error: APPLET tag starting on %s:%d has no ARCHIVE "
   1524                               "attribute.\n",
   1525                               filename, curitem->startLine);
   1526                    errorCount++;
   1527                    goto loser;
   1528                } else {
   1529                    if (SaveSource(src, codebase, basedir, archiveDir)) {
   1530                        goto loser;
   1531                    }
   1532                }
   1533                break;
   1534            case SCRIPT_TAG:
   1535            case LINK_TAG:
   1536            case STYLE_TAG:
   1537                if (!archiveDir) {
   1538                    PR_fprintf(errorFD,
   1539                               "error: %s tag starting on %s:%d has no ARCHIVE "
   1540                               "attribute.\n",
   1541                               TagTypeToString(tagp->type),
   1542                               filename, curitem->startLine);
   1543                    errorCount++;
   1544                    goto loser;
   1545                } else if (src) {
   1546                    if (SaveSource(src, codebase, basedir, archiveDir)) {
   1547                        goto loser;
   1548                    }
   1549                } else if (id) {
   1550                    /* Save the next text item */
   1551                    if (!curitem->next || (curitem->next->type !=
   1552                                           TEXT_ITEM)) {
   1553                        PR_fprintf(errorFD,
   1554                                   "warning: %s tag starting on %s:%d is not followed"
   1555                                   " by script text.\n",
   1556                                   TagTypeToString(tagp->type),
   1557                                   filename, curitem->startLine);
   1558                        warningCount++;
   1559                        /* just create empty file */
   1560                        if (SaveInlineScript("", id, basedir, archiveDir)) {
   1561                            goto loser;
   1562                        }
   1563                    } else {
   1564                        curitem = curitem->next;
   1565                        if (SaveInlineScript(curitem->item.text,
   1566                                             id, basedir,
   1567                                             archiveDir)) {
   1568                            goto loser;
   1569                        }
   1570                    }
   1571                } else {
   1572                    /* No src or id tag--warning */
   1573                    PR_fprintf(errorFD,
   1574                               "warning: %s tag starting on %s:%d has no SRC or"
   1575                               " ID attributes.  Will not sign.\n",
   1576                               TagTypeToString(tagp->type), filename, curitem->startLine);
   1577                    warningCount++;
   1578                }
   1579                break;
   1580            default:
   1581                /* do nothing for other tags */
   1582                break;
   1583        }
   1584    }
   1585 
   1586    /* Now deal with all the unnamable scripts */
   1587    if (firstArchiveDir) {
   1588        HTMLItem *style, *entity;
   1589 
   1590        /* Go through the lists of JS entities and style attributes.  Do them
   1591         * in chronological order within a list.  Pick the list with the lower
   1592         * endLine. In case of a tie, entities come first.
   1593         */
   1594        style = styleList;
   1595        entity = entityList;
   1596        while (style || entity) {
   1597            if (!entity || (style && (style->endLine < entity->endLine))) {
   1598                /* Process style */
   1599                SaveUnnamableScript(style->item.text, basedir, firstArchiveDir,
   1600                                    filename);
   1601                style = style->next;
   1602            } else {
   1603                /* Process entity */
   1604                SaveUnnamableScript(entity->item.text, basedir, firstArchiveDir,
   1605                                    filename);
   1606                entity = entity->next;
   1607            }
   1608        }
   1609    }
   1610 
   1611    retval = 0;
   1612 loser:
   1613    /* Blow away the stream */
   1614    while (head) {
   1615        curitem = head;
   1616        head = head->next;
   1617        DestroyHTMLItem(curitem);
   1618    }
   1619    while (styleList) {
   1620        curitem = styleList;
   1621        styleList = styleList->next;
   1622        DestroyHTMLItem(curitem);
   1623    }
   1624    while (entityList) {
   1625        curitem = entityList;
   1626        entityList = entityList->next;
   1627        DestroyHTMLItem(curitem);
   1628    }
   1629    if (text) {
   1630        PR_Free(text);
   1631        text = NULL;
   1632    }
   1633    if (fb) {
   1634        FB_Destroy(fb);
   1635        fb = NULL;
   1636    }
   1637    if (fd) {
   1638        PR_Close(fd);
   1639    }
   1640    if (tagerr) {
   1641        PR_smprintf_free(tagerr);
   1642        tagerr = NULL;
   1643    }
   1644    if (archiveDir) {
   1645        PR_Free(archiveDir);
   1646        archiveDir = NULL;
   1647    }
   1648    if (firstArchiveDir) {
   1649        PR_Free(firstArchiveDir);
   1650        firstArchiveDir = NULL;
   1651    }
   1652    if (entityListTail) {
   1653        PR_Free(entityListTail);
   1654    }
   1655    if (basedir) {
   1656        PR_Free(basedir);
   1657    }
   1658    return retval;
   1659 }
   1660 
   1661 /**********************************************************************
   1662 *
   1663 * e n s u r e E x i s t s
   1664 *
   1665 * Check for existence of indicated directory.  If it doesn't exist,
   1666 * it will be created.
   1667 * Returns PR_SUCCESS if the directory is present, PR_FAILURE otherwise.
   1668 */
   1669 static PRStatus
   1670 ensureExists(char *basepath, char *path)
   1671 {
   1672    char fn[FNSIZE];
   1673    PRDir *dir;
   1674    int c = snprintf(fn, sizeof(fn), "%s/%s", basepath, path);
   1675    if (c >= sizeof(fn)) {
   1676        return PR_FAILURE;
   1677    }
   1678 
   1679    /*PR_fprintf(outputFD, "Trying to open directory %s.\n", fn);*/
   1680 
   1681    if ((dir = PR_OpenDir(fn))) {
   1682        PR_CloseDir(dir);
   1683        return PR_SUCCESS;
   1684    }
   1685    return PR_MkDir(fn, 0777);
   1686 }
   1687 
   1688 /***************************************************************************
   1689 *
   1690 * m a k e _ d i r s
   1691 *
   1692 * Ensure that the directory portion of the path exists.  This may require
   1693 * making the directory, and its parent, and its parent's parent, etc.
   1694 */
   1695 static int
   1696 make_dirs(char *path, int file_perms)
   1697 {
   1698    char *Path;
   1699    char *start;
   1700    char *sep;
   1701    int ret = 0;
   1702    PRFileInfo info;
   1703 
   1704    if (!path) {
   1705        return 0;
   1706    }
   1707 
   1708    Path = PL_strdup(path);
   1709    if (!Path) {
   1710        return 0;
   1711    }
   1712 
   1713    start = strpbrk(Path, "/\\");
   1714    if (!start) {
   1715        goto loser;
   1716    }
   1717    start++; /* start right after first slash */
   1718 
   1719    /* Each time through the loop add one more directory. */
   1720    while ((sep = strpbrk(start, "/\\"))) {
   1721        *sep = '\0';
   1722 
   1723        if (PR_GetFileInfo(Path, &info) != PR_SUCCESS) {
   1724            /* No such dir, we have to create it */
   1725            if (PR_MkDir(Path, file_perms) != PR_SUCCESS) {
   1726                PR_fprintf(errorFD, "ERROR: Unable to create directory %s.\n",
   1727                           Path);
   1728                errorCount++;
   1729                ret = -1;
   1730                goto loser;
   1731            }
   1732        } else {
   1733            /* something exists by this name, make sure it's a directory */
   1734            if (info.type != PR_FILE_DIRECTORY) {
   1735                PR_fprintf(errorFD, "ERROR: Unable to create directory %s.\n",
   1736                           Path);
   1737                errorCount++;
   1738                ret = -1;
   1739                goto loser;
   1740            }
   1741        }
   1742 
   1743        start = sep + 1; /* start after the next slash */
   1744        *sep = '/';
   1745    }
   1746 
   1747 loser:
   1748    PR_Free(Path);
   1749    return ret;
   1750 }
   1751 
   1752 /*
   1753 *  c o p y i n t o
   1754 *
   1755 *  Function to copy file "from" to path "to".
   1756 *
   1757 */
   1758 static int
   1759 copyinto(char *from, char *to)
   1760 {
   1761    PRInt32 num;
   1762    char buf[BUFSIZ];
   1763    PRFileDesc *infp = NULL, *outfp = NULL;
   1764    int retval = -1;
   1765 
   1766    if ((infp = PR_Open(from, PR_RDONLY, 0777)) == NULL) {
   1767        PR_fprintf(errorFD, "ERROR: Unable to open \"%s\" for reading.\n",
   1768                   from);
   1769        errorCount++;
   1770        goto finish;
   1771    }
   1772 
   1773    /* If to already exists, print a warning before deleting it */
   1774    if (PR_Access(to, PR_ACCESS_EXISTS) == PR_SUCCESS) {
   1775        PR_fprintf(errorFD, "warning: %s already exists--will overwrite\n", to);
   1776        warningCount++;
   1777        if (rm_dash_r(to)) {
   1778            PR_fprintf(errorFD,
   1779                       "ERROR: Unable to remove %s.\n", to);
   1780            errorCount++;
   1781            goto finish;
   1782        }
   1783    }
   1784 
   1785    if ((outfp = PR_Open(to, PR_WRONLY | PR_CREATE_FILE | PR_TRUNCATE, 0777)) ==
   1786        NULL) {
   1787        char *errBuf = NULL;
   1788 
   1789        errBuf = PR_Malloc(PR_GetErrorTextLength() + 1);
   1790        PR_fprintf(errorFD, "ERROR: Unable to open \"%s\" for writing.\n", to);
   1791        if (PR_GetErrorText(errBuf)) {
   1792            PR_fprintf(errorFD, "Cause: %s\n", errBuf);
   1793        }
   1794        if (errBuf) {
   1795            PR_Free(errBuf);
   1796        }
   1797        errorCount++;
   1798        goto finish;
   1799    }
   1800 
   1801    while ((num = PR_Read(infp, buf, BUFSIZ)) > 0) {
   1802        if (PR_Write(outfp, buf, num) != num) {
   1803            PR_fprintf(errorFD, "ERROR: Error writing to %s.\n", to);
   1804            errorCount++;
   1805            goto finish;
   1806        }
   1807    }
   1808 
   1809    retval = 0;
   1810 finish:
   1811    if (infp)
   1812        PR_Close(infp);
   1813    if (outfp)
   1814        PR_Close(outfp);
   1815 
   1816    return retval;
   1817 }