tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ntio.c (129544B)


      1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 /* Windows NT IO module
      7 *
      8 * This module handles IO for LOCAL_SCOPE and GLOBAL_SCOPE threads.
      9 * For LOCAL_SCOPE threads, we're using NT fibers.  For GLOBAL_SCOPE threads
     10 * we're using NT-native threads.
     11 *
     12 * When doing IO, we want to use completion ports for optimal performance
     13 * with fibers.  But if we use completion ports for all IO, it is difficult
     14 * to project a blocking model with GLOBAL_SCOPE threads.  To handle this
     15 * we create an extra thread for completing IO for GLOBAL_SCOPE threads.
     16 * We don't really want to complete IO on a separate thread for LOCAL_SCOPE
     17 * threads because it means extra context switches, which are really slow
     18 * on NT...  Since we're using a single completion port, some IO will
     19 * be incorrectly completed on the GLOBAL_SCOPE IO thread; this will mean
     20 * extra context switching; but I don't think there is anything I can do
     21 * about it.
     22 */
     23 
     24 #include "primpl.h"
     25 #include "pprmwait.h"
     26 #include <direct.h>
     27 #include <mbstring.h>
     28 
     29 static HANDLE _pr_completion_port;
     30 static PRThread* _pr_io_completion_thread;
     31 
     32 #define RECYCLE_SIZE 512
     33 static struct _MDLock _pr_recycle_lock;
     34 static PRInt32 _pr_recycle_INET_array[RECYCLE_SIZE];
     35 static PRInt32 _pr_recycle_INET_tail = 0;
     36 static PRInt32 _pr_recycle_INET6_array[RECYCLE_SIZE];
     37 static PRInt32 _pr_recycle_INET6_tail = 0;
     38 
     39 __declspec(thread) PRThread* _pr_io_restarted_io = NULL;
     40 DWORD _pr_io_restartedIOIndex; /* The thread local storage slot for each
     41                                * thread is initialized to NULL. */
     42 
     43 PRBool _nt_version_gets_lockfile_completion;
     44 
     45 struct _MDLock _pr_ioq_lock;
     46 extern _MDLock _nt_idleLock;
     47 extern PRCList _nt_idleList;
     48 extern PRUint32 _nt_idleCount;
     49 
     50 #define CLOSE_TIMEOUT PR_SecondsToInterval(5)
     51 
     52 /*
     53 * NSPR-to-NT access right mapping table for files.
     54 */
     55 static DWORD fileAccessTable[] = {FILE_GENERIC_READ, FILE_GENERIC_WRITE,
     56                                  FILE_GENERIC_EXECUTE};
     57 
     58 /*
     59 * NSPR-to-NT access right mapping table for directories.
     60 */
     61 static DWORD dirAccessTable[] = {FILE_GENERIC_READ,
     62                                 FILE_GENERIC_WRITE | FILE_DELETE_CHILD,
     63                                 FILE_GENERIC_EXECUTE};
     64 
     65 static PRBool IsPrevCharSlash(const char* str, const char* current);
     66 
     67 #define _NEED_351_FILE_LOCKING_HACK
     68 #ifdef _NEED_351_FILE_LOCKING_HACK
     69 #  define _PR_LOCAL_FILE 1
     70 #  define _PR_REMOTE_FILE 2
     71 PRBool IsFileLocalInit();
     72 PRInt32 IsFileLocal(HANDLE hFile);
     73 #endif /* _NEED_351_FILE_LOCKING_HACK */
     74 
     75 static PRInt32 _md_MakeNonblock(HANDLE);
     76 
     77 static PROsfd _nt_nonblock_accept(PRFileDesc* fd, struct sockaddr* addr,
     78                                  int* addrlen, PRIntervalTime);
     79 static PRInt32 _nt_nonblock_connect(PRFileDesc* fd, struct sockaddr* addr,
     80                                    int addrlen, PRIntervalTime);
     81 static PRInt32 _nt_nonblock_recv(PRFileDesc* fd, char* buf, int len, int flags,
     82                                 PRIntervalTime);
     83 static PRInt32 _nt_nonblock_send(PRFileDesc* fd, char* buf, int len,
     84                                 PRIntervalTime);
     85 static PRInt32 _nt_nonblock_writev(PRFileDesc* fd, const PRIOVec* iov, int size,
     86                                   PRIntervalTime);
     87 static PRInt32 _nt_nonblock_sendto(PRFileDesc*, const char*, int,
     88                                   const struct sockaddr*, int, PRIntervalTime);
     89 static PRInt32 _nt_nonblock_recvfrom(PRFileDesc*, char*, int, struct sockaddr*,
     90                                     int*, PRIntervalTime);
     91 
     92 /*
     93 * We cannot associate a fd (a socket) with an I/O completion port
     94 * if the fd is nonblocking or inheritable.
     95 *
     96 * Nonblocking socket I/O won't work if the socket is associated with
     97 * an I/O completion port.
     98 *
     99 * An inheritable fd cannot be associated with an I/O completion port
    100 * because the completion notification of async I/O initiated by the
    101 * child process is still posted to the I/O completion port in the
    102 * parent process.
    103 */
    104 #define _NT_USE_NB_IO(fd) \
    105  ((fd)->secret->nonblocking || (fd)->secret->inheritable == _PR_TRI_TRUE)
    106 
    107 /*
    108 * UDP support
    109 *
    110 * UDP is supported on NT by the continuation thread mechanism.
    111 * The code is borrowed from ptio.c in pthreads nspr, hence the
    112 * PT and pt prefixes.  This mechanism is in fact general and
    113 * not limited to UDP.  For now, only UDP's recvfrom and sendto
    114 * go through the continuation thread if they get WSAEWOULDBLOCK
    115 * on first try.  Recv and send on a connected UDP socket still
     116 * go through asynchronous io.
    117 */
    118 
    119 #define PT_DEFAULT_SELECT_MSEC 100
    120 
    121 typedef struct pt_Continuation pt_Continuation;
    122 typedef PRBool (*ContinuationFn)(pt_Continuation* op, PRInt16 revent);
    123 
/* Lifecycle states of a continuation operation queued with the
 * continuation thread.  (NOTE(review): "Contuation"/"sumbitted" are
 * historical typos kept because the identifiers are referenced below.) */
typedef enum pr_ContuationStatus {
  pt_continuation_sumbitted,   /* queued, not yet picked up */
  pt_continuation_inprogress,  /* being retried by the continuation thread */
  pt_continuation_abort,       /* initiator asked for cancellation */
  pt_continuation_done         /* finished; result/syserrno are valid */
} pr_ContuationStatus;
    130 
/* One parked socket operation (e.g. a UDP sendto/recvfrom that got
 * WSAEWOULDBLOCK) handed to the continuation thread, which re-drives
 * 'function' until it reports completion, abort, or timeout.  The
 * initiating thread sleeps on 'complete' until 'status' says done. */
struct pt_Continuation {
  /* These objects are linked in ascending timeout order */
  pt_Continuation *next, *prev; /* self linked list of these things */

  /* The building of the continuation operation */
  ContinuationFn function; /* what function to continue */
  union {
    SOCKET osfd;
  } arg1; /* #1 - the op's fd */
  union {
    void* buffer;
  } arg2; /* #2 - primary transfer buffer */
  union {
    PRIntn amount;
  } arg3; /* #3 - size of 'buffer' */
  union {
    PRIntn flags;
  } arg4; /* #4 - read/write flags */
  union {
    PRNetAddr* addr;
  } arg5; /* #5 - send/recv address */

  PRIntervalTime timeout; /* representation of the timeout */

  PRIntn event; /* flags for select()'s events */

  /*
  ** The representation and notification of the results of the operation.
  ** These function can either return an int return code or a pointer to
  ** some object.
  */
  union {
    PRIntn code;
    void* object;
  } result;

  PRIntn syserrno;            /* in case it failed, why (errno) */
  pr_ContuationStatus status; /* the status of the operation */
  PRCondVar* complete;        /* to notify the initiating thread */
};
    171 
/* Singleton work queue for the continuation thread.  Operations are
 * kept in ascending timeout order (see struct pt_Continuation above);
 * 'op'/'epoch' cache the entry whose timeout lies furthest in the
 * future so the thread can bound its sleep. */
static struct pt_TimedQueue {
  PRLock* ml;                   /* a little protection */
  PRThread* thread;             /* internal thread's identification */
  PRCondVar* new_op;            /* new operation supplied */
  PRCondVar* finish_op;         /* an existing operation finished */
  PRUintn op_count;             /* number of operations in the list */
  pt_Continuation *head, *tail; /* head/tail of list of operations */

  pt_Continuation* op;  /* timed operation furthest in future */
  PRIntervalTime epoch; /* the epoch of 'timed' */
} pt_tq;
    183 
#if defined(DEBUG)
/* Debug-only instrumentation counters for the continuation thread. */
static struct pt_debug_s {
  PRIntn predictionsFoiled;
  PRIntn pollingListMax;
  PRIntn continuationsServed;
} pt_debug;
#endif /* DEBUG */
    191 
    192 static void ContinuationThread(void* arg);
    193 static PRInt32 pt_SendTo(SOCKET osfd, const void* buf, PRInt32 amount,
    194                         PRInt32 flags, const PRNetAddr* addr, PRIntn addrlen,
    195                         PRIntervalTime timeout);
    196 static PRInt32 pt_RecvFrom(SOCKET osfd, void* buf, PRInt32 amount,
    197                           PRInt32 flags, PRNetAddr* addr, PRIntn* addr_len,
    198                           PRIntervalTime timeout);
    199 
    200 /* The key returned from GetQueuedCompletionStatus() is used to determine what
    201 * type of completion we have.  We differentiate between IO completions and
    202 * CVAR completions.
    203 */
    204 #define KEY_IO 0xaaaaaaaa
    205 #define KEY_CVAR 0xbbbbbbbb
    206 
    207 PRInt32 _PR_MD_PAUSE_CPU(PRIntervalTime ticks) {
    208  int awoken = 0;
    209  unsigned long bytes, key;
    210  int rv;
    211  LPOVERLAPPED olp;
    212  _MDOverlapped* mdOlp;
    213  PRUint32 timeout;
    214 
    215  if (_nt_idleCount > 0) {
    216    PRThread* deadThread;
    217 
    218    _MD_LOCK(&_nt_idleLock);
    219    while (!PR_CLIST_IS_EMPTY(&_nt_idleList)) {
    220      deadThread = _PR_THREAD_PTR(PR_LIST_HEAD(&_nt_idleList));
    221      PR_REMOVE_LINK(&deadThread->links);
    222 
    223      PR_ASSERT(deadThread->state == _PR_DEAD_STATE);
    224 
    225      /* XXXMB - cleanup to do here? */
    226      if (!_PR_IS_NATIVE_THREAD(deadThread)) {
    227        /* Spinlock while user thread is still running.
    228         * There is no way to use a condition variable here. The thread
    229         * is dead, and we have to wait until we switch off the dead
    230         * thread before we can kill the fiber completely.
    231         */
    232        while (deadThread->no_sched);
    233 
    234        DeleteFiber(deadThread->md.fiber_id);
    235      }
    236      memset(deadThread, 0xa, sizeof(PRThread)); /* debugging */
    237      if (!deadThread->threadAllocatedOnStack) {
    238        PR_DELETE(deadThread);
    239      }
    240      _nt_idleCount--;
    241    }
    242    _MD_UNLOCK(&_nt_idleLock);
    243  }
    244 
    245  if (ticks == PR_INTERVAL_NO_TIMEOUT)
    246 #if 0
    247        timeout = INFINITE;
    248 #else
    249    /*
    250     * temporary hack to poll the runq every 5 seconds because of bug in
    251     * native threads creating user threads and not poking the right cpu.
    252     *
    253     * A local thread that was interrupted is bound to its current
    254     * cpu but there is no easy way for the interrupter to poke the
    255     * right cpu.  This is a hack to poll the runq every 5 seconds.
    256     */
    257    timeout = 5000;
    258 #endif
    259  else {
    260    timeout = PR_IntervalToMilliseconds(ticks);
    261  }
    262 
    263  /*
    264   * The idea of looping here is to complete as many IOs as possible before
    265   * returning.  This should minimize trips to the idle thread.
    266   */
    267  while (1) {
    268    rv = GetQueuedCompletionStatus(_pr_completion_port, &bytes, &key, &olp,
    269                                   timeout);
    270    if (rv == 0 && olp == NULL) {
    271      /* Error in GetQueuedCompetionStatus */
    272      if (GetLastError() != WAIT_TIMEOUT) {
    273        /* ARGH - what can we do here? Log an error? XXXMB */
    274        return -1;
    275      } else {
    276        /* If awoken == 0, then we just had a timeout */
    277        return awoken;
    278      }
    279    }
    280 
    281    if (olp == NULL) {
    282      return 0;
    283    }
    284 
    285    mdOlp = (_MDOverlapped*)olp;
    286 
    287    if (mdOlp->ioModel == _MD_MultiWaitIO) {
    288      PRRecvWait* desc;
    289      PRWaitGroup* group;
    290      PRThread* thred = NULL;
    291      PRMWStatus mwstatus;
    292 
    293      desc = mdOlp->data.mw.desc;
    294      PR_ASSERT(desc != NULL);
    295      mwstatus = rv ? PR_MW_SUCCESS : PR_MW_FAILURE;
    296      if (InterlockedCompareExchange((PVOID*)&desc->outcome, (PVOID)mwstatus,
    297                                     (PVOID)PR_MW_PENDING) ==
    298          (PVOID)PR_MW_PENDING) {
    299        if (mwstatus == PR_MW_SUCCESS) {
    300          desc->bytesRecv = bytes;
    301        } else {
    302          mdOlp->data.mw.error = GetLastError();
    303        }
    304      }
    305      group = mdOlp->data.mw.group;
    306      PR_ASSERT(group != NULL);
    307 
    308      _PR_MD_LOCK(&group->mdlock);
    309      PR_APPEND_LINK(&mdOlp->data.mw.links, &group->io_ready);
    310      PR_ASSERT(desc->fd != NULL);
    311      NT_HashRemoveInternal(group, desc->fd);
    312      if (!PR_CLIST_IS_EMPTY(&group->wait_list)) {
    313        thred = _PR_THREAD_CONDQ_PTR(PR_LIST_HEAD(&group->wait_list));
    314        PR_REMOVE_LINK(&thred->waitQLinks);
    315      }
    316      _PR_MD_UNLOCK(&group->mdlock);
    317 
    318      if (thred) {
    319        if (!_PR_IS_NATIVE_THREAD(thred)) {
    320          int pri = thred->priority;
    321          _PRCPU* lockedCPU = _PR_MD_CURRENT_CPU();
    322          _PR_THREAD_LOCK(thred);
    323          if (thred->flags & _PR_ON_PAUSEQ) {
    324            _PR_SLEEPQ_LOCK(thred->cpu);
    325            _PR_DEL_SLEEPQ(thred, PR_TRUE);
    326            _PR_SLEEPQ_UNLOCK(thred->cpu);
    327            _PR_THREAD_UNLOCK(thred);
    328            thred->cpu = lockedCPU;
    329            thred->state = _PR_RUNNABLE;
    330            _PR_RUNQ_LOCK(lockedCPU);
    331            _PR_ADD_RUNQ(thred, lockedCPU, pri);
    332            _PR_RUNQ_UNLOCK(lockedCPU);
    333          } else {
    334            /*
    335             * The thread was just interrupted and moved
    336             * from the pause queue to the run queue.
    337             */
    338            _PR_THREAD_UNLOCK(thred);
    339          }
    340        } else {
    341          _PR_THREAD_LOCK(thred);
    342          thred->state = _PR_RUNNABLE;
    343          _PR_THREAD_UNLOCK(thred);
    344          ReleaseSemaphore(thred->md.blocked_sema, 1, NULL);
    345        }
    346      }
    347    } else {
    348      PRThread* completed_io;
    349 
    350      PR_ASSERT(mdOlp->ioModel == _MD_BlockingIO);
    351      completed_io = _PR_THREAD_MD_TO_PTR(mdOlp->data.mdThread);
    352      completed_io->md.blocked_io_status = rv;
    353      if (rv == 0) {
    354        completed_io->md.blocked_io_error = GetLastError();
    355      }
    356      completed_io->md.blocked_io_bytes = bytes;
    357 
    358      if (!_PR_IS_NATIVE_THREAD(completed_io)) {
    359        int pri = completed_io->priority;
    360        _PRCPU* lockedCPU = _PR_MD_CURRENT_CPU();
    361 
    362        /* The KEY_CVAR notification only occurs when a native thread
    363         * is notifying a user thread.  For user-user notifications
    364         * the wakeup occurs by having the notifier place the thread
    365         * on the runq directly; for native-native notifications the
    366         * wakeup occurs by calling ReleaseSemaphore.
    367         */
    368        if (key == KEY_CVAR) {
    369          PR_ASSERT(completed_io->io_pending == PR_FALSE);
    370          PR_ASSERT(completed_io->io_suspended == PR_FALSE);
    371          PR_ASSERT(completed_io->md.thr_bound_cpu == NULL);
    372 
    373          /* Thread has already been deleted from sleepQ */
    374 
    375          /* Switch CPU and add to runQ */
    376          completed_io->cpu = lockedCPU;
    377          completed_io->state = _PR_RUNNABLE;
    378          _PR_RUNQ_LOCK(lockedCPU);
    379          _PR_ADD_RUNQ(completed_io, lockedCPU, pri);
    380          _PR_RUNQ_UNLOCK(lockedCPU);
    381        } else {
    382          PR_ASSERT(key == KEY_IO);
    383          PR_ASSERT(completed_io->io_pending == PR_TRUE);
    384 
    385          _PR_THREAD_LOCK(completed_io);
    386 
    387          completed_io->io_pending = PR_FALSE;
    388 
    389          /* If io_suspended is true, then this IO has already resumed.
    390           * We don't need to do anything; because the thread is
    391           * already running.
    392           */
    393          if (completed_io->io_suspended == PR_FALSE) {
    394            if (completed_io->flags & (_PR_ON_SLEEPQ | _PR_ON_PAUSEQ)) {
    395              _PR_SLEEPQ_LOCK(completed_io->cpu);
    396              _PR_DEL_SLEEPQ(completed_io, PR_TRUE);
    397              _PR_SLEEPQ_UNLOCK(completed_io->cpu);
    398 
    399              _PR_THREAD_UNLOCK(completed_io);
    400 
    401              /*
    402               * If an I/O operation is suspended, the thread
    403               * must be running on the same cpu on which the
    404               * I/O operation was issued.
    405               */
    406              PR_ASSERT(!completed_io->md.thr_bound_cpu ||
    407                        (completed_io->cpu == completed_io->md.thr_bound_cpu));
    408 
    409              if (!completed_io->md.thr_bound_cpu) {
    410                completed_io->cpu = lockedCPU;
    411              }
    412              completed_io->state = _PR_RUNNABLE;
    413              _PR_RUNQ_LOCK(completed_io->cpu);
    414              _PR_ADD_RUNQ(completed_io, completed_io->cpu, pri);
    415              _PR_RUNQ_UNLOCK(completed_io->cpu);
    416            } else {
    417              _PR_THREAD_UNLOCK(completed_io);
    418            }
    419          } else {
    420            _PR_THREAD_UNLOCK(completed_io);
    421          }
    422        }
    423      } else {
    424        /* For native threads, they are only notified through this loop
    425         * when completing IO.  So, don't worry about this being a CVAR
    426         * notification, because that is not possible.
    427         */
    428        _PR_THREAD_LOCK(completed_io);
    429        completed_io->io_pending = PR_FALSE;
    430        if (completed_io->io_suspended == PR_FALSE) {
    431          completed_io->state = _PR_RUNNABLE;
    432          _PR_THREAD_UNLOCK(completed_io);
    433          rv = ReleaseSemaphore(completed_io->md.blocked_sema, 1, NULL);
    434          PR_ASSERT(0 != rv);
    435        } else {
    436          _PR_THREAD_UNLOCK(completed_io);
    437        }
    438      }
    439    }
    440 
    441    awoken++;
    442    timeout = 0; /* Don't block on subsequent trips through the loop */
    443  }
    444 
    445  /* never reached */
    446  return 0;
    447 }
    448 
    449 static PRStatus _native_thread_md_wait(PRThread* thread, PRIntervalTime ticks) {
    450  DWORD rv;
    451  PRUint32 msecs = (ticks == PR_INTERVAL_NO_TIMEOUT)
    452                       ? INFINITE
    453                       : PR_IntervalToMilliseconds(ticks);
    454 
    455  /*
    456   * thread waiting for a cvar or a joining thread
    457   */
    458  rv = WaitForSingleObject(thread->md.blocked_sema, msecs);
    459  switch (rv) {
    460    case WAIT_OBJECT_0:
    461      return PR_SUCCESS;
    462      break;
    463    case WAIT_TIMEOUT:
    464      _PR_THREAD_LOCK(thread);
    465      PR_ASSERT(thread->state != _PR_IO_WAIT);
    466      if (thread->wait.cvar != NULL) {
    467        PR_ASSERT(thread->state == _PR_COND_WAIT);
    468        thread->wait.cvar = NULL;
    469        thread->state = _PR_RUNNING;
    470        _PR_THREAD_UNLOCK(thread);
    471      } else {
    472        /* The CVAR was notified just as the timeout
    473         * occurred.  This left the semaphore in the
    474         * signaled state.  Call WaitForSingleObject()
    475         * to clear the semaphore.
    476         */
    477        _PR_THREAD_UNLOCK(thread);
    478        rv = WaitForSingleObject(thread->md.blocked_sema, INFINITE);
    479        PR_ASSERT(rv == WAIT_OBJECT_0);
    480      }
    481      return PR_SUCCESS;
    482      break;
    483    default:
    484      return PR_FAILURE;
    485      break;
    486  }
    487 
    488  return PR_SUCCESS;
    489 }
    490 
/*
 * _PR_MD_WAIT -- block the given thread for up to 'ticks'.
 *
 * Native-threads-only builds delegate to _native_thread_md_wait().
 * A GLOBAL_SCOPE thread blocks on its per-thread semaphore; a
 * LOCAL_SCOPE (fiber) thread just switches context and relies on the
 * scheduler to resume it.  The WAIT_TIMEOUT branches resolve races
 * where the IO/CVAR was notified at the same instant the timeout
 * fired, which leaves the semaphore signaled and needing a drain.
 * Returns PR_FAILURE only if the Win32 wait itself failed.
 */
PRStatus _PR_MD_WAIT(PRThread* thread, PRIntervalTime ticks) {
  DWORD rv;

  if (_native_threads_only) {
    return (_native_thread_md_wait(thread, ticks));
  }
  if (thread->flags & _PR_GLOBAL_SCOPE) {
    PRUint32 msecs = (ticks == PR_INTERVAL_NO_TIMEOUT)
                         ? INFINITE
                         : PR_IntervalToMilliseconds(ticks);
    rv = WaitForSingleObject(thread->md.blocked_sema, msecs);
    switch (rv) {
      case WAIT_OBJECT_0:
        /* Normal wakeup: someone released our semaphore. */
        return PR_SUCCESS;
        break;
      case WAIT_TIMEOUT:
        _PR_THREAD_LOCK(thread);
        if (thread->state == _PR_IO_WAIT) {
          if (thread->io_pending == PR_TRUE) {
            /* IO timed out: mark it suspended (it is still
             * outstanding at the OS level) and let the thread run. */
            thread->state = _PR_RUNNING;
            thread->io_suspended = PR_TRUE;
            _PR_THREAD_UNLOCK(thread);
          } else {
            /* The IO completed just at the same time the timeout
             * occurred.  This left the semaphore in the signaled
             * state.  Call WaitForSingleObject() to clear the
             * semaphore.
             */
            _PR_THREAD_UNLOCK(thread);
            rv = WaitForSingleObject(thread->md.blocked_sema, INFINITE);
            PR_ASSERT(rv == WAIT_OBJECT_0);
          }
        } else {
          if (thread->wait.cvar != NULL) {
            /* Plain cvar timeout: detach and run again. */
            PR_ASSERT(thread->state == _PR_COND_WAIT);
            thread->wait.cvar = NULL;
            thread->state = _PR_RUNNING;
            _PR_THREAD_UNLOCK(thread);
          } else {
            /* The CVAR was notified just as the timeout
             * occurred.  This left the semaphore in the
             * signaled state.  Call WaitForSingleObject()
             * to clear the semaphore.
             */
            _PR_THREAD_UNLOCK(thread);
            rv = WaitForSingleObject(thread->md.blocked_sema, INFINITE);
            PR_ASSERT(rv == WAIT_OBJECT_0);
          }
        }
        return PR_SUCCESS;
        break;
      default:
        return PR_FAILURE;
        break;
    }
  } else {
    PRInt32 is;

    /* LOCAL_SCOPE fiber: yield; the caller has already queued this
     * thread wherever its waker will find it. */
    _PR_INTSOFF(is);
    _PR_MD_SWITCH_CONTEXT(thread);
  }

  return PR_SUCCESS;
}
    555 
/*
 * _native_thread_io_nowait -- bookkeeping for an IO that completed
 * immediately (rv != 0, no blocking needed) on a native thread.
 * Clears io_pending, resolves the race with a concurrent interrupt
 * (which leaves the semaphore signaled), and stores 'rv'/'bytes' in
 * the per-thread md fields where callers pick up blocking-IO results.
 */
static void _native_thread_io_nowait(PRThread* thread, int rv, int bytes) {
  int rc;

  PR_ASSERT(rv != 0);
  _PR_THREAD_LOCK(thread);
  if (thread->state == _PR_IO_WAIT) {
    PR_ASSERT(thread->io_suspended == PR_FALSE);
    PR_ASSERT(thread->io_pending == PR_TRUE);
    thread->state = _PR_RUNNING;
    thread->io_pending = PR_FALSE;
    _PR_THREAD_UNLOCK(thread);
  } else {
    /* The IO completed just at the same time the
     * thread was interrupted. This left the semaphore
     * in the signaled state. Call WaitForSingleObject()
     * to clear the semaphore.
     */
    PR_ASSERT(thread->io_suspended == PR_TRUE);
    PR_ASSERT(thread->io_pending == PR_TRUE);
    thread->io_pending = PR_FALSE;
    _PR_THREAD_UNLOCK(thread);
    rc = WaitForSingleObject(thread->md.blocked_sema, INFINITE);
    PR_ASSERT(rc == WAIT_OBJECT_0);
  }

  /* Publish the result and reset the IO event for the next operation. */
  thread->md.blocked_io_status = rv;
  thread->md.blocked_io_bytes = bytes;
  rc = ResetEvent(thread->md.thr_event);
  PR_ASSERT(rc != 0);
  return;
}
    587 
/*
 * _native_thread_io_wait -- block a GLOBAL_SCOPE native thread until
 * its outstanding overlapped IO completes, it is interrupted, or
 * 'ticks' elapses.  Waits on two handles: the wakeup semaphore
 * (index 0, signaled on interrupt/notify) and the per-thread IO event
 * (index 1, signaled on IO completion).  On completion the result is
 * fetched with GetOverlappedResult() and stored in the md fields; on
 * timeout the IO is marked suspended but left outstanding.
 * Returns PR_FAILURE only if the Win32 wait itself failed.
 */
static PRStatus _native_thread_io_wait(PRThread* thread, PRIntervalTime ticks) {
  DWORD rv, bytes;
#define _NATIVE_IO_WAIT_HANDLES 2
#define _NATIVE_WAKEUP_EVENT_INDEX 0
#define _NATIVE_IO_EVENT_INDEX 1

  HANDLE wait_handles[_NATIVE_IO_WAIT_HANDLES];

  PRUint32 msecs = (ticks == PR_INTERVAL_NO_TIMEOUT)
                       ? INFINITE
                       : PR_IntervalToMilliseconds(ticks);

  PR_ASSERT(thread->flags & _PR_GLOBAL_SCOPE);

  wait_handles[0] = thread->md.blocked_sema;
  wait_handles[1] = thread->md.thr_event;
  rv = WaitForMultipleObjects(_NATIVE_IO_WAIT_HANDLES, wait_handles, FALSE,
                              msecs);

  switch (rv) {
    case WAIT_OBJECT_0 + _NATIVE_IO_EVENT_INDEX:
      /*
       * I/O op completed
       */
      _PR_THREAD_LOCK(thread);
      if (thread->state == _PR_IO_WAIT) {
        PR_ASSERT(thread->io_suspended == PR_FALSE);
        PR_ASSERT(thread->io_pending == PR_TRUE);
        thread->state = _PR_RUNNING;
        thread->io_pending = PR_FALSE;
        _PR_THREAD_UNLOCK(thread);
      } else {
        /* The IO completed just at the same time the
         * thread was interrupted. This led to us being
         * notified twice. Call WaitForSingleObject()
         * to clear the semaphore.
         */
        PR_ASSERT(thread->io_suspended == PR_TRUE);
        PR_ASSERT(thread->io_pending == PR_TRUE);
        thread->io_pending = PR_FALSE;
        _PR_THREAD_UNLOCK(thread);
        rv = WaitForSingleObject(thread->md.blocked_sema, INFINITE);
        PR_ASSERT(rv == WAIT_OBJECT_0);
      }

      /* Retrieve the transfer count (FALSE: the op is already done,
       * so this must not block). */
      rv =
          GetOverlappedResult((HANDLE)thread->io_fd,
                              &thread->md.overlapped.overlapped, &bytes, FALSE);

      thread->md.blocked_io_status = rv;
      if (rv != 0) {
        thread->md.blocked_io_bytes = bytes;
      } else {
        thread->md.blocked_io_error = GetLastError();
        PR_ASSERT(ERROR_IO_PENDING != thread->md.blocked_io_error);
      }
      rv = ResetEvent(thread->md.thr_event);
      PR_ASSERT(rv != 0);
      break;
    case WAIT_OBJECT_0 + _NATIVE_WAKEUP_EVENT_INDEX:
      /*
       * I/O interrupted;
       */
#ifdef DEBUG
      _PR_THREAD_LOCK(thread);
      PR_ASSERT(thread->io_suspended == PR_TRUE);
      _PR_THREAD_UNLOCK(thread);
#endif
      break;
    case WAIT_TIMEOUT:
      _PR_THREAD_LOCK(thread);
      if (thread->state == _PR_IO_WAIT) {
        /* Plain timeout: the IO stays outstanding, marked suspended. */
        thread->state = _PR_RUNNING;
        thread->io_suspended = PR_TRUE;
        _PR_THREAD_UNLOCK(thread);
      } else {
        /*
         * The thread was interrupted just as the timeout
         * occurred. This left the semaphore in the signaled
         * state. Call WaitForSingleObject() to clear the
         * semaphore.
         */
        PR_ASSERT(thread->io_suspended == PR_TRUE);
        _PR_THREAD_UNLOCK(thread);
        rv = WaitForSingleObject(thread->md.blocked_sema, INFINITE);
        PR_ASSERT(rv == WAIT_OBJECT_0);
      }
      break;
    default:
      return PR_FAILURE;
      break;
  }

  return PR_SUCCESS;
}
    683 
    684 static PRStatus _NT_IO_WAIT(PRThread* thread, PRIntervalTime timeout) {
    685  PRBool fWait = PR_TRUE;
    686 
    687  if (_native_threads_only) {
    688    return (_native_thread_io_wait(thread, timeout));
    689  }
    690  if (!_PR_IS_NATIVE_THREAD(thread)) {
    691    _PR_THREAD_LOCK(thread);
    692 
    693    /* The IO may have already completed; if so, don't add to sleepQ,
    694     * since we are already on the runQ!
    695     */
    696    if (thread->io_pending == PR_TRUE) {
    697      _PR_SLEEPQ_LOCK(thread->cpu);
    698      _PR_ADD_SLEEPQ(thread, timeout);
    699      _PR_SLEEPQ_UNLOCK(thread->cpu);
    700    } else {
    701      fWait = PR_FALSE;
    702    }
    703    _PR_THREAD_UNLOCK(thread);
    704  }
    705  if (fWait) {
    706    return _PR_MD_WAIT(thread, timeout);
    707  } else {
    708    return PR_SUCCESS;
    709  }
    710 }
    711 
    712 /*
    713 * Unblock threads waiting for I/O
    714 * used when interrupting threads
    715 *
     716 * NOTE: The thread lock must be held when this function is called.
    717 * On return, the thread lock is released.
    718 */
/* Unblock a thread stuck in _PR_IO_WAIT (timeout or interrupt).
 * Caller holds the thread lock; this function releases it on every
 * path.  The IO itself is left outstanding (io_suspended) until the
 * FD is closed or the IO is cancelled. */
void _PR_Unblock_IO_Wait(PRThread* thr) {
  PRStatus rv;
  _PRCPU* cpu = thr->cpu;

  PR_ASSERT(thr->state == _PR_IO_WAIT);
  /*
   * A thread for which an I/O timed out or was interrupted cannot be
   * in an IO_WAIT state except as a result of calling PR_Close or
   * PR_NT_CancelIo for the FD. For these two cases, _PR_IO_WAIT state
   * is not interruptible
   */
  if (thr->md.interrupt_disabled == PR_TRUE) {
    _PR_THREAD_UNLOCK(thr);
    return;
  }
  thr->io_suspended = PR_TRUE;
  thr->state = _PR_RUNNABLE;

  if (!_PR_IS_NATIVE_THREAD(thr)) {
    PRThread* me = _PR_MD_CURRENT_THREAD();
    PR_ASSERT(thr->flags & (_PR_ON_SLEEPQ | _PR_ON_PAUSEQ));
    /* Move the fiber off its CPU's sleep queue onto a run queue. */
    _PR_SLEEPQ_LOCK(cpu);
    _PR_DEL_SLEEPQ(thr, PR_TRUE);
    _PR_SLEEPQ_UNLOCK(cpu);
    /*
     * this thread will continue to run on the same cpu until the
     * I/O is aborted by closing the FD or calling CancelIO
     */
    thr->md.thr_bound_cpu = cpu;

    PR_ASSERT(!(thr->flags & _PR_IDLE_THREAD));
    _PR_AddThreadToRunQ(me, thr);
  }
  _PR_THREAD_UNLOCK(thr);
  rv = _PR_MD_WAKEUP_WAITER(thr);
  PR_ASSERT(PR_SUCCESS == rv);
}
    756 
    757 /* Resume an outstanding IO; requires that after the switch, we disable */
/* Resume an outstanding IO; requires that after the switch, we disable.
 *
 * For a fiber thread, the thread pointer is stashed in thread-local
 * storage (static TLS or a TlsSetValue slot) so the restarted-io
 * handler that runs after the context switch can finish the resume
 * and re-queue the thread on the sleepQ if needed.  For a native
 * thread, io_suspended is cleared directly under the thread lock, and
 * the wait is skipped if the IO has meanwhile completed. */
static PRStatus _NT_ResumeIO(PRThread* thread, PRIntervalTime ticks) {
  PRBool fWait = PR_TRUE;

  if (!_PR_IS_NATIVE_THREAD(thread)) {
    if (_pr_use_static_tls) {
      _pr_io_restarted_io = thread;
    } else {
      TlsSetValue(_pr_io_restartedIOIndex, thread);
    }
  } else {
    _PR_THREAD_LOCK(thread);
    if (!thread->io_pending) {
      /* IO already completed while suspended: nothing to wait for. */
      fWait = PR_FALSE;
    }
    thread->io_suspended = PR_FALSE;

    _PR_THREAD_UNLOCK(thread);
  }
  /* We don't put ourselves back on the sleepQ yet; until we
   * set the suspended bit to false, we can't do that.  Just save
   * the sleep time here, and then continue.  The restarted_io handler
   * will add us to the sleepQ if needed.
   */
  thread->sleep = ticks;

  if (fWait) {
    if (!_PR_IS_NATIVE_THREAD(thread)) {
      return _PR_MD_WAIT(thread, ticks);
    } else {
      return _NT_IO_WAIT(thread, ticks);
    }
  }
  return PR_SUCCESS;
}
    792 
    793 PRStatus _PR_MD_WAKEUP_WAITER(PRThread* thread) {
    794  if (thread == NULL) {
    795    /* If thread is NULL, we aren't waking a thread, we're just poking
    796     * idle thread
    797     */
    798    if (PostQueuedCompletionStatus(_pr_completion_port, 0, KEY_CVAR, NULL) ==
    799        FALSE) {
    800      return PR_FAILURE;
    801    }
    802    return PR_SUCCESS;
    803  }
    804 
    805  if (_PR_IS_NATIVE_THREAD(thread)) {
    806    if (ReleaseSemaphore(thread->md.blocked_sema, 1, NULL) == FALSE) {
    807      return PR_FAILURE;
    808    } else {
    809      return PR_SUCCESS;
    810    }
    811  } else {
    812    PRThread* me = _PR_MD_CURRENT_THREAD();
    813 
    814    /* When a Native thread has to awaken a user thread, it has to poke
    815     * the completion port because all user threads might be idle, and
    816     * thus the CPUs are just waiting for a completion.
    817     *
    818     * XXXMB - can we know when we are truely idle (and not checking
    819     *         the runq)?
    820     */
    821    if ((_PR_IS_NATIVE_THREAD(me) || (thread->cpu != me->cpu)) &&
    822        (!thread->md.thr_bound_cpu)) {
    823      /* The thread should not be in any queue */
    824      PR_ASSERT(thread->queueCount == 0);
    825      if (PostQueuedCompletionStatus(_pr_completion_port, 0, KEY_CVAR,
    826                                     &(thread->md.overlapped.overlapped)) ==
    827          FALSE) {
    828        return PR_FAILURE;
    829      }
    830    }
    831    return PR_SUCCESS;
    832  }
    833 }
    834 
/*
 * _PR_MD_INIT_IO()
 * One-time initialization of the NT I/O layer: starts winsock, creates
 * the single shared I/O completion port, initializes the recycle-bin and
 * I/O-queue locks, probes the OS version, and spawns the UDP continuation
 * thread.
 */
void _PR_MD_INIT_IO() {
 WORD WSAVersion = 0x0101; /* request winsock 1.1 */
 WSADATA WSAData;
 int err;
 OSVERSIONINFO OSversion;

 err = WSAStartup(WSAVersion, &WSAData);
 PR_ASSERT(0 == err);

 /* One completion port shared by all associated handles (see _md_Associate) */
 _pr_completion_port =
     CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);

 _MD_NEW_LOCK(&_pr_recycle_lock);
 _MD_NEW_LOCK(&_pr_ioq_lock);

 /* Record whether this NT version posts completions for file locking;
  * per the flag name this changed at major version 4 — behavior of the
  * consumer of _nt_version_gets_lockfile_completion is elsewhere. */
 OSversion.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
 if (GetVersionEx(&OSversion)) {
   _nt_version_gets_lockfile_completion = PR_FALSE;
   if (OSversion.dwMajorVersion >= 4) {
     _nt_version_gets_lockfile_completion = PR_TRUE;
   }
 } else {
   PR_ASSERT(0);
 }

#ifdef _NEED_351_FILE_LOCKING_HACK
 IsFileLocalInit();
#endif /* _NEED_351_FILE_LOCKING_HACK */

 /*
  * UDP support: start up the continuation thread
  */

 pt_tq.op_count = 0;
 pt_tq.head = pt_tq.tail = NULL;
 pt_tq.ml = PR_NewLock();
 PR_ASSERT(NULL != pt_tq.ml);
 pt_tq.new_op = PR_NewCondVar(pt_tq.ml);
 PR_ASSERT(NULL != pt_tq.new_op);
#if defined(DEBUG)
 memset(&pt_debug, 0, sizeof(struct pt_debug_s));
#endif

 pt_tq.thread = PR_CreateThread(PR_SYSTEM_THREAD, ContinuationThread, NULL,
                                PR_PRIORITY_URGENT, PR_GLOBAL_THREAD,
                                PR_JOINABLE_THREAD, 0);

 PR_ASSERT(NULL != pt_tq.thread);

#ifdef DEBUG
 /* Doublecheck _pr_filetime_offset's hard-coded value is correct. */
 {
   SYSTEMTIME systime;
   union {
     PRTime prt;
     FILETIME ft;
   } filetime;
   BOOL rv;

   systime.wYear = 1970;
   systime.wMonth = 1;
   /* wDayOfWeek is ignored */
   systime.wDay = 1;
   systime.wHour = 0;
   systime.wMinute = 0;
   systime.wSecond = 0;
   systime.wMilliseconds = 0;

   rv = SystemTimeToFileTime(&systime, &filetime.ft);
   PR_ASSERT(0 != rv);
   PR_ASSERT(filetime.prt == _pr_filetime_offset);
 }
#endif /* DEBUG */

 _PR_NT_InitSids();
}
    911 
    912 /* --- SOCKET IO --------------------------------------------------------- */
    913 
    914 /* _md_get_recycled_socket()
    915 * Get a socket from the recycle bin; if no sockets are in the bin,
    916 * create one.  The socket will be passed to AcceptEx() as the
    917 * second argument.
    918 */
    919 static SOCKET _md_get_recycled_socket(int af) {
    920  SOCKET rv;
    921 
    922  _MD_LOCK(&_pr_recycle_lock);
    923  if (af == AF_INET && _pr_recycle_INET_tail) {
    924    _pr_recycle_INET_tail--;
    925    rv = _pr_recycle_INET_array[_pr_recycle_INET_tail];
    926    _MD_UNLOCK(&_pr_recycle_lock);
    927    return rv;
    928  }
    929  if (af == AF_INET6 && _pr_recycle_INET6_tail) {
    930    _pr_recycle_INET6_tail--;
    931    rv = _pr_recycle_INET6_array[_pr_recycle_INET6_tail];
    932    _MD_UNLOCK(&_pr_recycle_lock);
    933    return rv;
    934  }
    935  _MD_UNLOCK(&_pr_recycle_lock);
    936 
    937  rv = _PR_MD_SOCKET(af, SOCK_STREAM, 0);
    938  if (rv != INVALID_SOCKET && _md_Associate((HANDLE)rv) == 0) {
    939    closesocket(rv);
    940    return INVALID_SOCKET;
    941  }
    942  return rv;
    943 }
    944 
    945 /* _md_put_recycled_socket()
    946 * Add a socket to the recycle bin.
    947 */
    948 static void _md_put_recycled_socket(SOCKET newsock, int af) {
    949  PR_ASSERT(_pr_recycle_INET_tail >= 0);
    950  PR_ASSERT(_pr_recycle_INET6_tail >= 0);
    951 
    952  _MD_LOCK(&_pr_recycle_lock);
    953  if (af == AF_INET && _pr_recycle_INET_tail < RECYCLE_SIZE) {
    954    _pr_recycle_INET_array[_pr_recycle_INET_tail] = newsock;
    955    _pr_recycle_INET_tail++;
    956    _MD_UNLOCK(&_pr_recycle_lock);
    957  } else if (af == AF_INET6 && _pr_recycle_INET6_tail < RECYCLE_SIZE) {
    958    _pr_recycle_INET6_array[_pr_recycle_INET6_tail] = newsock;
    959    _pr_recycle_INET6_tail++;
    960    _MD_UNLOCK(&_pr_recycle_lock);
    961  } else {
    962    _MD_UNLOCK(&_pr_recycle_lock);
    963    closesocket(newsock);
    964  }
    965 
    966  return;
    967 }
    968 
    969 /* _md_Associate()
    970 * Associates a file with the completion port.
    971 * Returns 0 on failure, 1 on success.
    972 */
    973 PRInt32 _md_Associate(HANDLE file) {
    974  HANDLE port;
    975 
    976  if (!_native_threads_only) {
    977    port = CreateIoCompletionPort((HANDLE)file, _pr_completion_port, KEY_IO, 0);
    978 
    979    /* XXX should map error codes on failures */
    980    return (port == _pr_completion_port);
    981  } else {
    982    return 1;
    983  }
    984 }
    985 
    986 /*
    987 * _md_MakeNonblock()
    988 * Make a socket nonblocking.
    989 * Returns 0 on failure, 1 on success.
    990 */
    991 static PRInt32 _md_MakeNonblock(HANDLE file) {
    992  int rv;
    993  u_long one = 1;
    994 
    995  rv = ioctlsocket((SOCKET)file, FIONBIO, &one);
    996  /* XXX should map error codes on failures */
    997  return (rv == 0);
    998 }
    999 
/* Diagnostic counters maintained by _NT_IO_ABORT(): how many aborts needed
 * more than one wait loop to see a completion, and the longest loop seen. */
static int missing_completions = 0;
static int max_wait_loops = 0;
   1002 
   1003 static PRInt32 _NT_IO_ABORT(PROsfd sock) {
   1004  PRThread* me = _PR_MD_CURRENT_THREAD();
   1005  PRBool fWait;
   1006  PRInt32 rv;
   1007  int loop_count;
   1008 
   1009  /* This is a clumsy way to abort the IO, but it is all we can do.
   1010   * It looks a bit racy, but we handle all the cases.
   1011   * case 1:  IO completes before calling closesocket
   1012   *     case 1a:  fWait is set to PR_FALSE
   1013   *           This should e the most likely case.  We'll properly
   1014   *           not wait call _NT_IO_WAIT, since the closesocket()
   1015   *           won't be forcing a completion.
   1016   *     case 1b: fWait is set to PR_TRUE
   1017   *           This hopefully won't happen much.  When it does, this
   1018   *           thread will timeout in _NT_IO_WAIT for CLOSE_INTERVAL
   1019   *           before cleaning up.
   1020   * case 2:  IO does not complete before calling closesocket
   1021   *     case 2a: IO never completes
   1022   *           This is the likely case.  We'll close it and wait
   1023   *           for the completion forced by the close.  Return should
   1024   *           be immediate.
   1025   *     case 2b: IO completes just after calling closesocket
   1026   *           Since the closesocket is issued, we'll either get a
   1027   *           completion back for the real IO or for the close.  We
   1028   *           don't really care.  It may not even be possible to get
   1029   *           a real completion here.  In any event, we'll awaken
   1030   *           from NT_IO_WAIT immediately.
   1031   */
   1032 
   1033  _PR_THREAD_LOCK(me);
   1034  fWait = me->io_pending;
   1035  if (fWait) {
   1036    /*
   1037     * If there's still I/O pending, it should have already timed
   1038     * out once before this function is called.
   1039     */
   1040    PR_ASSERT(me->io_suspended == PR_TRUE);
   1041 
   1042    /* Set up to wait for I/O completion again */
   1043    me->state = _PR_IO_WAIT;
   1044    me->io_suspended = PR_FALSE;
   1045    me->md.interrupt_disabled = PR_TRUE;
   1046  }
   1047  _PR_THREAD_UNLOCK(me);
   1048 
   1049  /* Close the socket if there is one */
   1050  if (sock != INVALID_SOCKET) {
   1051    rv = closesocket((SOCKET)sock);
   1052  }
   1053 
   1054  /* If there was I/O pending before the close, wait for it to complete */
   1055  if (fWait) {
   1056    /* Wait and wait for the I/O to complete */
   1057    for (loop_count = 0; fWait; ++loop_count) {
   1058      _NT_IO_WAIT(me, CLOSE_TIMEOUT);
   1059 
   1060      _PR_THREAD_LOCK(me);
   1061      fWait = me->io_pending;
   1062      if (fWait) {
   1063        PR_ASSERT(me->io_suspended == PR_TRUE);
   1064        me->state = _PR_IO_WAIT;
   1065        me->io_suspended = PR_FALSE;
   1066      }
   1067      _PR_THREAD_UNLOCK(me);
   1068 
   1069      if (loop_count > max_wait_loops) {
   1070        max_wait_loops = loop_count;
   1071      }
   1072    }
   1073 
   1074    if (loop_count > 1) {
   1075      ++missing_completions;
   1076    }
   1077 
   1078    me->md.interrupt_disabled = PR_FALSE;
   1079    me->io_pending = PR_FALSE;
   1080    me->state = _PR_RUNNING;
   1081  }
   1082 
   1083  PR_ASSERT(me->io_pending == PR_FALSE);
   1084  me->md.thr_bound_cpu = NULL;
   1085  me->io_suspended = PR_FALSE;
   1086 
   1087  return rv;
   1088 }
   1089 
   1090 PROsfd _PR_MD_SOCKET(int af, int type, int flags) {
   1091  SOCKET sock;
   1092 
   1093  sock = socket(af, type, flags);
   1094 
   1095  if (sock == INVALID_SOCKET) {
   1096    _PR_MD_MAP_SOCKET_ERROR(WSAGetLastError());
   1097  }
   1098 
   1099  return (PROsfd)sock;
   1100 }
   1101 
/* Argument/result bundle handed to _PR_MD_connect_thread() as its
 * void* parameter. */
struct connect_data_s {
 PRInt32 status; /* out: return value of connect() */
 PRInt32 error;  /* out: WSAGetLastError() when status == SOCKET_ERROR */
 PROsfd osfd;    /* in: socket to connect */
 struct sockaddr* addr; /* in: peer address */
 PRUint32 addrlen;      /* in: size of *addr */
 PRIntervalTime timeout; /* not read by _PR_MD_connect_thread; presumably
                          * consumed by the caller — TODO confirm */
};
   1110 
   1111 void _PR_MD_connect_thread(void* cdata) {
   1112  struct connect_data_s* cd = (struct connect_data_s*)cdata;
   1113 
   1114  cd->status = connect(cd->osfd, cd->addr, cd->addrlen);
   1115 
   1116  if (cd->status == SOCKET_ERROR) {
   1117    cd->error = WSAGetLastError();
   1118  }
   1119 
   1120  return;
   1121 }
   1122 
   1123 PRInt32 _PR_MD_CONNECT(PRFileDesc* fd, const PRNetAddr* addr, PRUint32 addrlen,
   1124                       PRIntervalTime timeout) {
   1125  PROsfd osfd = fd->secret->md.osfd;
   1126  PRInt32 rv, err;
   1127  u_long nbio;
   1128  PRInt32 rc;
   1129 
   1130  if (fd->secret->nonblocking) {
   1131    if (!fd->secret->md.io_model_committed) {
   1132      rv = _md_MakeNonblock((HANDLE)osfd);
   1133      PR_ASSERT(0 != rv);
   1134      fd->secret->md.io_model_committed = PR_TRUE;
   1135    }
   1136 
   1137    if ((rv = connect(osfd, (struct sockaddr*)addr, addrlen)) == -1) {
   1138      err = WSAGetLastError();
   1139      _PR_MD_MAP_CONNECT_ERROR(err);
   1140    }
   1141    return rv;
   1142  }
   1143 
   1144  /*
   1145   * Temporarily make the socket non-blocking so that we can
   1146   * initiate a non-blocking connect and wait for its completion
   1147   * (with a timeout) in select.
   1148   */
   1149  PR_ASSERT(!fd->secret->md.io_model_committed);
   1150  nbio = 1;
   1151  rv = ioctlsocket((SOCKET)osfd, FIONBIO, &nbio);
   1152  PR_ASSERT(0 == rv);
   1153 
   1154  rc = _nt_nonblock_connect(fd, (struct sockaddr*)addr, addrlen, timeout);
   1155 
   1156  /* Set the socket back to blocking. */
   1157  nbio = 0;
   1158  rv = ioctlsocket((SOCKET)osfd, FIONBIO, &nbio);
   1159  PR_ASSERT(0 == rv);
   1160 
   1161  return rc;
   1162 }
   1163 
   1164 PRInt32 _PR_MD_BIND(PRFileDesc* fd, const PRNetAddr* addr, PRUint32 addrlen) {
   1165  PRInt32 rv;
   1166 #if 0
   1167    int one = 1;
   1168 #endif
   1169 
   1170  rv =
   1171      bind(fd->secret->md.osfd, (const struct sockaddr*)&(addr->inet), addrlen);
   1172 
   1173  if (rv == SOCKET_ERROR) {
   1174    _PR_MD_MAP_BIND_ERROR(WSAGetLastError());
   1175    return -1;
   1176  }
   1177 
   1178 #if 0
   1179    /* Disable nagle- so far unknown if this is good or not...
   1180     */
   1181    rv = setsockopt(fd->secret->md.osfd,
   1182                    SOL_SOCKET,
   1183                    TCP_NODELAY,
   1184                    (const char *)&one,
   1185                    sizeof(one));
   1186    PR_ASSERT(rv == 0);
   1187 #endif
   1188 
   1189  return 0;
   1190 }
   1191 
   1192 void _PR_MD_UPDATE_ACCEPT_CONTEXT(PROsfd accept_sock, PROsfd listen_sock) {
   1193  /* Sockets accept()'d with AcceptEx need to call this setsockopt before
   1194   * calling anything other than ReadFile(), WriteFile(), send(), recv(),
   1195   * Transmitfile(), and closesocket().  In order to call any other
   1196   * winsock functions, we have to make this setsockopt call.
   1197   *
   1198   * XXXMB - For the server, we *NEVER* need this in
   1199   * the "normal" code path.  But now we have to call it.  This is a waste
   1200   * of a system call.  We'd like to only call it before calling the
   1201   * obscure socket calls, but since we don't know at that point what the
   1202   * original socket was (or even if it is still alive) we can't do it
   1203   * at that point...
   1204   */
   1205  setsockopt((SOCKET)accept_sock, SOL_SOCKET, SO_UPDATE_ACCEPT_CONTEXT,
   1206             (char*)&listen_sock, sizeof(listen_sock));
   1207 }
   1208 
/* Each AcceptEx address slot must be at least 16 bytes larger than the
 * largest address we expect. */
#define INET_ADDR_PADDED (sizeof(PRNetAddr) + 16)
/*
 * _PR_MD_FAST_ACCEPT()
 * Accept a connection.  Nonblocking descriptors use the select-based
 * _nt_nonblock_accept() path; blocking descriptors issue an overlapped
 * AcceptEx() on a recycled socket and wait on the completion port.
 * On success returns the accepted socket and, if raddr is non-NULL,
 * copies the peer address into *raddr/*rlen.  Returns -1 on error,
 * timeout, or interrupt.  `fast` skips SO_UPDATE_ACCEPT_CONTEXT.
 */
PROsfd _PR_MD_FAST_ACCEPT(PRFileDesc* fd, PRNetAddr* raddr, PRUint32* rlen,
                         PRIntervalTime timeout, PRBool fast,
                         _PR_AcceptTimeoutCallback callback,
                         void* callbackArg) {
 PROsfd osfd = fd->secret->md.osfd;
 PRThread* me = _PR_MD_CURRENT_THREAD();
 SOCKET accept_sock;
 int bytes;
 PRNetAddr* Laddr;
 PRNetAddr* Raddr;
 PRUint32 llen, err;
 int rv;

 /* Nonblocking path: no completion port involvement. */
 if (_NT_USE_NB_IO(fd)) {
   if (!fd->secret->md.io_model_committed) {
     rv = _md_MakeNonblock((HANDLE)osfd);
     PR_ASSERT(0 != rv);
     fd->secret->md.io_model_committed = PR_TRUE;
   }
   /*
    * The accepted socket inherits the nonblocking and
    * inheritable (HANDLE_FLAG_INHERIT) attributes of
    * the listening socket.
    */
   accept_sock =
       _nt_nonblock_accept(fd, (struct sockaddr*)raddr, rlen, timeout);
   if (!fd->secret->nonblocking) {
     u_long zero = 0;

     rv = ioctlsocket(accept_sock, FIONBIO, &zero);
     PR_ASSERT(0 == rv);
   }
   return accept_sock;
 }

 /* A previously timed-out I/O is still outstanding; cannot start another. */
 if (me->io_suspended) {
   PR_SetError(PR_INVALID_STATE_ERROR, 0);
   return -1;
 }

 if (!fd->secret->md.io_model_committed) {
   rv = _md_Associate((HANDLE)osfd);
   PR_ASSERT(0 != rv);
   fd->secret->md.io_model_committed = PR_TRUE;
 }

 /* Per-thread buffer for AcceptEx's local+remote address block. */
 if (!me->md.acceptex_buf) {
   me->md.acceptex_buf = PR_MALLOC(2 * INET_ADDR_PADDED);
   if (!me->md.acceptex_buf) {
     PR_SetError(PR_OUT_OF_MEMORY_ERROR, 0);
     return -1;
   }
 }

 accept_sock = _md_get_recycled_socket(fd->secret->af);
 if (accept_sock == INVALID_SOCKET) {
   return -1;
 }

 memset(&(me->md.overlapped.overlapped), 0, sizeof(OVERLAPPED));
 if (_native_threads_only) {
   me->md.overlapped.overlapped.hEvent = me->md.thr_event;
 }

 /* Mark ourselves I/O-pending before issuing the overlapped call so the
  * completion handler sees a consistent state. */
 _PR_THREAD_LOCK(me);
 if (_PR_PENDING_INTERRUPT(me)) {
   me->flags &= ~_PR_INTERRUPT;
   PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   _PR_THREAD_UNLOCK(me);
   closesocket(accept_sock);
   return -1;
 }
 me->io_pending = PR_TRUE;
 me->state = _PR_IO_WAIT;
 _PR_THREAD_UNLOCK(me);
 me->io_fd = osfd;

 rv = AcceptEx((SOCKET)osfd, accept_sock, me->md.acceptex_buf, 0,
               INET_ADDR_PADDED, INET_ADDR_PADDED, &bytes,
               &(me->md.overlapped.overlapped));

 if ((rv == 0) && ((err = WSAGetLastError()) != ERROR_IO_PENDING)) {
   /* Argh! The IO failed */
   closesocket(accept_sock);
   _PR_THREAD_LOCK(me);
   me->io_pending = PR_FALSE;
   me->state = _PR_RUNNING;
   if (_PR_PENDING_INTERRUPT(me)) {
     me->flags &= ~_PR_INTERRUPT;
     PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
     _PR_THREAD_UNLOCK(me);
     return -1;
   }
   _PR_THREAD_UNLOCK(me);

   _PR_MD_MAP_ACCEPTEX_ERROR(err);
   return -1;
 }

 /* rv != 0 means AcceptEx completed synchronously; otherwise block until
  * the completion port (or event) delivers the result. */
 if (_native_threads_only && rv) {
   _native_thread_io_nowait(me, rv, bytes);
 } else if (_NT_IO_WAIT(me, timeout) == PR_FAILURE) {
   PR_ASSERT(0);
   closesocket(accept_sock);
   return -1;
 }

 PR_ASSERT(me->io_pending == PR_FALSE || me->io_suspended == PR_TRUE);

 /* io_suspended set => the wait timed out or we were interrupted. */
 if (me->io_suspended) {
   closesocket(accept_sock);
   if (_PR_PENDING_INTERRUPT(me)) {
     me->flags &= ~_PR_INTERRUPT;
     PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   } else {
     PR_SetError(PR_IO_TIMEOUT_ERROR, 0);
   }
   return -1;
 }

 /* Completion arrived but reported failure. */
 if (me->md.blocked_io_status == 0) {
   closesocket(accept_sock);
   _PR_MD_MAP_ACCEPTEX_ERROR(me->md.blocked_io_error);
   return -1;
 }

 if (!fast) {
   _PR_MD_UPDATE_ACCEPT_CONTEXT((SOCKET)accept_sock, (SOCKET)osfd);
 }

 /* IO is done */
 GetAcceptExSockaddrs(me->md.acceptex_buf, 0, INET_ADDR_PADDED,
                      INET_ADDR_PADDED, (LPSOCKADDR*)&(Laddr), &llen,
                      (LPSOCKADDR*)&(Raddr), (unsigned int*)rlen);

 if (raddr != NULL) {
   memcpy((char*)raddr, (char*)&Raddr->inet, *rlen);
 }

 PR_ASSERT(me->io_pending == PR_FALSE);

 return accept_sock;
}
   1353 
/*
 * _PR_MD_FAST_ACCEPT_READ()
 * Combined accept+read via overlapped AcceptEx(): accepts a connection
 * into *newSock, reads up to `amount` bytes of initial data into `buf`,
 * and points *raddr at the peer address within `buf`.  Returns the byte
 * count read, or -1 on error/timeout/interrupt.  On timeout, uses
 * SO_CONNECT_TIME to decide whether a connection actually arrived and
 * either resumes the I/O (invoking `callback` once if not yet connected)
 * or aborts it via _NT_IO_ABORT().
 *
 * NOTE(review): this function tests GetLastError() after AcceptEx while
 * _PR_MD_FAST_ACCEPT uses WSAGetLastError() — presumably equivalent for
 * winsock failures, but the inconsistency is worth confirming upstream.
 */
PRInt32 _PR_MD_FAST_ACCEPT_READ(PRFileDesc* sd, PROsfd* newSock,
                               PRNetAddr** raddr, void* buf, PRInt32 amount,
                               PRIntervalTime timeout, PRBool fast,
                               _PR_AcceptTimeoutCallback callback,
                               void* callbackArg) {
 PROsfd sock = sd->secret->md.osfd;
 PRThread* me = _PR_MD_CURRENT_THREAD();
 int bytes;
 PRNetAddr* Laddr;
 PRUint32 llen, rlen, err;
 int rv;
 PRBool isConnected;
 PRBool madeCallback = PR_FALSE;

 /* A previously timed-out I/O is still outstanding. */
 if (me->io_suspended) {
   PR_SetError(PR_INVALID_STATE_ERROR, 0);
   return -1;
 }

 if (!sd->secret->md.io_model_committed) {
   rv = _md_Associate((HANDLE)sock);
   PR_ASSERT(0 != rv);
   sd->secret->md.io_model_committed = PR_TRUE;
 }

 *newSock = _md_get_recycled_socket(sd->secret->af);
 if (*newSock == INVALID_SOCKET) {
   return -1;
 }

 memset(&(me->md.overlapped.overlapped), 0, sizeof(OVERLAPPED));
 if (_native_threads_only) {
   me->md.overlapped.overlapped.hEvent = me->md.thr_event;
 }

 /* Mark I/O-pending before issuing the overlapped call. */
 _PR_THREAD_LOCK(me);
 if (_PR_PENDING_INTERRUPT(me)) {
   me->flags &= ~_PR_INTERRUPT;
   PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   _PR_THREAD_UNLOCK(me);
   closesocket(*newSock);
   return -1;
 }
 me->io_pending = PR_TRUE;
 me->state = _PR_IO_WAIT;
 _PR_THREAD_UNLOCK(me);
 me->io_fd = sock;

 rv = AcceptEx((SOCKET)sock, *newSock, buf, amount, INET_ADDR_PADDED,
               INET_ADDR_PADDED, &bytes, &(me->md.overlapped.overlapped));

 if ((rv == 0) && ((err = GetLastError()) != ERROR_IO_PENDING)) {
   /* Immediate failure: undo the pending state and map the error. */
   closesocket(*newSock);
   _PR_THREAD_LOCK(me);
   me->io_pending = PR_FALSE;
   me->state = _PR_RUNNING;
   if (_PR_PENDING_INTERRUPT(me)) {
     me->flags &= ~_PR_INTERRUPT;
     PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
     _PR_THREAD_UNLOCK(me);
     return -1;
   }
   _PR_THREAD_UNLOCK(me);

   _PR_MD_MAP_ACCEPTEX_ERROR(err);
   return -1;
 }

 if (_native_threads_only && rv) {
   _native_thread_io_nowait(me, rv, bytes);
 } else if (_NT_IO_WAIT(me, timeout) == PR_FAILURE) {
   PR_ASSERT(0);
   closesocket(*newSock);
   return -1;
 }

retry:
 /* io_suspended => the wait timed out; decide whether to resume or abort. */
 if (me->io_suspended) {
   PRInt32 err;
   INT seconds;
   INT bytes = sizeof(seconds);

   PR_ASSERT(timeout != PR_INTERVAL_NO_TIMEOUT);

   /* SO_CONNECT_TIME: seconds the socket has been connected, or
    * 0xffffffff if it is not connected yet. */
   err = getsockopt(*newSock, SOL_SOCKET, SO_CONNECT_TIME, (char*)&seconds,
                    (PINT)&bytes);
   if (err == NO_ERROR) {
     PRIntervalTime elapsed = PR_SecondsToInterval(seconds);

     if (seconds == 0xffffffff) {
       isConnected = PR_FALSE;
     } else {
       isConnected = PR_TRUE;
     }

     if (!isConnected) {
       /* No connection yet: notify the caller once, then keep waiting. */
       if (madeCallback == PR_FALSE && callback) {
         callback(callbackArg);
       }
       madeCallback = PR_TRUE;
       me->state = _PR_IO_WAIT;
       if (_NT_ResumeIO(me, timeout) == PR_FAILURE) {
         closesocket(*newSock);
         return -1;
       }
       goto retry;
     }

     if (elapsed < timeout) {
       /* Socket is connected but time not elapsed, RESUME IO */
       timeout -= elapsed;
       me->state = _PR_IO_WAIT;
       if (_NT_ResumeIO(me, timeout) == PR_FAILURE) {
         closesocket(*newSock);
         return -1;
       }
       goto retry;
     }
   } else {
     /*  What to do here? Assume socket not open?*/
     PR_ASSERT(0);
     isConnected = PR_FALSE;
   }

   /* Timed out for real: force the I/O to complete by closing the socket. */
   rv = _NT_IO_ABORT(*newSock);

   PR_ASSERT(me->io_pending == PR_FALSE);
   PR_ASSERT(me->io_suspended == PR_FALSE);
   PR_ASSERT(me->md.thr_bound_cpu == NULL);
   /* If the IO is still suspended, it means we didn't get any
    * completion from NT_IO_WAIT.  This is not disasterous, I hope,
    * but it may mean we still have an IO outstanding...  Try to
    * recover by just allowing ourselves to continue.
    */
   me->io_suspended = PR_FALSE;
   if (_PR_PENDING_INTERRUPT(me)) {
     me->flags &= ~_PR_INTERRUPT;
     PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   } else {
     PR_SetError(PR_IO_TIMEOUT_ERROR, 0);
   }
   me->state = _PR_RUNNING;
   closesocket(*newSock);
   return -1;
 }

 PR_ASSERT(me->io_pending == PR_FALSE);
 PR_ASSERT(me->io_suspended == PR_FALSE);
 PR_ASSERT(me->md.thr_bound_cpu == NULL);

 /* Completion arrived but reported failure. */
 if (me->md.blocked_io_status == 0) {
   _PR_MD_MAP_ACCEPTEX_ERROR(me->md.blocked_io_error);
   closesocket(*newSock);
   return -1;
 }

 if (!fast) {
   _PR_MD_UPDATE_ACCEPT_CONTEXT((SOCKET)*newSock, (SOCKET)sock);
 }

 /* IO is done */
 GetAcceptExSockaddrs(buf, amount, INET_ADDR_PADDED, INET_ADDR_PADDED,
                      (LPSOCKADDR*)&(Laddr), &llen, (LPSOCKADDR*)(raddr),
                      (unsigned int*)&rlen);

 return me->md.blocked_io_bytes;
}
   1521 
/*
 * _PR_MD_SENDFILE()
 * Send a file (plus optional header/trailer from sfd) over the socket
 * using overlapped TransmitFile(), waiting on the completion port.
 * Returns the number of bytes transmitted or -1 on error/timeout/
 * interrupt.  With PR_TRANSMITFILE_CLOSE_SOCKET the socket is
 * disconnected for reuse (TF_DISCONNECT|TF_REUSE_SOCKET) and put into
 * the recycle bin instead of being closed.
 */
PRInt32 _PR_MD_SENDFILE(PRFileDesc* sock, PRSendFileData* sfd, PRInt32 flags,
                       PRIntervalTime timeout) {
 PRThread* me = _PR_MD_CURRENT_THREAD();
 PRInt32 tflags;
 int rv, err;

 /* A previously timed-out I/O is still outstanding. */
 if (me->io_suspended) {
   PR_SetError(PR_INVALID_STATE_ERROR, 0);
   return -1;
 }

 if (!sock->secret->md.io_model_committed) {
   rv = _md_Associate((HANDLE)sock->secret->md.osfd);
   PR_ASSERT(0 != rv);
   sock->secret->md.io_model_committed = PR_TRUE;
 }
 /* Lazily allocate the per-thread TRANSMIT_FILE_BUFFERS. */
 if (!me->md.xmit_bufs) {
   me->md.xmit_bufs = PR_NEW(TRANSMIT_FILE_BUFFERS);
   if (!me->md.xmit_bufs) {
     PR_SetError(PR_OUT_OF_MEMORY_ERROR, 0);
     return -1;
   }
 }
 me->md.xmit_bufs->Head = (void*)sfd->header;
 me->md.xmit_bufs->HeadLength = sfd->hlen;
 me->md.xmit_bufs->Tail = (void*)sfd->trailer;
 me->md.xmit_bufs->TailLength = sfd->tlen;

 memset(&(me->md.overlapped.overlapped), 0, sizeof(OVERLAPPED));
 me->md.overlapped.overlapped.Offset = sfd->file_offset;
 if (_native_threads_only) {
   me->md.overlapped.overlapped.hEvent = me->md.thr_event;
 }

 tflags = 0;
 if (flags & PR_TRANSMITFILE_CLOSE_SOCKET) {
   tflags = TF_DISCONNECT | TF_REUSE_SOCKET;
 }

 /* Mark I/O-pending before issuing the overlapped call. */
 _PR_THREAD_LOCK(me);
 if (_PR_PENDING_INTERRUPT(me)) {
   me->flags &= ~_PR_INTERRUPT;
   PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   _PR_THREAD_UNLOCK(me);
   return -1;
 }
 me->io_pending = PR_TRUE;
 me->state = _PR_IO_WAIT;
 _PR_THREAD_UNLOCK(me);
 me->io_fd = sock->secret->md.osfd;

 rv = TransmitFile((SOCKET)sock->secret->md.osfd,
                   (HANDLE)sfd->fd->secret->md.osfd, (DWORD)sfd->file_nbytes,
                   (DWORD)0, (LPOVERLAPPED) & (me->md.overlapped.overlapped),
                   (TRANSMIT_FILE_BUFFERS*)me->md.xmit_bufs, (DWORD)tflags);
 if ((rv == 0) && ((err = GetLastError()) != ERROR_IO_PENDING)) {
   /* Immediate failure: undo the pending state and map the error. */
   _PR_THREAD_LOCK(me);
   me->io_pending = PR_FALSE;
   me->state = _PR_RUNNING;
   if (_PR_PENDING_INTERRUPT(me)) {
     me->flags &= ~_PR_INTERRUPT;
     PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
     _PR_THREAD_UNLOCK(me);
     return -1;
   }
   _PR_THREAD_UNLOCK(me);

   _PR_MD_MAP_TRANSMITFILE_ERROR(err);
   return -1;
 }

 /* Block until the completion port delivers the result. */
 if (_NT_IO_WAIT(me, timeout) == PR_FAILURE) {
   PR_ASSERT(0);
   return -1;
 }

 PR_ASSERT(me->io_pending == PR_FALSE || me->io_suspended == PR_TRUE);

 /* io_suspended set => the wait timed out or we were interrupted. */
 if (me->io_suspended) {
   if (_PR_PENDING_INTERRUPT(me)) {
     me->flags &= ~_PR_INTERRUPT;
     PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   } else {
     PR_SetError(PR_IO_TIMEOUT_ERROR, 0);
   }
   return -1;
 }

 /* Completion arrived but reported failure. */
 if (me->md.blocked_io_status == 0) {
   _PR_MD_MAP_TRANSMITFILE_ERROR(me->md.blocked_io_error);
   return -1;
 }

 /* TF_REUSE_SOCKET left the socket disconnected but reusable. */
 if (flags & PR_TRANSMITFILE_CLOSE_SOCKET) {
   _md_put_recycled_socket(sock->secret->md.osfd, sock->secret->af);
 }

 PR_ASSERT(me->io_pending == PR_FALSE);

 return me->md.blocked_io_bytes;
}
   1623 
   1624 PRInt32 _PR_MD_RECV(PRFileDesc* fd, void* buf, PRInt32 amount, PRIntn flags,
   1625                    PRIntervalTime timeout) {
   1626  PROsfd osfd = fd->secret->md.osfd;
   1627  PRThread* me = _PR_MD_CURRENT_THREAD();
   1628  int bytes;
   1629  int rv, err;
   1630 
   1631  if (_NT_USE_NB_IO(fd)) {
   1632    if (!fd->secret->md.io_model_committed) {
   1633      rv = _md_MakeNonblock((HANDLE)osfd);
   1634      PR_ASSERT(0 != rv);
   1635      fd->secret->md.io_model_committed = PR_TRUE;
   1636    }
   1637    return _nt_nonblock_recv(fd, buf, amount, flags, timeout);
   1638  }
   1639 
   1640  if (me->io_suspended) {
   1641    PR_SetError(PR_INVALID_STATE_ERROR, 0);
   1642    return -1;
   1643  }
   1644 
   1645  if (!fd->secret->md.io_model_committed) {
   1646    rv = _md_Associate((HANDLE)osfd);
   1647    PR_ASSERT(0 != rv);
   1648    fd->secret->md.io_model_committed = PR_TRUE;
   1649  }
   1650 
   1651  memset(&(me->md.overlapped.overlapped), 0, sizeof(OVERLAPPED));
   1652  if (_native_threads_only) {
   1653    me->md.overlapped.overlapped.hEvent = me->md.thr_event;
   1654  }
   1655 
   1656  _PR_THREAD_LOCK(me);
   1657  if (_PR_PENDING_INTERRUPT(me)) {
   1658    me->flags &= ~_PR_INTERRUPT;
   1659    PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   1660    _PR_THREAD_UNLOCK(me);
   1661    return -1;
   1662  }
   1663  me->io_pending = PR_TRUE;
   1664  me->state = _PR_IO_WAIT;
   1665  _PR_THREAD_UNLOCK(me);
   1666  me->io_fd = osfd;
   1667 
   1668  rv = ReadFile((HANDLE)osfd, buf, amount, &bytes,
   1669                &(me->md.overlapped.overlapped));
   1670  if ((rv == 0) && (GetLastError() != ERROR_IO_PENDING)) {
   1671    _PR_THREAD_LOCK(me);
   1672    me->io_pending = PR_FALSE;
   1673    me->state = _PR_RUNNING;
   1674    if (_PR_PENDING_INTERRUPT(me)) {
   1675      me->flags &= ~_PR_INTERRUPT;
   1676      PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   1677      _PR_THREAD_UNLOCK(me);
   1678      return -1;
   1679    }
   1680    _PR_THREAD_UNLOCK(me);
   1681 
   1682    if ((err = GetLastError()) == ERROR_HANDLE_EOF) {
   1683      return 0;
   1684    }
   1685    _PR_MD_MAP_READ_ERROR(err);
   1686    return -1;
   1687  }
   1688 
   1689  if (_native_threads_only && rv) {
   1690    _native_thread_io_nowait(me, rv, bytes);
   1691  } else if (_NT_IO_WAIT(me, timeout) == PR_FAILURE) {
   1692    PR_ASSERT(0);
   1693    return -1;
   1694  }
   1695 
   1696  PR_ASSERT(me->io_pending == PR_FALSE || me->io_suspended == PR_TRUE);
   1697 
   1698  if (me->io_suspended) {
   1699    if (_PR_PENDING_INTERRUPT(me)) {
   1700      me->flags &= ~_PR_INTERRUPT;
   1701      PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   1702    } else {
   1703      PR_SetError(PR_IO_TIMEOUT_ERROR, 0);
   1704    }
   1705    return -1;
   1706  }
   1707 
   1708  if (me->md.blocked_io_status == 0) {
   1709    if (me->md.blocked_io_error == ERROR_HANDLE_EOF) {
   1710      return 0;
   1711    }
   1712    _PR_MD_MAP_READ_ERROR(me->md.blocked_io_error);
   1713    return -1;
   1714  }
   1715 
   1716  PR_ASSERT(me->io_pending == PR_FALSE);
   1717 
   1718  return me->md.blocked_io_bytes;
   1719 }
   1720 
/*
 * _PR_MD_SEND: send 'amount' bytes from 'buf' on the socket underlying
 * 'fd'.  On the blocking path the send is issued as an overlapped
 * WriteFile and the calling thread waits (via _NT_IO_WAIT) for the
 * completion port to deliver the result.
 *
 * Returns the number of bytes sent, or -1 with the NSPR error set
 * (interrupt, timeout, or a mapped Win32 error).  'flags' is accepted
 * for interface compatibility; it is not passed to WriteFile.
 */
PRInt32 _PR_MD_SEND(PRFileDesc* fd, const void* buf, PRInt32 amount,
                    PRIntn flags, PRIntervalTime timeout) {
  PROsfd osfd = fd->secret->md.osfd;
  PRThread* me = _PR_MD_CURRENT_THREAD();
  int bytes;
  int rv, err;

  /* Nonblocking sockets bypass the completion port entirely. */
  if (_NT_USE_NB_IO(fd)) {
    if (!fd->secret->md.io_model_committed) {
      rv = _md_MakeNonblock((HANDLE)osfd);
      PR_ASSERT(0 != rv);
      fd->secret->md.io_model_committed = PR_TRUE;
    }
    return _nt_nonblock_send(fd, (char*)buf, amount, timeout);
  }

  /* A previously timed-out IO is still outstanding on this thread;
   * starting another overlapped operation now is invalid. */
  if (me->io_suspended) {
    PR_SetError(PR_INVALID_STATE_ERROR, 0);
    return -1;
  }

  /* First blocking IO on this fd: bind the handle to the completion
   * port (the fd is then committed to the blocking model). */
  if (!fd->secret->md.io_model_committed) {
    rv = _md_Associate((HANDLE)osfd);
    PR_ASSERT(0 != rv);
    fd->secret->md.io_model_committed = PR_TRUE;
  }

  memset(&(me->md.overlapped.overlapped), 0, sizeof(OVERLAPPED));
  if (_native_threads_only) {
    me->md.overlapped.overlapped.hEvent = me->md.thr_event;
  }

  /* Mark the IO pending (under the thread lock) BEFORE issuing it, so
   * the completion thread sees a consistent state. */
  _PR_THREAD_LOCK(me);
  if (_PR_PENDING_INTERRUPT(me)) {
    me->flags &= ~_PR_INTERRUPT;
    PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
    _PR_THREAD_UNLOCK(me);
    return -1;
  }
  me->io_pending = PR_TRUE;
  me->state = _PR_IO_WAIT;
  _PR_THREAD_UNLOCK(me);
  me->io_fd = osfd;

  rv = WriteFile((HANDLE)osfd, buf, amount, &bytes,
                 &(me->md.overlapped.overlapped));
  /* Immediate failure (not ERROR_IO_PENDING): undo the pending state. */
  if ((rv == 0) && ((err = GetLastError()) != ERROR_IO_PENDING)) {
    _PR_THREAD_LOCK(me);
    me->io_pending = PR_FALSE;
    me->state = _PR_RUNNING;
    if (_PR_PENDING_INTERRUPT(me)) {
      me->flags &= ~_PR_INTERRUPT;
      PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
      _PR_THREAD_UNLOCK(me);
      return -1;
    }
    _PR_THREAD_UNLOCK(me);

    _PR_MD_MAP_WRITE_ERROR(err);
    return -1;
  }

  /* Synchronous success on a native thread needs no port wait;
   * otherwise block until the completion (or timeout) arrives. */
  if (_native_threads_only && rv) {
    _native_thread_io_nowait(me, rv, bytes);
  } else if (_NT_IO_WAIT(me, timeout) == PR_FAILURE) {
    PR_ASSERT(0);
    return -1;
  }

  PR_ASSERT(me->io_pending == PR_FALSE || me->io_suspended == PR_TRUE);

  /* io_suspended: the wait timed out or we were interrupted while the
   * overlapped operation is still in flight. */
  if (me->io_suspended) {
    if (_PR_PENDING_INTERRUPT(me)) {
      me->flags &= ~_PR_INTERRUPT;
      PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
    } else {
      PR_SetError(PR_IO_TIMEOUT_ERROR, 0);
    }
    return -1;
  }

  /* blocked_io_status/error/bytes were filled in by the completion. */
  if (me->md.blocked_io_status == 0) {
    _PR_MD_MAP_WRITE_ERROR(me->md.blocked_io_error);
    return -1;
  }

  PR_ASSERT(me->io_pending == PR_FALSE);

  return me->md.blocked_io_bytes;
}
   1811 
   1812 PRInt32 _PR_MD_SENDTO(PRFileDesc* fd, const void* buf, PRInt32 amount,
   1813                      PRIntn flags, const PRNetAddr* addr, PRUint32 addrlen,
   1814                      PRIntervalTime timeout) {
   1815  PROsfd osfd = fd->secret->md.osfd;
   1816  PRInt32 rv;
   1817 
   1818  if (!fd->secret->md.io_model_committed) {
   1819    rv = _md_MakeNonblock((HANDLE)osfd);
   1820    PR_ASSERT(0 != rv);
   1821    fd->secret->md.io_model_committed = PR_TRUE;
   1822  }
   1823  if (_NT_USE_NB_IO(fd)) {
   1824    return _nt_nonblock_sendto(fd, buf, amount, (struct sockaddr*)addr, addrlen,
   1825                               timeout);
   1826  } else {
   1827    return pt_SendTo(osfd, buf, amount, flags, addr, addrlen, timeout);
   1828  }
   1829 }
   1830 
   1831 PRInt32 _PR_MD_RECVFROM(PRFileDesc* fd, void* buf, PRInt32 amount, PRIntn flags,
   1832                        PRNetAddr* addr, PRUint32* addrlen,
   1833                        PRIntervalTime timeout) {
   1834  PROsfd osfd = fd->secret->md.osfd;
   1835  PRInt32 rv;
   1836 
   1837  if (!fd->secret->md.io_model_committed) {
   1838    rv = _md_MakeNonblock((HANDLE)osfd);
   1839    PR_ASSERT(0 != rv);
   1840    fd->secret->md.io_model_committed = PR_TRUE;
   1841  }
   1842  if (_NT_USE_NB_IO(fd)) {
   1843    return _nt_nonblock_recvfrom(fd, buf, amount, (struct sockaddr*)addr,
   1844                                 addrlen, timeout);
   1845  } else {
   1846    return pt_RecvFrom(osfd, buf, amount, flags, addr, addrlen, timeout);
   1847  }
   1848 }
   1849 
   1850 /* XXXMB - for now this is a sockets call only */
   1851 PRInt32 _PR_MD_WRITEV(PRFileDesc* fd, const PRIOVec* iov, PRInt32 iov_size,
   1852                      PRIntervalTime timeout) {
   1853  PROsfd osfd = fd->secret->md.osfd;
   1854  int index;
   1855  int sent = 0;
   1856  int rv;
   1857 
   1858  if (_NT_USE_NB_IO(fd)) {
   1859    if (!fd->secret->md.io_model_committed) {
   1860      rv = _md_MakeNonblock((HANDLE)osfd);
   1861      PR_ASSERT(0 != rv);
   1862      fd->secret->md.io_model_committed = PR_TRUE;
   1863    }
   1864    return _nt_nonblock_writev(fd, iov, iov_size, timeout);
   1865  }
   1866 
   1867  for (index = 0; index < iov_size; index++) {
   1868    rv = _PR_MD_SEND(fd, iov[index].iov_base, iov[index].iov_len, 0, timeout);
   1869    if (rv > 0) {
   1870      sent += rv;
   1871    }
   1872    if (rv != iov[index].iov_len) {
   1873      if (sent <= 0) {
   1874        return -1;
   1875      }
   1876      return -1;
   1877    }
   1878  }
   1879 
   1880  return sent;
   1881 }
   1882 
   1883 PRInt32 _PR_MD_LISTEN(PRFileDesc* fd, PRIntn backlog) {
   1884  PRInt32 rv;
   1885 
   1886  rv = listen(fd->secret->md.osfd, backlog);
   1887  if (rv < 0) {
   1888    _PR_MD_MAP_LISTEN_ERROR(WSAGetLastError());
   1889  }
   1890  return (rv);
   1891 }
   1892 
   1893 PRInt32 _PR_MD_SHUTDOWN(PRFileDesc* fd, PRIntn how) {
   1894  PRInt32 rv;
   1895 
   1896  rv = shutdown(fd->secret->md.osfd, how);
   1897  if (rv < 0) {
   1898    _PR_MD_MAP_SHUTDOWN_ERROR(WSAGetLastError());
   1899  }
   1900  return (rv);
   1901 }
   1902 
   1903 PRStatus _PR_MD_GETSOCKNAME(PRFileDesc* fd, PRNetAddr* addr, PRUint32* len) {
   1904  PRInt32 rv;
   1905 
   1906  rv = getsockname((SOCKET)fd->secret->md.osfd, (struct sockaddr*)addr, len);
   1907  if (rv == 0) {
   1908    return PR_SUCCESS;
   1909  } else {
   1910    _PR_MD_MAP_GETSOCKNAME_ERROR(WSAGetLastError());
   1911    return PR_FAILURE;
   1912  }
   1913 }
   1914 
/*
 * _PR_MD_GETPEERNAME: return the peer address of a connected socket.
 * For sockets accepted with AcceptEx() the cached peer address is
 * returned instead of calling getpeername() (see the workaround note
 * below).  Returns PR_SUCCESS/PR_FAILURE with the NSPR error set.
 */
PRStatus _PR_MD_GETPEERNAME(PRFileDesc* fd, PRNetAddr* addr, PRUint32* len) {
  PRInt32 rv;

  /*
   * NT has a bug that, when invoked on a socket accepted by
   * AcceptEx(), getpeername() returns an all-zero peer address.
   * To work around this bug, we store the peer's address (returned
   * by AcceptEx()) with the socket fd and use the cached peer
   * address if the socket is an accepted socket.
   */

  if (fd->secret->md.accepted_socket) {
    INT seconds;
    INT bytes = sizeof(seconds);

    /*
     * Determine if the socket is connected.
     */

    /* SO_CONNECT_TIME reports how long the socket has been connected;
     * the value 0xffffffff means it is not connected at all. */
    rv = getsockopt(fd->secret->md.osfd, SOL_SOCKET, SO_CONNECT_TIME,
                    (char*)&seconds, (PINT)&bytes);
    if (rv == NO_ERROR) {
      if (seconds == 0xffffffff) {
        PR_SetError(PR_NOT_CONNECTED_ERROR, 0);
        return PR_FAILURE;
      }
      /* Connected: report the peer address cached at accept time. */
      *len = PR_NETADDR_SIZE(&fd->secret->md.peer_addr);
      memcpy(addr, &fd->secret->md.peer_addr, *len);
      return PR_SUCCESS;
    } else {
      _PR_MD_MAP_GETSOCKOPT_ERROR(WSAGetLastError());
      return PR_FAILURE;
    }
  } else {
    /* Not an AcceptEx()-accepted socket: getpeername() is reliable. */
    rv = getpeername((SOCKET)fd->secret->md.osfd, (struct sockaddr*)addr, len);
    if (rv == 0) {
      return PR_SUCCESS;
    } else {
      _PR_MD_MAP_GETPEERNAME_ERROR(WSAGetLastError());
      return PR_FAILURE;
    }
  }
}
   1958 
   1959 PRStatus _PR_MD_GETSOCKOPT(PRFileDesc* fd, PRInt32 level, PRInt32 optname,
   1960                           char* optval, PRInt32* optlen) {
   1961  PRInt32 rv;
   1962 
   1963  rv = getsockopt((SOCKET)fd->secret->md.osfd, level, optname, optval, optlen);
   1964  if (rv == 0) {
   1965    return PR_SUCCESS;
   1966  } else {
   1967    _PR_MD_MAP_GETSOCKOPT_ERROR(WSAGetLastError());
   1968    return PR_FAILURE;
   1969  }
   1970 }
   1971 
   1972 PRStatus _PR_MD_SETSOCKOPT(PRFileDesc* fd, PRInt32 level, PRInt32 optname,
   1973                           const char* optval, PRInt32 optlen) {
   1974  PRInt32 rv;
   1975 
   1976  rv = setsockopt((SOCKET)fd->secret->md.osfd, level, optname, optval, optlen);
   1977  if (rv == 0) {
   1978    return PR_SUCCESS;
   1979  } else {
   1980    _PR_MD_MAP_SETSOCKOPT_ERROR(WSAGetLastError());
   1981    return PR_FAILURE;
   1982  }
   1983 }
   1984 
   1985 /* --- FILE IO ----------------------------------------------------------- */
   1986 
   1987 PROsfd _PR_MD_OPEN(const char* name, PRIntn osflags, PRIntn mode) {
   1988  HANDLE file;
   1989  PRInt32 access = 0;
   1990  PRInt32 flags = 0;
   1991  PRInt32 flag6 = 0;
   1992 
   1993  if (osflags & PR_SYNC) {
   1994    flag6 = FILE_FLAG_WRITE_THROUGH;
   1995  }
   1996 
   1997  if (osflags & PR_RDONLY || osflags & PR_RDWR) {
   1998    access |= GENERIC_READ;
   1999  }
   2000  if (osflags & PR_WRONLY || osflags & PR_RDWR) {
   2001    access |= GENERIC_WRITE;
   2002  }
   2003 
   2004  if (osflags & PR_CREATE_FILE && osflags & PR_EXCL) {
   2005    flags = CREATE_NEW;
   2006  } else if (osflags & PR_CREATE_FILE) {
   2007    flags = (0 != (osflags & PR_TRUNCATE)) ? CREATE_ALWAYS : OPEN_ALWAYS;
   2008  } else if (osflags & PR_TRUNCATE) {
   2009    flags = TRUNCATE_EXISTING;
   2010  } else {
   2011    flags = OPEN_EXISTING;
   2012  }
   2013 
   2014  flag6 |= FILE_FLAG_OVERLAPPED;
   2015 
   2016  file = CreateFile(name, access, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
   2017                    flags, flag6, NULL);
   2018  if (file == INVALID_HANDLE_VALUE) {
   2019    _PR_MD_MAP_OPEN_ERROR(GetLastError());
   2020    return -1;
   2021  }
   2022 
   2023  if (osflags & PR_APPEND) {
   2024    if (SetFilePointer(file, 0, 0, FILE_END) == 0xFFFFFFFF) {
   2025      _PR_MD_MAP_LSEEK_ERROR(GetLastError());
   2026      CloseHandle(file);
   2027      return -1;
   2028    }
   2029  }
   2030 
   2031  return (PROsfd)file;
   2032 }
   2033 
   2034 PROsfd _PR_MD_OPEN_FILE(const char* name, PRIntn osflags, PRIntn mode) {
   2035  HANDLE file;
   2036  PRInt32 access = 0;
   2037  PRInt32 flags = 0;
   2038  PRInt32 flag6 = 0;
   2039  SECURITY_ATTRIBUTES sa;
   2040  LPSECURITY_ATTRIBUTES lpSA = NULL;
   2041  PSECURITY_DESCRIPTOR pSD = NULL;
   2042  PACL pACL = NULL;
   2043 
   2044  if (osflags & PR_SYNC) {
   2045    flag6 = FILE_FLAG_WRITE_THROUGH;
   2046  }
   2047 
   2048  if (osflags & PR_RDONLY || osflags & PR_RDWR) {
   2049    access |= GENERIC_READ;
   2050  }
   2051  if (osflags & PR_WRONLY || osflags & PR_RDWR) {
   2052    access |= GENERIC_WRITE;
   2053  }
   2054 
   2055  if (osflags & PR_CREATE_FILE && osflags & PR_EXCL) {
   2056    flags = CREATE_NEW;
   2057  } else if (osflags & PR_CREATE_FILE) {
   2058    flags = (0 != (osflags & PR_TRUNCATE)) ? CREATE_ALWAYS : OPEN_ALWAYS;
   2059  } else if (osflags & PR_TRUNCATE) {
   2060    flags = TRUNCATE_EXISTING;
   2061  } else {
   2062    flags = OPEN_EXISTING;
   2063  }
   2064 
   2065  flag6 |= FILE_FLAG_OVERLAPPED;
   2066 
   2067  if (osflags & PR_CREATE_FILE) {
   2068    if (_PR_NT_MakeSecurityDescriptorACL(mode, fileAccessTable, &pSD, &pACL) ==
   2069        PR_SUCCESS) {
   2070      sa.nLength = sizeof(sa);
   2071      sa.lpSecurityDescriptor = pSD;
   2072      sa.bInheritHandle = FALSE;
   2073      lpSA = &sa;
   2074    }
   2075  }
   2076  file = CreateFile(name, access, FILE_SHARE_READ | FILE_SHARE_WRITE, lpSA,
   2077                    flags, flag6, NULL);
   2078  if (lpSA != NULL) {
   2079    _PR_NT_FreeSecurityDescriptorACL(pSD, pACL);
   2080  }
   2081  if (file == INVALID_HANDLE_VALUE) {
   2082    _PR_MD_MAP_OPEN_ERROR(GetLastError());
   2083    return -1;
   2084  }
   2085 
   2086  if (osflags & PR_APPEND) {
   2087    if (SetFilePointer(file, 0, 0, FILE_END) == 0xFFFFFFFF) {
   2088      _PR_MD_MAP_LSEEK_ERROR(GetLastError());
   2089      CloseHandle(file);
   2090      return -1;
   2091    }
   2092  }
   2093 
   2094  return (PROsfd)file;
   2095 }
   2096 
/*
 * _PR_MD_READ: read up to 'len' bytes from the file underlying 'fd'
 * into 'buf'.  Three paths, selected by fd flags:
 *   1. inheritable handle (may not be bound to our completion port):
 *      overlapped ReadFile completed inline via GetOverlappedResult;
 *   2. normal async path: overlapped ReadFile completed through the
 *      NT completion port (_NT_IO_WAIT);
 *   3. sync_file_io set: plain blocking ReadFile.
 * Returns bytes read, 0 at EOF (or when a pipe's write end closes),
 * or -1 with the NSPR error set.
 */
PRInt32 _PR_MD_READ(PRFileDesc* fd, void* buf, PRInt32 len) {
  PROsfd f = fd->secret->md.osfd;
  PRUint32 bytes;
  int rv, err;
  LONG hiOffset = 0;
  LONG loOffset;
  LARGE_INTEGER offset; /* use for a normalized add of len to offset */

  if (!fd->secret->md.sync_file_io) {
    PRThread* me = _PR_MD_CURRENT_THREAD();

    /* A previously timed-out IO is still outstanding on this thread. */
    if (me->io_suspended) {
      PR_SetError(PR_INVALID_STATE_ERROR, 0);
      return -1;
    }

    memset(&(me->md.overlapped.overlapped), 0, sizeof(OVERLAPPED));

    /* Overlapped IO reads at the OVERLAPPED offset, not the handle's
     * file pointer; seed it from the current position. */
    me->md.overlapped.overlapped.Offset = SetFilePointer(
        (HANDLE)f, 0, &me->md.overlapped.overlapped.OffsetHigh, FILE_CURRENT);
    PR_ASSERT((me->md.overlapped.overlapped.Offset != 0xffffffff) ||
              (GetLastError() == NO_ERROR));

    if (fd->secret->inheritable == _PR_TRI_TRUE) {
      rv = ReadFile((HANDLE)f, (LPVOID)buf, len, &bytes,
                    &me->md.overlapped.overlapped);
      if (rv != 0) {
        /* Completed synchronously: advance the file pointer by hand. */
        loOffset = SetFilePointer((HANDLE)f, bytes, &hiOffset, FILE_CURRENT);
        PR_ASSERT((loOffset != 0xffffffff) || (GetLastError() == NO_ERROR));
        return bytes;
      }
      err = GetLastError();
      if (err == ERROR_IO_PENDING) {
        /* Wait inline for the overlapped read to finish. */
        rv = GetOverlappedResult((HANDLE)f, &me->md.overlapped.overlapped,
                                 &bytes, TRUE);
        if (rv != 0) {
          loOffset = SetFilePointer((HANDLE)f, bytes, &hiOffset, FILE_CURRENT);
          PR_ASSERT((loOffset != 0xffffffff) || (GetLastError() == NO_ERROR));
          return bytes;
        }
        err = GetLastError();
      }
      if (err == ERROR_HANDLE_EOF) {
        return 0;
      } else {
        _PR_MD_MAP_READ_ERROR(err);
        return -1;
      }
    } else {
      /* Normal async path: bind to the completion port on first use. */
      if (!fd->secret->md.io_model_committed) {
        rv = _md_Associate((HANDLE)f);
        PR_ASSERT(rv != 0);
        fd->secret->md.io_model_committed = PR_TRUE;
      }

      if (_native_threads_only) {
        me->md.overlapped.overlapped.hEvent = me->md.thr_event;
      }

      /* Mark the IO pending (under the thread lock) before issuing it. */
      _PR_THREAD_LOCK(me);
      if (_PR_PENDING_INTERRUPT(me)) {
        me->flags &= ~_PR_INTERRUPT;
        PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
        _PR_THREAD_UNLOCK(me);
        return -1;
      }
      me->io_pending = PR_TRUE;
      me->state = _PR_IO_WAIT;
      _PR_THREAD_UNLOCK(me);
      me->io_fd = f;

      rv = ReadFile((HANDLE)f, (LPVOID)buf, len, &bytes,
                    &me->md.overlapped.overlapped);
      /* Immediate failure (not ERROR_IO_PENDING): undo pending state. */
      if ((rv == 0) && ((err = GetLastError()) != ERROR_IO_PENDING)) {
        _PR_THREAD_LOCK(me);
        me->io_pending = PR_FALSE;
        me->state = _PR_RUNNING;
        if (_PR_PENDING_INTERRUPT(me)) {
          me->flags &= ~_PR_INTERRUPT;
          PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
          _PR_THREAD_UNLOCK(me);
          return -1;
        }
        _PR_THREAD_UNLOCK(me);

        if (err == ERROR_HANDLE_EOF) {
          return 0;
        }
        _PR_MD_MAP_READ_ERROR(err);
        return -1;
      }

      /* Wait for the completion port unless the native-thread fast
       * path already has the result. */
      if (_native_threads_only && rv) {
        _native_thread_io_nowait(me, rv, bytes);
      } else if (_NT_IO_WAIT(me, PR_INTERVAL_NO_TIMEOUT) == PR_FAILURE) {
        PR_ASSERT(0);
        return -1;
      }

      PR_ASSERT(me->io_pending == PR_FALSE || me->io_suspended == PR_TRUE);

      /* io_suspended: interrupted while the IO is still in flight. */
      if (me->io_suspended) {
        if (_PR_PENDING_INTERRUPT(me)) {
          me->flags &= ~_PR_INTERRUPT;
          PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
        } else {
          PR_SetError(PR_IO_TIMEOUT_ERROR, 0);
        }
        return -1;
      }

      if (me->md.blocked_io_status == 0) {
        if (me->md.blocked_io_error == ERROR_HANDLE_EOF) {
          return 0;
        }
        _PR_MD_MAP_READ_ERROR(me->md.blocked_io_error);
        return -1;
      }

      /* Apply the workaround from bug 70765 (see _PR_MD_WRITE)
       * to the reading code, too. */

      offset.LowPart = me->md.overlapped.overlapped.Offset;
      offset.HighPart = me->md.overlapped.overlapped.OffsetHigh;
      offset.QuadPart += me->md.blocked_io_bytes;

      SetFilePointer((HANDLE)f, offset.LowPart, &offset.HighPart, FILE_BEGIN);

      PR_ASSERT(me->io_pending == PR_FALSE);

      return me->md.blocked_io_bytes;
    }
  } else {
    /* Synchronous (blocking) read path. */
    rv = ReadFile((HANDLE)f, (LPVOID)buf, len, &bytes, NULL);
    if (rv == 0) {
      err = GetLastError();
      /* ERROR_HANDLE_EOF can only be returned by async io */
      PR_ASSERT(err != ERROR_HANDLE_EOF);
      if (err == ERROR_BROKEN_PIPE) {
        /* The write end of the pipe has been closed. */
        return 0;
      }
      _PR_MD_MAP_READ_ERROR(err);
      return -1;
    }
    return bytes;
  }
}
   2245 
   2246 PRInt32 _PR_MD_WRITE(PRFileDesc* fd, const void* buf, PRInt32 len) {
   2247  PROsfd f = fd->secret->md.osfd;
   2248  PRInt32 bytes;
   2249  int rv, err;
   2250  LONG hiOffset = 0;
   2251  LONG loOffset;
   2252  LARGE_INTEGER offset; /* use for the calculation of the new offset */
   2253 
   2254  if (!fd->secret->md.sync_file_io) {
   2255    PRThread* me = _PR_MD_CURRENT_THREAD();
   2256 
   2257    if (me->io_suspended) {
   2258      PR_SetError(PR_INVALID_STATE_ERROR, 0);
   2259      return -1;
   2260    }
   2261 
   2262    memset(&(me->md.overlapped.overlapped), 0, sizeof(OVERLAPPED));
   2263 
   2264    me->md.overlapped.overlapped.Offset = SetFilePointer(
   2265        (HANDLE)f, 0, &me->md.overlapped.overlapped.OffsetHigh, FILE_CURRENT);
   2266    PR_ASSERT((me->md.overlapped.overlapped.Offset != 0xffffffff) ||
   2267              (GetLastError() == NO_ERROR));
   2268 
   2269    if (fd->secret->inheritable == _PR_TRI_TRUE) {
   2270      rv = WriteFile((HANDLE)f, (LPVOID)buf, len, &bytes,
   2271                     &me->md.overlapped.overlapped);
   2272      if (rv != 0) {
   2273        loOffset = SetFilePointer((HANDLE)f, bytes, &hiOffset, FILE_CURRENT);
   2274        PR_ASSERT((loOffset != 0xffffffff) || (GetLastError() == NO_ERROR));
   2275        return bytes;
   2276      }
   2277      err = GetLastError();
   2278      if (err == ERROR_IO_PENDING) {
   2279        rv = GetOverlappedResult((HANDLE)f, &me->md.overlapped.overlapped,
   2280                                 &bytes, TRUE);
   2281        if (rv != 0) {
   2282          loOffset = SetFilePointer((HANDLE)f, bytes, &hiOffset, FILE_CURRENT);
   2283          PR_ASSERT((loOffset != 0xffffffff) || (GetLastError() == NO_ERROR));
   2284          return bytes;
   2285        }
   2286        err = GetLastError();
   2287      }
   2288      _PR_MD_MAP_READ_ERROR(err);
   2289      return -1;
   2290    } else {
   2291      if (!fd->secret->md.io_model_committed) {
   2292        rv = _md_Associate((HANDLE)f);
   2293        PR_ASSERT(rv != 0);
   2294        fd->secret->md.io_model_committed = PR_TRUE;
   2295      }
   2296      if (_native_threads_only) {
   2297        me->md.overlapped.overlapped.hEvent = me->md.thr_event;
   2298      }
   2299 
   2300      _PR_THREAD_LOCK(me);
   2301      if (_PR_PENDING_INTERRUPT(me)) {
   2302        me->flags &= ~_PR_INTERRUPT;
   2303        PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   2304        _PR_THREAD_UNLOCK(me);
   2305        return -1;
   2306      }
   2307      me->io_pending = PR_TRUE;
   2308      me->state = _PR_IO_WAIT;
   2309      _PR_THREAD_UNLOCK(me);
   2310      me->io_fd = f;
   2311 
   2312      rv = WriteFile((HANDLE)f, buf, len, &bytes,
   2313                     &(me->md.overlapped.overlapped));
   2314      if ((rv == 0) && ((err = GetLastError()) != ERROR_IO_PENDING)) {
   2315        _PR_THREAD_LOCK(me);
   2316        me->io_pending = PR_FALSE;
   2317        me->state = _PR_RUNNING;
   2318        if (_PR_PENDING_INTERRUPT(me)) {
   2319          me->flags &= ~_PR_INTERRUPT;
   2320          PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   2321          _PR_THREAD_UNLOCK(me);
   2322          return -1;
   2323        }
   2324        _PR_THREAD_UNLOCK(me);
   2325 
   2326        _PR_MD_MAP_WRITE_ERROR(err);
   2327        return -1;
   2328      }
   2329 
   2330      if (_native_threads_only && rv) {
   2331        _native_thread_io_nowait(me, rv, bytes);
   2332      } else if (_NT_IO_WAIT(me, PR_INTERVAL_NO_TIMEOUT) == PR_FAILURE) {
   2333        PR_ASSERT(0);
   2334        return -1;
   2335      }
   2336 
   2337      PR_ASSERT(me->io_pending == PR_FALSE || me->io_suspended == PR_TRUE);
   2338 
   2339      if (me->io_suspended) {
   2340        if (_PR_PENDING_INTERRUPT(me)) {
   2341          me->flags &= ~_PR_INTERRUPT;
   2342          PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   2343        } else {
   2344          PR_SetError(PR_IO_TIMEOUT_ERROR, 0);
   2345        }
   2346        return -1;
   2347      }
   2348 
   2349      if (me->md.blocked_io_status == 0) {
   2350        _PR_MD_MAP_WRITE_ERROR(me->md.blocked_io_error);
   2351        return -1;
   2352      }
   2353 
   2354      /*
   2355       * Moving the file pointer by a relative offset (FILE_CURRENT)
   2356       * does not work with a file on a network drive exported by a
   2357       * Win2K system.  We still don't know why.  A workaround is to
   2358       * move the file pointer by an absolute offset (FILE_BEGIN).
   2359       * (Bugzilla bug 70765)
   2360       */
   2361      offset.LowPart = me->md.overlapped.overlapped.Offset;
   2362      offset.HighPart = me->md.overlapped.overlapped.OffsetHigh;
   2363      offset.QuadPart += me->md.blocked_io_bytes;
   2364 
   2365      SetFilePointer((HANDLE)f, offset.LowPart, &offset.HighPart, FILE_BEGIN);
   2366 
   2367      PR_ASSERT(me->io_pending == PR_FALSE);
   2368 
   2369      return me->md.blocked_io_bytes;
   2370    }
   2371  } else {
   2372    rv = WriteFile((HANDLE)f, buf, len, &bytes, NULL);
   2373    if (rv == 0) {
   2374      _PR_MD_MAP_WRITE_ERROR(GetLastError());
   2375      return -1;
   2376    }
   2377    return bytes;
   2378  }
   2379 }
   2380 
   2381 PRInt32 _PR_MD_SOCKETAVAILABLE(PRFileDesc* fd) {
   2382  PRInt32 result;
   2383 
   2384  if (ioctlsocket(fd->secret->md.osfd, FIONREAD, &result) < 0) {
   2385    PR_SetError(PR_BAD_DESCRIPTOR_ERROR, WSAGetLastError());
   2386    return -1;
   2387  }
   2388  return result;
   2389 }
   2390 
   2391 PRInt32 _PR_MD_PIPEAVAILABLE(PRFileDesc* fd) {
   2392  if (NULL == fd) {
   2393    PR_SetError(PR_BAD_DESCRIPTOR_ERROR, 0);
   2394  } else {
   2395    PR_SetError(PR_NOT_IMPLEMENTED_ERROR, 0);
   2396  }
   2397  return -1;
   2398 }
   2399 
   2400 PROffset32 _PR_MD_LSEEK(PRFileDesc* fd, PROffset32 offset,
   2401                        PRSeekWhence whence) {
   2402  DWORD moveMethod;
   2403  PROffset32 rv;
   2404 
   2405  switch (whence) {
   2406    case PR_SEEK_SET:
   2407      moveMethod = FILE_BEGIN;
   2408      break;
   2409    case PR_SEEK_CUR:
   2410      moveMethod = FILE_CURRENT;
   2411      break;
   2412    case PR_SEEK_END:
   2413      moveMethod = FILE_END;
   2414      break;
   2415    default:
   2416      PR_SetError(PR_INVALID_ARGUMENT_ERROR, 0);
   2417      return -1;
   2418  }
   2419 
   2420  rv = SetFilePointer((HANDLE)fd->secret->md.osfd, offset, NULL, moveMethod);
   2421 
   2422  /*
   2423   * If the lpDistanceToMoveHigh argument (third argument) is
   2424   * NULL, SetFilePointer returns 0xffffffff on failure.
   2425   */
   2426  if (-1 == rv) {
   2427    _PR_MD_MAP_LSEEK_ERROR(GetLastError());
   2428  }
   2429  return rv;
   2430 }
   2431 
   2432 PROffset64 _PR_MD_LSEEK64(PRFileDesc* fd, PROffset64 offset,
   2433                          PRSeekWhence whence) {
   2434  DWORD moveMethod;
   2435  LARGE_INTEGER li;
   2436  DWORD err;
   2437 
   2438  switch (whence) {
   2439    case PR_SEEK_SET:
   2440      moveMethod = FILE_BEGIN;
   2441      break;
   2442    case PR_SEEK_CUR:
   2443      moveMethod = FILE_CURRENT;
   2444      break;
   2445    case PR_SEEK_END:
   2446      moveMethod = FILE_END;
   2447      break;
   2448    default:
   2449      PR_SetError(PR_INVALID_ARGUMENT_ERROR, 0);
   2450      return -1;
   2451  }
   2452 
   2453  li.QuadPart = offset;
   2454  li.LowPart = SetFilePointer((HANDLE)fd->secret->md.osfd, li.LowPart,
   2455                              &li.HighPart, moveMethod);
   2456 
   2457  if (0xffffffff == li.LowPart && (err = GetLastError()) != NO_ERROR) {
   2458    _PR_MD_MAP_LSEEK_ERROR(err);
   2459    li.QuadPart = -1;
   2460  }
   2461  return li.QuadPart;
   2462 }
   2463 
   2464 /*
   2465 * This is documented to succeed on read-only files, but Win32's
   2466 * FlushFileBuffers functions fails with "access denied" in such a
   2467 * case.  So we only signal an error if the error is *not* "access
   2468 * denied".
   2469 */
   2470 PRInt32 _PR_MD_FSYNC(PRFileDesc* fd) {
   2471  /*
   2472   * From the documentation:
   2473   *
   2474   *     On Windows NT, the function FlushFileBuffers fails if hFile
   2475   *     is a handle to console output. That is because console
   2476   *     output is not buffered. The function returns FALSE, and
   2477   *     GetLastError returns ERROR_INVALID_HANDLE.
   2478   *
   2479   * On the other hand, on Win95, it returns without error.  I cannot
   2480   * assume that 0, 1, and 2 are console, because if someone closes
   2481   * System.out and then opens a file, they might get file descriptor
   2482   * 1.  An error on *that* version of 1 should be reported, whereas
   2483   * an error on System.out (which was the original 1) should be
   2484   * ignored.  So I use isatty() to ensure that such an error was
   2485   * because of this, and if it was, I ignore the error.
   2486   */
   2487 
   2488  BOOL ok = FlushFileBuffers((HANDLE)fd->secret->md.osfd);
   2489 
   2490  if (!ok) {
   2491    DWORD err = GetLastError();
   2492 
   2493    if (err != ERROR_ACCESS_DENIED) { /* from winerror.h */
   2494      _PR_MD_MAP_FSYNC_ERROR(err);
   2495      return -1;
   2496    }
   2497  }
   2498  return 0;
   2499 }
   2500 
/*
 * _PR_MD_CLOSE: close the native socket ('socket' true => closesocket)
 * or file handle (CloseHandle) named by 'osfd'.  If the calling thread
 * still has a suspended (timed-out) overlapped operation outstanding
 * on this same osfd, the close should force it to complete, so we wait
 * for that completion before returning and then reset the thread's IO
 * state.  Returns 0 on success, -1 with the NSPR error set.
 */
PRInt32 _PR_MD_CLOSE(PROsfd osfd, PRBool socket) {
  PRInt32 rv;
  PRThread* me = _PR_MD_CURRENT_THREAD();

  if (socket) {
    rv = closesocket((SOCKET)osfd);
    if (rv < 0) {
      _PR_MD_MAP_CLOSE_ERROR(WSAGetLastError());
    }
  } else {
    rv = CloseHandle((HANDLE)osfd) ? 0 : -1;
    if (rv < 0) {
      _PR_MD_MAP_CLOSE_ERROR(GetLastError());
    }
  }

  /* Drain a suspended IO that was pending on the just-closed osfd. */
  if (rv == 0 && me->io_suspended) {
    if (me->io_fd == osfd) {
      PRBool fWait;

      _PR_THREAD_LOCK(me);
      me->state = _PR_IO_WAIT;
      /* The IO could have completed on another thread just after
       * calling closesocket while the io_suspended flag was true.
       * So we now grab the lock to do a safe check on io_pending to
       * see if we need to wait or not.
       */
      fWait = me->io_pending;
      me->io_suspended = PR_FALSE;
      me->md.interrupt_disabled = PR_TRUE;
      _PR_THREAD_UNLOCK(me);

      if (fWait) {
        _NT_IO_WAIT(me, PR_INTERVAL_NO_TIMEOUT);
      }
      PR_ASSERT(me->io_suspended == PR_FALSE);
      PR_ASSERT(me->io_pending == PR_FALSE);
      /*
       * I/O operation is no longer pending; the thread can now
       * run on any cpu
       */
      _PR_THREAD_LOCK(me);
      me->md.interrupt_disabled = PR_FALSE;
      me->md.thr_bound_cpu = NULL;
      me->io_suspended = PR_FALSE;
      me->io_pending = PR_FALSE;
      me->state = _PR_RUNNING;
      _PR_THREAD_UNLOCK(me);
    }
  }
  return rv;
}
   2553 
   2554 PRStatus _PR_MD_SET_FD_INHERITABLE(PRFileDesc* fd, PRBool inheritable) {
   2555  BOOL rv;
   2556 
   2557  if (fd->secret->md.io_model_committed) {
   2558    PR_SetError(PR_INVALID_ARGUMENT_ERROR, 0);
   2559    return PR_FAILURE;
   2560  }
   2561  rv = SetHandleInformation((HANDLE)fd->secret->md.osfd, HANDLE_FLAG_INHERIT,
   2562                            inheritable ? HANDLE_FLAG_INHERIT : 0);
   2563  if (0 == rv) {
   2564    _PR_MD_MAP_DEFAULT_ERROR(GetLastError());
   2565    return PR_FAILURE;
   2566  }
   2567  return PR_SUCCESS;
   2568 }
   2569 
   2570 void _PR_MD_INIT_FD_INHERITABLE(PRFileDesc* fd, PRBool imported) {
   2571  if (imported) {
   2572    fd->secret->inheritable = _PR_TRI_UNKNOWN;
   2573  } else {
   2574    fd->secret->inheritable = _PR_TRI_FALSE;
   2575  }
   2576 }
   2577 
   2578 void _PR_MD_QUERY_FD_INHERITABLE(PRFileDesc* fd) {
   2579  DWORD flags;
   2580 
   2581  PR_ASSERT(_PR_TRI_UNKNOWN == fd->secret->inheritable);
   2582  if (fd->secret->md.io_model_committed) {
   2583    return;
   2584  }
   2585  if (GetHandleInformation((HANDLE)fd->secret->md.osfd, &flags)) {
   2586    if (flags & HANDLE_FLAG_INHERIT) {
   2587      fd->secret->inheritable = _PR_TRI_TRUE;
   2588    } else {
   2589      fd->secret->inheritable = _PR_TRI_FALSE;
   2590    }
   2591  }
   2592 }
   2593 
/* --- DIR IO ------------------------------------------------------------ */
/* Extract the current entry's file name from a _MDDir's find-data. */
#define GetFileFromDIR(d) (d)->d_entry.cFileName
/* Nonzero when the current entry carries the Win32 hidden attribute. */
#define FileIsHidden(d) ((d)->d_entry.dwFileAttributes & FILE_ATTRIBUTE_HIDDEN)
   2597 
   2598 void FlipSlashes(char* cp, int len) {
   2599  while (--len >= 0) {
   2600    if (cp[0] == '/') {
   2601      cp[0] = PR_DIRECTORY_SEPARATOR;
   2602    }
   2603    cp = _mbsinc(cp);
   2604  }
   2605 } /* end FlipSlashes() */
   2606 
   2607 /*
   2608 **
   2609 ** Local implementations of standard Unix RTL functions which are not provided
   2610 ** by the VC RTL.
   2611 **
   2612 */
   2613 
   2614 PRInt32 _PR_MD_CLOSE_DIR(_MDDir* d) {
   2615  if (d) {
   2616    if (FindClose(d->d_hdl)) {
   2617      d->magic = (PRUint32)-1;
   2618      return 0;
   2619    } else {
   2620      _PR_MD_MAP_CLOSEDIR_ERROR(GetLastError());
   2621      return -1;
   2622    }
   2623  }
   2624  PR_SetError(PR_INVALID_ARGUMENT_ERROR, 0);
   2625  return -1;
   2626 }
   2627 
   2628 PRStatus _PR_MD_OPEN_DIR(_MDDir* d, const char* name) {
   2629  char filename[MAX_PATH];
   2630  int len;
   2631 
   2632  len = strlen(name);
   2633  /* Need 5 bytes for \*.* and the trailing null byte. */
   2634  if (len + 5 > MAX_PATH) {
   2635    PR_SetError(PR_NAME_TOO_LONG_ERROR, 0);
   2636    return PR_FAILURE;
   2637  }
   2638  strcpy(filename, name);
   2639 
   2640  /*
   2641   * If 'name' ends in a slash or backslash, do not append
   2642   * another backslash.
   2643   */
   2644  if (IsPrevCharSlash(filename, filename + len)) {
   2645    len--;
   2646  }
   2647  strcpy(&filename[len], "\\*.*");
   2648  FlipSlashes(filename, strlen(filename));
   2649 
   2650  d->d_hdl = FindFirstFile(filename, &(d->d_entry));
   2651  if (d->d_hdl == INVALID_HANDLE_VALUE) {
   2652    _PR_MD_MAP_OPENDIR_ERROR(GetLastError());
   2653    return PR_FAILURE;
   2654  }
   2655  d->firstEntry = PR_TRUE;
   2656  d->magic = _MD_MAGIC_DIR;
   2657  return PR_SUCCESS;
   2658 }
   2659 
   2660 char* _PR_MD_READ_DIR(_MDDir* d, PRIntn flags) {
   2661  PRInt32 err;
   2662  BOOL rv;
   2663  char* fileName;
   2664 
   2665  if (d) {
   2666    while (1) {
   2667      if (d->firstEntry) {
   2668        d->firstEntry = PR_FALSE;
   2669        rv = 1;
   2670      } else {
   2671        rv = FindNextFile(d->d_hdl, &(d->d_entry));
   2672      }
   2673      if (rv == 0) {
   2674        break;
   2675      }
   2676      fileName = GetFileFromDIR(d);
   2677      if ((flags & PR_SKIP_DOT) && (fileName[0] == '.') &&
   2678          (fileName[1] == '\0')) {
   2679        continue;
   2680      }
   2681      if ((flags & PR_SKIP_DOT_DOT) && (fileName[0] == '.') &&
   2682          (fileName[1] == '.') && (fileName[2] == '\0')) {
   2683        continue;
   2684      }
   2685      if ((flags & PR_SKIP_HIDDEN) && FileIsHidden(d)) {
   2686        continue;
   2687      }
   2688      return fileName;
   2689    }
   2690    err = GetLastError();
   2691    PR_ASSERT(NO_ERROR != err);
   2692    _PR_MD_MAP_READDIR_ERROR(err);
   2693    return NULL;
   2694  }
   2695  PR_SetError(PR_INVALID_ARGUMENT_ERROR, 0);
   2696  return NULL;
   2697 }
   2698 
   2699 PRInt32 _PR_MD_DELETE(const char* name) {
   2700  if (DeleteFile(name)) {
   2701    return 0;
   2702  } else {
   2703    _PR_MD_MAP_DELETE_ERROR(GetLastError());
   2704    return -1;
   2705  }
   2706 }
   2707 
/*
 * Convert a Win32 FILETIME (100-nanosecond units since Jan 1, 1601)
 * into a PRTime (microseconds since the NSPR epoch, Jan 1, 1970).
 * '_pr_filetime_offset' is the epoch difference expressed in FILETIME
 * units; dividing by 10 converts 100-ns units to microseconds.
 */
void _PR_FileTimeToPRTime(const FILETIME* filetime, PRTime* prtm) {
  /* FILETIME and PRTime are both 64 bits wide, so a raw copy is safe. */
  PR_ASSERT(sizeof(FILETIME) == sizeof(PRTime));
  CopyMemory(prtm, filetime, sizeof(PRTime));
#ifdef __GNUC__
  *prtm = (*prtm - _pr_filetime_offset) / 10LL;
#else
  *prtm = (*prtm - _pr_filetime_offset) / 10i64;
#endif

#ifdef DEBUG
  /* Doublecheck our calculation. */
  {
    SYSTEMTIME systime;
    PRExplodedTime etm;
    PRTime cmp; /* for comparison */
    BOOL rv;

    rv = FileTimeToSystemTime(filetime, &systime);
    PR_ASSERT(0 != rv);

    /*
     * PR_ImplodeTime ignores wday and yday.
     */
    etm.tm_usec = systime.wMilliseconds * PR_USEC_PER_MSEC;
    etm.tm_sec = systime.wSecond;
    etm.tm_min = systime.wMinute;
    etm.tm_hour = systime.wHour;
    etm.tm_mday = systime.wDay;
    etm.tm_month = systime.wMonth - 1;
    etm.tm_year = systime.wYear;
    /*
     * It is not well-documented what time zone the FILETIME's
     * are in.  WIN32_FIND_DATA is documented to be in UTC (GMT).
     * But BY_HANDLE_FILE_INFORMATION is unclear about this.
     * By our best judgement, we assume that FILETIME is in UTC.
     */
    etm.tm_params.tp_gmt_offset = 0;
    etm.tm_params.tp_dst_offset = 0;
    cmp = PR_ImplodeTime(&etm);

    /*
     * SYSTEMTIME is in milliseconds precision, so we convert PRTime's
     * microseconds to milliseconds before doing the comparison.
     */
    PR_ASSERT((cmp / PR_USEC_PER_MSEC) == (*prtm / PR_USEC_PER_MSEC));
  }
#endif /* DEBUG */
}
   2756 
/*
 * stat() the file named 'fn' via the CRT _stat(), working around the
 * MSVC runtime's trailing-slash failure (see the comment below).
 * Returns the _stat() result (0 or -1); on failure the NSPR error is
 * mapped from errno.
 */
PRInt32 _PR_MD_STAT(const char* fn, struct stat* info) {
  PRInt32 rv;

  rv = _stat(fn, (struct _stat*)info);
  if (-1 == rv) {
    /*
     * Check for MSVC runtime library _stat() bug.
     * (It's really a bug in FindFirstFile().)
     * If a pathname ends in a backslash or slash,
     * e.g., c:\temp\ or c:/temp/, _stat() will fail.
     * Note: a pathname ending in a slash (e.g., c:/temp/)
     * can be handled by _stat() on NT but not on Win95.
     *
     * We remove the backslash or slash at the end and
     * try again.
     */

    int len = strlen(fn);
    if (len > 0 && len <= _MAX_PATH && IsPrevCharSlash(fn, fn + len)) {
      char newfn[_MAX_PATH + 1];

      strcpy(newfn, fn);
      newfn[len - 1] = '\0';
      rv = _stat(newfn, (struct _stat*)info);
    }
  }

  if (-1 == rv) {
    _PR_MD_MAP_STAT_ERROR(errno);
  }
  return rv;
}
   2789 
/* True if 'ch' is either flavor of path separator. */
#define _PR_IS_SLASH(ch) ((ch) == '/' || (ch) == '\\')

/*
 * Return PR_TRUE if the byte just before 'current' in the multibyte
 * string 'str' is a slash or backslash character, and not merely the
 * trail byte of a multibyte character (_mbsdec backs up one full
 * character, so the position check below catches that case).
 */
static PRBool IsPrevCharSlash(const char* str, const char* current) {
  const char* prev;

  if (str >= current) {
    return PR_FALSE;
  }
  prev = _mbsdec(str, current);
  return (prev == current - 1) && _PR_IS_SLASH(*prev);
}
   2801 
   2802 /*
   2803 * IsRootDirectory --
   2804 *
   2805 * Return PR_TRUE if the pathname 'fn' is a valid root directory,
   2806 * else return PR_FALSE.  The char buffer pointed to by 'fn' must
   2807 * be writable.  During the execution of this function, the contents
   2808 * of the buffer pointed to by 'fn' may be modified, but on return
   2809 * the original contents will be restored.  'buflen' is the size of
   2810 * the buffer pointed to by 'fn'.
   2811 *
   2812 * Root directories come in three formats:
   2813 * 1. / or \, meaning the root directory of the current drive.
   2814 * 2. C:/ or C:\, where C is a drive letter.
   2815 * 3. \\<server name>\<share point name>\ or
   2816 *    \\<server name>\<share point name>, meaning the root directory
   2817 *    of a UNC (Universal Naming Convention) name.
   2818 */
   2819 
/*
 * See the block comment above for the three root-directory formats
 * this recognizes.  'fn' must be writable: a trailing backslash may be
 * appended temporarily for the UNC case, then restored before return.
 */
static PRBool IsRootDirectory(char* fn, size_t buflen) {
  char* p;
  PRBool slashAdded = PR_FALSE;
  PRBool rv = PR_FALSE;

  /* Format 1: a bare "/" or "\" is the current drive's root. */
  if (_PR_IS_SLASH(fn[0]) && fn[1] == '\0') {
    return PR_TRUE;
  }

  /* Format 2: "C:\" — valid only if the drive exists.
   * GetDriveType() returns 0 (unknown) or 1 (invalid root) on failure. */
  if (isalpha((unsigned char)fn[0]) && fn[1] == ':' && _PR_IS_SLASH(fn[2]) && fn[3] == '\0') {
    rv = GetDriveType(fn) > 1 ? PR_TRUE : PR_FALSE;
    return rv;
  }

  /* The UNC root directory */

  if (_PR_IS_SLASH(fn[0]) && _PR_IS_SLASH(fn[1])) {
    /* The 'server' part should have at least one character. */
    p = &fn[2];
    if (*p == '\0' || _PR_IS_SLASH(*p)) {
      return PR_FALSE;
    }

    /* look for the next slash */
    do {
      p = _mbsinc(p);
    } while (*p != '\0' && !_PR_IS_SLASH(*p));
    if (*p == '\0') {
      return PR_FALSE;
    }

    /* The 'share' part should have at least one character. */
    p++;
    if (*p == '\0' || _PR_IS_SLASH(*p)) {
      return PR_FALSE;
    }

    /* look for the final slash */
    do {
      p = _mbsinc(p);
    } while (*p != '\0' && !_PR_IS_SLASH(*p));
    /* Anything after a final slash means this is not a bare root. */
    if (_PR_IS_SLASH(*p) && p[1] != '\0') {
      return PR_FALSE;
    }
    if (*p == '\0') {
      /*
       * GetDriveType() doesn't work correctly if the
       * path is of the form \\server\share, so we add
       * a final slash temporarily.
       */
      if ((p + 1) < (fn + buflen)) {
        *p++ = '\\';
        *p = '\0';
        slashAdded = PR_TRUE;
      } else {
        return PR_FALSE; /* name too long */
      }
    }
    rv = GetDriveType(fn) > 1 ? PR_TRUE : PR_FALSE;
    /* restore the 'fn' buffer */
    if (slashAdded) {
      *--p = '\0';
    }
  }
  return rv;
}
   2886 
   2887 PRInt32 _PR_MD_GETFILEINFO64(const char* fn, PRFileInfo64* info) {
   2888  WIN32_FILE_ATTRIBUTE_DATA findFileData;
   2889 
   2890  if (NULL == fn || '\0' == *fn) {
   2891    PR_SetError(PR_INVALID_ARGUMENT_ERROR, 0);
   2892    return -1;
   2893  }
   2894 
   2895  if (!GetFileAttributesEx(fn, GetFileExInfoStandard, &findFileData)) {
   2896    _PR_MD_MAP_OPENDIR_ERROR(GetLastError());
   2897    return -1;
   2898  }
   2899 
   2900  if (findFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
   2901    info->type = PR_FILE_DIRECTORY;
   2902  } else {
   2903    info->type = PR_FILE_FILE;
   2904  }
   2905 
   2906  info->size = findFileData.nFileSizeHigh;
   2907  info->size = (info->size << 32) + findFileData.nFileSizeLow;
   2908 
   2909  _PR_FileTimeToPRTime(&findFileData.ftLastWriteTime, &info->modifyTime);
   2910 
   2911  if (0 == findFileData.ftCreationTime.dwLowDateTime &&
   2912      0 == findFileData.ftCreationTime.dwHighDateTime) {
   2913    info->creationTime = info->modifyTime;
   2914  } else {
   2915    _PR_FileTimeToPRTime(&findFileData.ftCreationTime, &info->creationTime);
   2916  }
   2917 
   2918  return 0;
   2919 }
   2920 
   2921 PRInt32 _PR_MD_GETFILEINFO(const char* fn, PRFileInfo* info) {
   2922  PRFileInfo64 info64;
   2923  PRInt32 rv = _PR_MD_GETFILEINFO64(fn, &info64);
   2924  if (0 == rv) {
   2925    info->type = info64.type;
   2926    info->size = (PRUint32)info64.size;
   2927    info->modifyTime = info64.modifyTime;
   2928    info->creationTime = info64.creationTime;
   2929  }
   2930  return rv;
   2931 }
   2932 
   2933 PRInt32 _PR_MD_GETOPENFILEINFO64(const PRFileDesc* fd, PRFileInfo64* info) {
   2934  int rv;
   2935 
   2936  BY_HANDLE_FILE_INFORMATION hinfo;
   2937 
   2938  rv = GetFileInformationByHandle((HANDLE)fd->secret->md.osfd, &hinfo);
   2939  if (rv == FALSE) {
   2940    _PR_MD_MAP_FSTAT_ERROR(GetLastError());
   2941    return -1;
   2942  }
   2943 
   2944  if (hinfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
   2945    info->type = PR_FILE_DIRECTORY;
   2946  } else {
   2947    info->type = PR_FILE_FILE;
   2948  }
   2949 
   2950  info->size = hinfo.nFileSizeHigh;
   2951  info->size = (info->size << 32) + hinfo.nFileSizeLow;
   2952 
   2953  _PR_FileTimeToPRTime(&hinfo.ftLastWriteTime, &(info->modifyTime));
   2954  _PR_FileTimeToPRTime(&hinfo.ftCreationTime, &(info->creationTime));
   2955 
   2956  return 0;
   2957 }
   2958 
   2959 PRInt32 _PR_MD_GETOPENFILEINFO(const PRFileDesc* fd, PRFileInfo* info) {
   2960  int rv;
   2961 
   2962  BY_HANDLE_FILE_INFORMATION hinfo;
   2963 
   2964  rv = GetFileInformationByHandle((HANDLE)fd->secret->md.osfd, &hinfo);
   2965  if (rv == FALSE) {
   2966    _PR_MD_MAP_FSTAT_ERROR(GetLastError());
   2967    return -1;
   2968  }
   2969 
   2970  if (hinfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
   2971    info->type = PR_FILE_DIRECTORY;
   2972  } else {
   2973    info->type = PR_FILE_FILE;
   2974  }
   2975 
   2976  info->size = hinfo.nFileSizeLow;
   2977 
   2978  _PR_FileTimeToPRTime(&hinfo.ftLastWriteTime, &(info->modifyTime));
   2979  _PR_FileTimeToPRTime(&hinfo.ftCreationTime, &(info->creationTime));
   2980 
   2981  return 0;
   2982 }
   2983 
   2984 PRInt32 _PR_MD_RENAME(const char* from, const char* to) {
   2985  /* Does this work with dot-relative pathnames? */
   2986  if (MoveFile(from, to)) {
   2987    return 0;
   2988  } else {
   2989    _PR_MD_MAP_RENAME_ERROR(GetLastError());
   2990    return -1;
   2991  }
   2992 }
   2993 
   2994 PRInt32 _PR_MD_ACCESS(const char* name, PRAccessHow how) {
   2995  PRInt32 rv;
   2996 
   2997  switch (how) {
   2998    case PR_ACCESS_WRITE_OK:
   2999      rv = _access(name, 02);
   3000      break;
   3001    case PR_ACCESS_READ_OK:
   3002      rv = _access(name, 04);
   3003      break;
   3004    case PR_ACCESS_EXISTS:
   3005      rv = _access(name, 00);
   3006      break;
   3007    default:
   3008      PR_SetError(PR_INVALID_ARGUMENT_ERROR, 0);
   3009      return -1;
   3010  }
   3011  if (rv < 0) {
   3012    _PR_MD_MAP_ACCESS_ERROR(errno);
   3013  }
   3014  return rv;
   3015 }
   3016 
   3017 PRInt32 _PR_MD_MKDIR(const char* name, PRIntn mode) {
   3018  /* XXXMB - how to translate the "mode"??? */
   3019  if (CreateDirectory(name, NULL)) {
   3020    return 0;
   3021  } else {
   3022    _PR_MD_MAP_MKDIR_ERROR(GetLastError());
   3023    return -1;
   3024  }
   3025 }
   3026 
   3027 PRInt32 _PR_MD_MAKE_DIR(const char* name, PRIntn mode) {
   3028  BOOL rv;
   3029  SECURITY_ATTRIBUTES sa;
   3030  LPSECURITY_ATTRIBUTES lpSA = NULL;
   3031  PSECURITY_DESCRIPTOR pSD = NULL;
   3032  PACL pACL = NULL;
   3033 
   3034  if (_PR_NT_MakeSecurityDescriptorACL(mode, dirAccessTable, &pSD, &pACL) ==
   3035      PR_SUCCESS) {
   3036    sa.nLength = sizeof(sa);
   3037    sa.lpSecurityDescriptor = pSD;
   3038    sa.bInheritHandle = FALSE;
   3039    lpSA = &sa;
   3040  }
   3041  rv = CreateDirectory(name, lpSA);
   3042  if (lpSA != NULL) {
   3043    _PR_NT_FreeSecurityDescriptorACL(pSD, pACL);
   3044  }
   3045  if (rv) {
   3046    return 0;
   3047  } else {
   3048    _PR_MD_MAP_MKDIR_ERROR(GetLastError());
   3049    return -1;
   3050  }
   3051 }
   3052 
   3053 PRInt32 _PR_MD_RMDIR(const char* name) {
   3054  if (RemoveDirectory(name)) {
   3055    return 0;
   3056  } else {
   3057    _PR_MD_MAP_RMDIR_ERROR(GetLastError());
   3058    return -1;
   3059  }
   3060 }
   3061 
   3062 PRStatus _PR_MD_LOCKFILE(PROsfd f) {
   3063  PRInt32 rv, err;
   3064  PRThread* me = _PR_MD_CURRENT_THREAD();
   3065 
   3066  if (me->io_suspended) {
   3067    PR_SetError(PR_INVALID_STATE_ERROR, 0);
   3068    return PR_FAILURE;
   3069  }
   3070 
   3071  memset(&(me->md.overlapped.overlapped), 0, sizeof(OVERLAPPED));
   3072 
   3073  _PR_THREAD_LOCK(me);
   3074  if (_PR_PENDING_INTERRUPT(me)) {
   3075    me->flags &= ~_PR_INTERRUPT;
   3076    PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   3077    _PR_THREAD_UNLOCK(me);
   3078    return -1;
   3079  }
   3080  me->io_pending = PR_TRUE;
   3081  me->state = _PR_IO_WAIT;
   3082  _PR_THREAD_UNLOCK(me);
   3083 
   3084  rv = LockFileEx((HANDLE)f, LOCKFILE_EXCLUSIVE_LOCK, 0, 0x7fffffff, 0,
   3085                  &me->md.overlapped.overlapped);
   3086 
   3087  if (_native_threads_only) {
   3088    _PR_THREAD_LOCK(me);
   3089    me->io_pending = PR_FALSE;
   3090    me->state = _PR_RUNNING;
   3091    if (_PR_PENDING_INTERRUPT(me)) {
   3092      me->flags &= ~_PR_INTERRUPT;
   3093      PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   3094      _PR_THREAD_UNLOCK(me);
   3095      return PR_FAILURE;
   3096    }
   3097    _PR_THREAD_UNLOCK(me);
   3098 
   3099    if (rv == FALSE) {
   3100      err = GetLastError();
   3101      PR_ASSERT(err != ERROR_IO_PENDING);
   3102      _PR_MD_MAP_LOCKF_ERROR(err);
   3103      return PR_FAILURE;
   3104    }
   3105    return PR_SUCCESS;
   3106  }
   3107 
   3108  /* HACK AROUND NT BUG
   3109   * NT 3.51 has a bug.  In NT 3.51, if LockFileEx returns true, you
   3110   * don't get any completion on the completion port.  This is a bug.
   3111   *
   3112   * They fixed it on NT4.0 so that you do get a completion.
   3113   *
   3114   * If we pretend we won't get a completion, NSPR gets confused later
   3115   * when the unexpected completion arrives.  If we assume we do get
   3116   * a completion, we hang on 3.51.  Worse, Microsoft informs me that the
   3117   * behavior varies on 3.51 depending on if you are using a network
   3118   * file system or a local disk!
   3119   *
   3120   * Solution:  For now, _nt_version_gets_lockfile_completion is set
   3121   * depending on whether or not this system is EITHER
   3122   *      - running NT 4.0
   3123   *      - running NT 3.51 with a service pack greater than 5.
   3124   *
   3125   * In the meantime, this code may not work on network file systems.
   3126   *
   3127   */
   3128 
   3129  if (rv == FALSE && ((err = GetLastError()) != ERROR_IO_PENDING)) {
   3130    _PR_THREAD_LOCK(me);
   3131    me->io_pending = PR_FALSE;
   3132    me->state = _PR_RUNNING;
   3133    if (_PR_PENDING_INTERRUPT(me)) {
   3134      me->flags &= ~_PR_INTERRUPT;
   3135      PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   3136      _PR_THREAD_UNLOCK(me);
   3137      return PR_FAILURE;
   3138    }
   3139    _PR_THREAD_UNLOCK(me);
   3140 
   3141    _PR_MD_MAP_LOCKF_ERROR(err);
   3142    return PR_FAILURE;
   3143  }
   3144 #ifdef _NEED_351_FILE_LOCKING_HACK
   3145  else if (rv) {
   3146    /* If this is NT 3.51 and the file is local, then we won't get a
   3147     * completion back from LockFile when it succeeded.
   3148     */
   3149    if (_nt_version_gets_lockfile_completion == PR_FALSE) {
   3150      if (IsFileLocal((HANDLE)f) == _PR_LOCAL_FILE) {
   3151        me->io_pending = PR_FALSE;
   3152        me->state = _PR_RUNNING;
   3153        return PR_SUCCESS;
   3154      }
   3155    }
   3156  }
   3157 #endif /* _NEED_351_FILE_LOCKING_HACK */
   3158 
   3159  if (_NT_IO_WAIT(me, PR_INTERVAL_NO_TIMEOUT) == PR_FAILURE) {
   3160    _PR_THREAD_LOCK(me);
   3161    me->io_pending = PR_FALSE;
   3162    me->state = _PR_RUNNING;
   3163    _PR_THREAD_UNLOCK(me);
   3164    return PR_FAILURE;
   3165  }
   3166 
   3167  if (me->md.blocked_io_status == 0) {
   3168    _PR_MD_MAP_LOCKF_ERROR(me->md.blocked_io_error);
   3169    return PR_FAILURE;
   3170  }
   3171 
   3172  return PR_SUCCESS;
   3173 }
   3174 
   3175 PRStatus _PR_MD_TLOCKFILE(PROsfd f) {
   3176  PRInt32 rv, err;
   3177  PRThread* me = _PR_MD_CURRENT_THREAD();
   3178 
   3179  if (me->io_suspended) {
   3180    PR_SetError(PR_INVALID_STATE_ERROR, 0);
   3181    return PR_FAILURE;
   3182  }
   3183 
   3184  memset(&(me->md.overlapped.overlapped), 0, sizeof(OVERLAPPED));
   3185 
   3186  _PR_THREAD_LOCK(me);
   3187  if (_PR_PENDING_INTERRUPT(me)) {
   3188    me->flags &= ~_PR_INTERRUPT;
   3189    PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   3190    _PR_THREAD_UNLOCK(me);
   3191    return -1;
   3192  }
   3193  me->io_pending = PR_TRUE;
   3194  me->state = _PR_IO_WAIT;
   3195  _PR_THREAD_UNLOCK(me);
   3196 
   3197  rv =
   3198      LockFileEx((HANDLE)f, LOCKFILE_FAIL_IMMEDIATELY | LOCKFILE_EXCLUSIVE_LOCK,
   3199                 0, 0x7fffffff, 0, &me->md.overlapped.overlapped);
   3200  if (_native_threads_only) {
   3201    _PR_THREAD_LOCK(me);
   3202    me->io_pending = PR_FALSE;
   3203    me->state = _PR_RUNNING;
   3204    if (_PR_PENDING_INTERRUPT(me)) {
   3205      me->flags &= ~_PR_INTERRUPT;
   3206      PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   3207      _PR_THREAD_UNLOCK(me);
   3208      return PR_FAILURE;
   3209    }
   3210    _PR_THREAD_UNLOCK(me);
   3211 
   3212    if (rv == FALSE) {
   3213      err = GetLastError();
   3214      PR_ASSERT(err != ERROR_IO_PENDING);
   3215      _PR_MD_MAP_LOCKF_ERROR(err);
   3216      return PR_FAILURE;
   3217    }
   3218    return PR_SUCCESS;
   3219  }
   3220  if (rv == FALSE && ((err = GetLastError()) != ERROR_IO_PENDING)) {
   3221    _PR_THREAD_LOCK(me);
   3222    me->io_pending = PR_FALSE;
   3223    me->state = _PR_RUNNING;
   3224    if (_PR_PENDING_INTERRUPT(me)) {
   3225      me->flags &= ~_PR_INTERRUPT;
   3226      PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   3227      _PR_THREAD_UNLOCK(me);
   3228      return PR_FAILURE;
   3229    }
   3230    _PR_THREAD_UNLOCK(me);
   3231 
   3232    _PR_MD_MAP_LOCKF_ERROR(err);
   3233    return PR_FAILURE;
   3234  }
   3235 #ifdef _NEED_351_FILE_LOCKING_HACK
   3236  else if (rv) {
   3237    /* If this is NT 3.51 and the file is local, then we won't get a
   3238     * completion back from LockFile when it succeeded.
   3239     */
   3240    if (_nt_version_gets_lockfile_completion == PR_FALSE) {
   3241      if (IsFileLocal((HANDLE)f) == _PR_LOCAL_FILE) {
   3242        _PR_THREAD_LOCK(me);
   3243        me->io_pending = PR_FALSE;
   3244        me->state = _PR_RUNNING;
   3245        if (_PR_PENDING_INTERRUPT(me)) {
   3246          me->flags &= ~_PR_INTERRUPT;
   3247          PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   3248          _PR_THREAD_UNLOCK(me);
   3249          return PR_FAILURE;
   3250        }
   3251        _PR_THREAD_UNLOCK(me);
   3252 
   3253        return PR_SUCCESS;
   3254      }
   3255    }
   3256  }
   3257 #endif /* _NEED_351_FILE_LOCKING_HACK */
   3258 
   3259  if (_NT_IO_WAIT(me, PR_INTERVAL_NO_TIMEOUT) == PR_FAILURE) {
   3260    _PR_THREAD_LOCK(me);
   3261    me->io_pending = PR_FALSE;
   3262    me->state = _PR_RUNNING;
   3263    if (_PR_PENDING_INTERRUPT(me)) {
   3264      me->flags &= ~_PR_INTERRUPT;
   3265      PR_SetError(PR_PENDING_INTERRUPT_ERROR, 0);
   3266      _PR_THREAD_UNLOCK(me);
   3267      return PR_FAILURE;
   3268    }
   3269    _PR_THREAD_UNLOCK(me);
   3270 
   3271    return PR_FAILURE;
   3272  }
   3273 
   3274  if (me->md.blocked_io_status == 0) {
   3275    _PR_MD_MAP_LOCKF_ERROR(me->md.blocked_io_error);
   3276    return PR_FAILURE;
   3277  }
   3278 
   3279  return PR_SUCCESS;
   3280 }
   3281 
   3282 PRStatus _PR_MD_UNLOCKFILE(PROsfd f) {
   3283  PRInt32 rv;
   3284  PRThread* me = _PR_MD_CURRENT_THREAD();
   3285 
   3286  if (me->io_suspended) {
   3287    PR_SetError(PR_INVALID_STATE_ERROR, 0);
   3288    return PR_FAILURE;
   3289  }
   3290 
   3291  memset(&(me->md.overlapped.overlapped), 0, sizeof(OVERLAPPED));
   3292 
   3293  rv = UnlockFileEx((HANDLE)f, 0, 0x7fffffff, 0, &me->md.overlapped.overlapped);
   3294 
   3295  if (rv) {
   3296    return PR_SUCCESS;
   3297  } else {
   3298    int err = GetLastError();
   3299    _PR_MD_MAP_LOCKF_ERROR(err);
   3300    return PR_FAILURE;
   3301  }
   3302 }
   3303 
/* Intentionally a no-op on NT; see the comment in the body. */
void _PR_MD_MAKE_NONBLOCK(PRFileDesc* f) {
  /*
   * On NT, we either call _md_Associate() or _md_MakeNonblock(),
   * depending on whether the socket is blocking or not.
   *
   * Once we associate a socket with the io completion port,
   * there is no way to disassociate it from the io completion
   * port.  So we have to call _md_Associate/_md_MakeNonblock
   * lazily.
   */
}
   3315 
   3316 #ifdef _NEED_351_FILE_LOCKING_HACK
   3317 /***************
   3318 **
   3319 ** Lockfile hacks
   3320 **
   3321 ** The following code is a hack to work around a microsoft bug with lockfile.
   3322 ** The problem is that on NT 3.51, if LockFileEx() succeeds, you never
   3323 ** get a completion back for files that are on local disks.  So, we need to
   3324 ** know if a file is local or remote so we can tell if we should expect
   3325 ** a completion.
   3326 **
   3327 ** The only way to check if a file is local or remote based on the handle is
   3328 ** to get the serial number for the volume it is mounted on and then to
   3329 ** compare that with mounted drives.  This code caches the volume numbers of
   3330 ** fixed disks and does a relatively quick check.
   3331 **
** Locking:  Since the only thing we ever do when multithreaded is a 32bit
**           assignment, we probably don't need locking.  It is included just
**           in case anyway.
**
** Limitations:  Does not work on floppies because they are too slow
**               Unknown if it will work on weird 3rd-party file systems
   3338 **
   3339 ****************
   3340 */
   3341 
/* There can only be 26 drive letters on NT */
#  define _PR_MAX_DRIVES 26

/* Guards the cached volume-serial-number state below. */
_MDLock cachedVolumeLock;
/* Volume serial numbers indexed by drive letter (A=0 .. Z=25). */
DWORD dwCachedVolumeSerialNumbers[_PR_MAX_DRIVES] = {0};
/* Highest drive index holding a cached serial number. */
DWORD dwLastCachedDrive = 0;
DWORD dwRemoveableDrivesToCheck = 0; /* bitmask for removeable drives */
   3349 
/*
 * One-time initialization for IsFileLocal(): creates the cache lock,
 * walks the logical drive strings, and caches the volume serial number
 * of fixed, removable, CDROM, and ramdisk drives.  Removable-media
 * drives are additionally recorded in dwRemoveableDrivesToCheck so
 * their (possibly changed) serial numbers can be re-queried later.
 * Drives A: and B: are skipped because probing floppies is very slow.
 * Returns PR_TRUE on success, PR_FALSE if the drive strings could not
 * be retrieved.
 */
PRBool IsFileLocalInit() {
  TCHAR lpBuffer[_PR_MAX_DRIVES * 5];
  DWORD nBufferLength = _PR_MAX_DRIVES * 5;
  DWORD nBufferNeeded = GetLogicalDriveStrings(0, NULL);
  DWORD dwIndex = 0;
  DWORD dwDriveType;
  DWORD dwVolumeSerialNumber;
  DWORD dwDriveIndex = 0;
  DWORD oldmode = (DWORD)-1;

  _MD_NEW_LOCK(&cachedVolumeLock);

  nBufferNeeded = GetLogicalDriveStrings(nBufferLength, lpBuffer);
  if (nBufferNeeded == 0 || nBufferNeeded > nBufferLength) {
    return PR_FALSE;
  }

  // Calling GetVolumeInformation on a removeable drive where the
  // disk is currently removed will cause a dialog box to the
  // console.  This is not good.
  // Temporarily disable the SEM_FAILCRITICALERRORS to avoid the
  // damn dialog.

  dwCachedVolumeSerialNumbers[dwDriveIndex] = 0;
  oldmode = SetErrorMode(SEM_FAILCRITICALERRORS);

  // now loop through the logical drives
  while (lpBuffer[dwIndex] != TEXT('\0')) {
    // skip the floppy drives.  This is *SLOW*
    if ((lpBuffer[dwIndex] == TEXT('A')) || (lpBuffer[dwIndex] == TEXT('B')))
      /* Skip over floppies */;
    else {
      dwDriveIndex = (lpBuffer[dwIndex] - TEXT('A'));

      dwDriveType = GetDriveType(&lpBuffer[dwIndex]);

      switch (dwDriveType) {
        // Ignore these drive types
        case 0:
        case 1:
        case DRIVE_REMOTE:
        default:  // If the drive type is unknown, ignore it.
          break;

        // Removable media drives can have different serial numbers
        // at different times, so cache the current serial number
        // but keep track of them so they can be rechecked if necessary.
        case DRIVE_REMOVABLE:

        // CDROM is a removable media
        case DRIVE_CDROM:

          // no idea if ramdisks can change serial numbers or not
          // but it doesn't hurt to treat them as removable.

        case DRIVE_RAMDISK:

          // Here is where we keep track of removable drives.
          dwRemoveableDrivesToCheck |= 1 << dwDriveIndex;

          // removable drives fall through to fixed drives and get cached.

        case DRIVE_FIXED:

          // cache volume serial numbers.
          if (GetVolumeInformation(&lpBuffer[dwIndex], NULL, 0,
                                   &dwVolumeSerialNumber, NULL, NULL, NULL,
                                   0)) {
            if (dwLastCachedDrive < dwDriveIndex) {
              dwLastCachedDrive = dwDriveIndex;
            }
            dwCachedVolumeSerialNumbers[dwDriveIndex] = dwVolumeSerialNumber;
          }

          break;
      }
    }

    // advance past this drive string and its terminating null.
    dwIndex += lstrlen(&lpBuffer[dwIndex]) + 1;
  }

  if (oldmode != (DWORD)-1) {
    SetErrorMode(oldmode);
    oldmode = (DWORD)-1;
  }

  return PR_TRUE;
}
   3438 
/*
 * Determine whether the open file handle 'hFile' refers to a file on a
 * local disk.  Returns _PR_LOCAL_FILE, _PR_REMOTE_FILE, or -1 if the
 * file information cannot be read.  First compares the file's volume
 * serial number against the cached local-drive serials; on a miss,
 * re-queries each removable drive (whose media, and hence serial
 * number, may have changed) and refreshes the cache on a match.
 */
PRInt32 IsFileLocal(HANDLE hFile) {
  DWORD dwIndex = 0, dwMask;
  BY_HANDLE_FILE_INFORMATION Info;
  TCHAR szDrive[4] = TEXT("C:\\");
  DWORD dwVolumeSerialNumber;
  DWORD oldmode = (DWORD)-1;
  int rv = _PR_REMOTE_FILE;

  if (!GetFileInformationByHandle(hFile, &Info)) {
    return -1;
  }

  // look to see if the volume serial number has been cached.
  _MD_LOCK(&cachedVolumeLock);
  while (dwIndex <= dwLastCachedDrive)
    if (dwCachedVolumeSerialNumbers[dwIndex++] == Info.dwVolumeSerialNumber) {
      _MD_UNLOCK(&cachedVolumeLock);
      return _PR_LOCAL_FILE;
    }
  _MD_UNLOCK(&cachedVolumeLock);

  // volume serial number not found in the cache.  Check removable files.
  // removable drives are noted as a bitmask.  If the bit associated with
  // a specific drive is set, then we should query its volume serial number
  // as its possible it has changed.
  dwMask = dwRemoveableDrivesToCheck;
  dwIndex = 0;

  while (dwMask) {
    // advance to the next set bit (the next removable drive letter).
    while (!(dwMask & 1)) {
      dwIndex++;
      dwMask = dwMask >> 1;
    }

    szDrive[0] = TEXT('A') + (TCHAR)dwIndex;

    // Calling GetVolumeInformation on a removeable drive where the
    // disk is currently removed will cause a dialog box to the
    // console.  This is not good.
    // Temporarily disable the SEM_FAILCRITICALERRORS to avoid the
    // dialog.

    oldmode = SetErrorMode(SEM_FAILCRITICALERRORS);

    if (GetVolumeInformation(szDrive, NULL, 0, &dwVolumeSerialNumber, NULL,
                             NULL, NULL, 0)) {
      if (dwVolumeSerialNumber == Info.dwVolumeSerialNumber) {
        _MD_LOCK(&cachedVolumeLock);
        if (dwLastCachedDrive < dwIndex) {
          dwLastCachedDrive = dwIndex;
        }
        dwCachedVolumeSerialNumbers[dwIndex] = dwVolumeSerialNumber;
        _MD_UNLOCK(&cachedVolumeLock);
        rv = _PR_LOCAL_FILE;
      }
    }
    if (oldmode != (DWORD)-1) {
      SetErrorMode(oldmode);
      oldmode = (DWORD)-1;
    }

    if (rv == _PR_LOCAL_FILE) {
      return _PR_LOCAL_FILE;
    }

    dwIndex++;
    dwMask = dwMask >> 1;
  }

  return _PR_REMOTE_FILE;
}
   3510 #endif /* _NEED_351_FILE_LOCKING_HACK */
   3511 
/*
 * Cancel the suspended (timed-out or interrupted) I/O operation that
 * was issued on 'fd', wait for its completion to drain from the
 * completion port, and return the calling thread to the running state
 * so the fd can be used again.  Returns PR_SUCCESS, or PR_FAILURE if
 * the calling thread has no suspended I/O on this fd's NSPR layer.
 */
PR_IMPLEMENT(PRStatus) PR_NT_CancelIo(PRFileDesc* fd) {
  PRThread* me = _PR_MD_CURRENT_THREAD();
  PRBool fWait;
  PRFileDesc* bottom;

  bottom = PR_GetIdentitiesLayer(fd, PR_NSPR_IO_LAYER);
  if (!me->io_suspended || (NULL == bottom) ||
      (me->io_fd != bottom->secret->md.osfd)) {
    PR_SetError(PR_INVALID_STATE_ERROR, 0);
    return PR_FAILURE;
  }
  /*
   * The CancelIO operation has to be issued by the same NT thread that
   * issued the I/O operation
   */
  PR_ASSERT(_PR_IS_NATIVE_THREAD(me) || (me->cpu == me->md.thr_bound_cpu));
  if (me->io_pending) {
    if (!CancelIo((HANDLE)bottom->secret->md.osfd)) {
      PR_SetError(PR_INVALID_STATE_ERROR, GetLastError());
      return PR_FAILURE;
    }
  }
  _PR_THREAD_LOCK(me);
  fWait = me->io_pending;
  me->io_suspended = PR_FALSE;
  me->state = _PR_IO_WAIT;
  /* NOTE(review): interrupt_disabled appears to shield the drain wait
   * below from thread interrupts — confirm against the scheduler code. */
  me->md.interrupt_disabled = PR_TRUE;
  _PR_THREAD_UNLOCK(me);
  if (fWait) {
    /* Wait for the cancelled operation's completion to arrive. */
    _NT_IO_WAIT(me, PR_INTERVAL_NO_TIMEOUT);
  }
  PR_ASSERT(me->io_suspended == PR_FALSE);
  PR_ASSERT(me->io_pending == PR_FALSE);

  _PR_THREAD_LOCK(me);
  me->md.interrupt_disabled = PR_FALSE;
  me->md.thr_bound_cpu = NULL;
  me->io_suspended = PR_FALSE;
  me->io_pending = PR_FALSE;
  me->state = _PR_RUNNING;
  _PR_THREAD_UNLOCK(me);
  return PR_SUCCESS;
}
   3555 
   3556 static PROsfd _nt_nonblock_accept(PRFileDesc* fd, struct sockaddr* addr,
   3557                                  int* addrlen, PRIntervalTime timeout) {
   3558  PROsfd osfd = fd->secret->md.osfd;
   3559  SOCKET sock;
   3560  PRInt32 rv, err;
   3561  fd_set rd;
   3562  struct timeval tv, *tvp;
   3563 
   3564  FD_ZERO(&rd);
   3565  FD_SET((SOCKET)osfd, &rd);
   3566  if (timeout == PR_INTERVAL_NO_TIMEOUT) {
   3567    while ((sock = accept(osfd, addr, addrlen)) == -1) {
   3568      if (((err = WSAGetLastError()) == WSAEWOULDBLOCK) &&
   3569          (!fd->secret->nonblocking)) {
   3570        if ((rv = _PR_NTFiberSafeSelect(0, &rd, NULL, NULL, NULL)) == -1) {
   3571          _PR_MD_MAP_SELECT_ERROR(WSAGetLastError());
   3572          break;
   3573        }
   3574      } else {
   3575        _PR_MD_MAP_ACCEPT_ERROR(err);
   3576        break;
   3577      }
   3578    }
   3579  } else if (timeout == PR_INTERVAL_NO_WAIT) {
   3580    if ((sock = accept(osfd, addr, addrlen)) == -1) {
   3581      if (((err = WSAGetLastError()) == WSAEWOULDBLOCK) &&
   3582          (!fd->secret->nonblocking)) {
   3583        PR_SetError(PR_IO_TIMEOUT_ERROR, 0);
   3584      } else {
   3585        _PR_MD_MAP_ACCEPT_ERROR(err);
   3586      }
   3587    }
   3588  } else {
   3589  retry:
   3590    if ((sock = accept(osfd, addr, addrlen)) == -1) {
   3591      if (((err = WSAGetLastError()) == WSAEWOULDBLOCK) &&
   3592          (!fd->secret->nonblocking)) {
   3593        tv.tv_sec = PR_IntervalToSeconds(timeout);
   3594        tv.tv_usec = PR_IntervalToMicroseconds(timeout -
   3595                                               PR_SecondsToInterval(tv.tv_sec));
   3596        tvp = &tv;
   3597 
   3598        rv = _PR_NTFiberSafeSelect(0, &rd, NULL, NULL, tvp);
   3599        if (rv > 0) {
   3600          goto retry;
   3601        } else if (rv == 0) {
   3602          PR_SetError(PR_IO_TIMEOUT_ERROR, 0);
   3603        } else {
   3604          _PR_MD_MAP_SELECT_ERROR(WSAGetLastError());
   3605        }
   3606      } else {
   3607        _PR_MD_MAP_ACCEPT_ERROR(err);
   3608      }
   3609    }
   3610  }
   3611  return (PROsfd)sock;
   3612 }
   3613 
   3614 static PRInt32 _nt_nonblock_connect(PRFileDesc* fd, struct sockaddr* addr,
   3615                                    int addrlen, PRIntervalTime timeout) {
   3616  PROsfd osfd = fd->secret->md.osfd;
   3617  PRInt32 rv;
   3618  int err;
   3619  fd_set wr, ex;
   3620  struct timeval tv, *tvp;
   3621  int len;
   3622 
   3623  if ((rv = connect(osfd, addr, addrlen)) == -1) {
   3624    if ((err = WSAGetLastError()) == WSAEWOULDBLOCK) {
   3625      if (timeout == PR_INTERVAL_NO_TIMEOUT) {
   3626        tvp = NULL;
   3627      } else {
   3628        tv.tv_sec = PR_IntervalToSeconds(timeout);
   3629        tv.tv_usec = PR_IntervalToMicroseconds(timeout -
   3630                                               PR_SecondsToInterval(tv.tv_sec));
   3631        tvp = &tv;
   3632      }
   3633      FD_ZERO(&wr);
   3634      FD_ZERO(&ex);
   3635      FD_SET((SOCKET)osfd, &wr);
   3636      FD_SET((SOCKET)osfd, &ex);
   3637      if ((rv = _PR_NTFiberSafeSelect(0, NULL, &wr, &ex, tvp)) == -1) {
   3638        _PR_MD_MAP_SELECT_ERROR(WSAGetLastError());
   3639        return rv;
   3640      }
   3641      if (rv == 0) {
   3642        PR_SetError(PR_IO_TIMEOUT_ERROR, 0);
   3643        return -1;
   3644      }
   3645      /* Call Sleep(0) to work around a Winsock timeing bug. */
   3646      Sleep(0);
   3647      if (FD_ISSET((SOCKET)osfd, &ex)) {
   3648        len = sizeof(err);
   3649        if (getsockopt(osfd, SOL_SOCKET, SO_ERROR, (char*)&err, &len) ==
   3650            SOCKET_ERROR) {
   3651          _PR_MD_MAP_GETSOCKOPT_ERROR(WSAGetLastError());
   3652          return -1;
   3653        }
   3654        _PR_MD_MAP_CONNECT_ERROR(err);
   3655        return -1;
   3656      }
   3657      PR_ASSERT(FD_ISSET((SOCKET)osfd, &wr));
   3658      rv = 0;
   3659    } else {
   3660      _PR_MD_MAP_CONNECT_ERROR(err);
   3661    }
   3662  }
   3663  return rv;
   3664 }
   3665 
   3666 static PRInt32 _nt_nonblock_recv(PRFileDesc* fd, char* buf, int len, int flags,
   3667                                 PRIntervalTime timeout) {
   3668  PROsfd osfd = fd->secret->md.osfd;
   3669  PRInt32 rv, err;
   3670  struct timeval tv, *tvp;
   3671  fd_set rd;
   3672  int osflags;
   3673 
   3674  if (0 == flags) {
   3675    osflags = 0;
   3676  } else {
   3677    PR_ASSERT(PR_MSG_PEEK == flags);
   3678    osflags = MSG_PEEK;
   3679  }
   3680  while ((rv = recv(osfd, buf, len, osflags)) == -1) {
   3681    if (((err = WSAGetLastError()) == WSAEWOULDBLOCK) &&
   3682        (!fd->secret->nonblocking)) {
   3683      FD_ZERO(&rd);
   3684      FD_SET((SOCKET)osfd, &rd);
   3685      if (timeout == PR_INTERVAL_NO_TIMEOUT) {
   3686        tvp = NULL;
   3687      } else {
   3688        tv.tv_sec = PR_IntervalToSeconds(timeout);
   3689        tv.tv_usec = PR_IntervalToMicroseconds(timeout -
   3690                                               PR_SecondsToInterval(tv.tv_sec));
   3691        tvp = &tv;
   3692      }
   3693      if ((rv = _PR_NTFiberSafeSelect(0, &rd, NULL, NULL, tvp)) == -1) {
   3694        _PR_MD_MAP_SELECT_ERROR(WSAGetLastError());
   3695        break;
   3696      } else if (rv == 0) {
   3697        PR_SetError(PR_IO_TIMEOUT_ERROR, 0);
   3698        rv = -1;
   3699        break;
   3700      }
   3701    } else {
   3702      _PR_MD_MAP_RECV_ERROR(err);
   3703      break;
   3704    }
   3705  }
   3706  return (rv);
   3707 }
   3708 
   3709 static PRInt32 _nt_nonblock_send(PRFileDesc* fd, char* buf, int len,
   3710                                 PRIntervalTime timeout) {
   3711  PROsfd osfd = fd->secret->md.osfd;
   3712  PRInt32 rv, err;
   3713  struct timeval tv, *tvp;
   3714  fd_set wd;
   3715  PRInt32 bytesSent = 0;
   3716 
   3717  while (bytesSent < len) {
   3718    while ((rv = send(osfd, buf, len, 0)) == -1) {
   3719      if (((err = WSAGetLastError()) == WSAEWOULDBLOCK) &&
   3720          (!fd->secret->nonblocking)) {
   3721        if (timeout == PR_INTERVAL_NO_TIMEOUT) {
   3722          tvp = NULL;
   3723        } else {
   3724          tv.tv_sec = PR_IntervalToSeconds(timeout);
   3725          tv.tv_usec = PR_IntervalToMicroseconds(
   3726              timeout - PR_SecondsToInterval(tv.tv_sec));
   3727          tvp = &tv;
   3728        }
   3729        FD_ZERO(&wd);
   3730        FD_SET((SOCKET)osfd, &wd);
   3731        if ((rv = _PR_NTFiberSafeSelect(0, NULL, &wd, NULL, tvp)) == -1) {
   3732          _PR_MD_MAP_SELECT_ERROR(WSAGetLastError());
   3733          return -1;
   3734        }
   3735        if (rv == 0) {
   3736          PR_SetError(PR_IO_TIMEOUT_ERROR, 0);
   3737          return -1;
   3738        }
   3739      } else {
   3740        _PR_MD_MAP_SEND_ERROR(err);
   3741        return -1;
   3742      }
   3743    }
   3744    bytesSent += rv;
   3745    if (fd->secret->nonblocking) {
   3746      break;
   3747    }
   3748    if (bytesSent < len) {
   3749      if (timeout == PR_INTERVAL_NO_TIMEOUT) {
   3750        tvp = NULL;
   3751      } else {
   3752        tv.tv_sec = PR_IntervalToSeconds(timeout);
   3753        tv.tv_usec = PR_IntervalToMicroseconds(timeout -
   3754                                               PR_SecondsToInterval(tv.tv_sec));
   3755        tvp = &tv;
   3756      }
   3757      FD_ZERO(&wd);
   3758      FD_SET((SOCKET)osfd, &wd);
   3759      if ((rv = _PR_NTFiberSafeSelect(0, NULL, &wd, NULL, tvp)) == -1) {
   3760        _PR_MD_MAP_SELECT_ERROR(WSAGetLastError());
   3761        return -1;
   3762      }
   3763      if (rv == 0) {
   3764        PR_SetError(PR_IO_TIMEOUT_ERROR, 0);
   3765        return -1;
   3766      }
   3767    }
   3768  }
   3769  return bytesSent;
   3770 }
   3771 
   3772 static PRInt32 _nt_nonblock_writev(PRFileDesc* fd, const PRIOVec* iov, int size,
   3773                                   PRIntervalTime timeout) {
   3774  int index;
   3775  int sent = 0;
   3776  int rv;
   3777 
   3778  for (index = 0; index < size; index++) {
   3779    rv =
   3780        _nt_nonblock_send(fd, iov[index].iov_base, iov[index].iov_len, timeout);
   3781    if (rv > 0) {
   3782      sent += rv;
   3783    }
   3784    if (rv != iov[index].iov_len) {
   3785      if (rv < 0) {
   3786        if (fd->secret->nonblocking &&
   3787            (PR_GetError() == PR_WOULD_BLOCK_ERROR) && (sent > 0)) {
   3788          return sent;
   3789        } else {
   3790          return -1;
   3791        }
   3792      }
   3793      /* Only a nonblocking socket can have partial sends */
   3794      PR_ASSERT(fd->secret->nonblocking);
   3795      return sent;
   3796    }
   3797  }
   3798 
   3799  return sent;
   3800 }
   3801 
   3802 static PRInt32 _nt_nonblock_sendto(PRFileDesc* fd, const char* buf, int len,
   3803                                   const struct sockaddr* addr, int addrlen,
   3804                                   PRIntervalTime timeout) {
   3805  PROsfd osfd = fd->secret->md.osfd;
   3806  PRInt32 rv, err;
   3807  struct timeval tv, *tvp;
   3808  fd_set wd;
   3809  PRInt32 bytesSent = 0;
   3810 
   3811  while (bytesSent < len) {
   3812    while ((rv = sendto(osfd, buf, len, 0, addr, addrlen)) == -1) {
   3813      if (((err = WSAGetLastError()) == WSAEWOULDBLOCK) &&
   3814          (!fd->secret->nonblocking)) {
   3815        if (timeout == PR_INTERVAL_NO_TIMEOUT) {
   3816          tvp = NULL;
   3817        } else {
   3818          tv.tv_sec = PR_IntervalToSeconds(timeout);
   3819          tv.tv_usec = PR_IntervalToMicroseconds(
   3820              timeout - PR_SecondsToInterval(tv.tv_sec));
   3821          tvp = &tv;
   3822        }
   3823        FD_ZERO(&wd);
   3824        FD_SET((SOCKET)osfd, &wd);
   3825        if ((rv = _PR_NTFiberSafeSelect(0, NULL, &wd, NULL, tvp)) == -1) {
   3826          _PR_MD_MAP_SELECT_ERROR(WSAGetLastError());
   3827          return -1;
   3828        }
   3829        if (rv == 0) {
   3830          PR_SetError(PR_IO_TIMEOUT_ERROR, 0);
   3831          return -1;
   3832        }
   3833      } else {
   3834        _PR_MD_MAP_SENDTO_ERROR(err);
   3835        return -1;
   3836      }
   3837    }
   3838    bytesSent += rv;
   3839    if (fd->secret->nonblocking) {
   3840      break;
   3841    }
   3842    if (bytesSent < len) {
   3843      if (timeout == PR_INTERVAL_NO_TIMEOUT) {
   3844        tvp = NULL;
   3845      } else {
   3846        tv.tv_sec = PR_IntervalToSeconds(timeout);
   3847        tv.tv_usec = PR_IntervalToMicroseconds(timeout -
   3848                                               PR_SecondsToInterval(tv.tv_sec));
   3849        tvp = &tv;
   3850      }
   3851      FD_ZERO(&wd);
   3852      FD_SET((SOCKET)osfd, &wd);
   3853      if ((rv = _PR_NTFiberSafeSelect(0, NULL, &wd, NULL, tvp)) == -1) {
   3854        _PR_MD_MAP_SELECT_ERROR(WSAGetLastError());
   3855        return -1;
   3856      }
   3857      if (rv == 0) {
   3858        PR_SetError(PR_IO_TIMEOUT_ERROR, 0);
   3859        return -1;
   3860      }
   3861    }
   3862  }
   3863  return bytesSent;
   3864 }
   3865 
   3866 static PRInt32 _nt_nonblock_recvfrom(PRFileDesc* fd, char* buf, int len,
   3867                                     struct sockaddr* addr, int* addrlen,
   3868                                     PRIntervalTime timeout) {
   3869  PROsfd osfd = fd->secret->md.osfd;
   3870  PRInt32 rv, err;
   3871  struct timeval tv, *tvp;
   3872  fd_set rd;
   3873 
   3874  while ((rv = recvfrom(osfd, buf, len, 0, addr, addrlen)) == -1) {
   3875    if (((err = WSAGetLastError()) == WSAEWOULDBLOCK) &&
   3876        (!fd->secret->nonblocking)) {
   3877      if (timeout == PR_INTERVAL_NO_TIMEOUT) {
   3878        tvp = NULL;
   3879      } else {
   3880        tv.tv_sec = PR_IntervalToSeconds(timeout);
   3881        tv.tv_usec = PR_IntervalToMicroseconds(timeout -
   3882                                               PR_SecondsToInterval(tv.tv_sec));
   3883        tvp = &tv;
   3884      }
   3885      FD_ZERO(&rd);
   3886      FD_SET((SOCKET)osfd, &rd);
   3887      if ((rv = _PR_NTFiberSafeSelect(0, &rd, NULL, NULL, tvp)) == -1) {
   3888        _PR_MD_MAP_SELECT_ERROR(WSAGetLastError());
   3889        break;
   3890      } else if (rv == 0) {
   3891        PR_SetError(PR_IO_TIMEOUT_ERROR, 0);
   3892        rv = -1;
   3893        break;
   3894      }
   3895    } else {
   3896      _PR_MD_MAP_RECVFROM_ERROR(err);
   3897      break;
   3898    }
   3899  }
   3900  return (rv);
   3901 }
   3902 
   3903 /*
   3904 * UDP support: the continuation thread functions and recvfrom and sendto.
   3905 */
   3906 
/*
 * Insert a continuation operation into the timed queue (pt_tq).
 * Untimed operations go at the tail; timed operations are placed so
 * the queue stays ordered by absolute expiration time, with each
 * entry's 'timeout' stored as a delta relative to its neighbors.
 * Caller holds pt_tq.ml.
 */
static void pt_InsertTimedInternal(pt_Continuation* op) {
  PRInt32 delta = 0;
  pt_Continuation* t_op = NULL;
  PRIntervalTime now = PR_IntervalNow(), op_tmo, qd_tmo;

  /*
   * If this operation isn't timed, it gets queued at the
   * end of the list (just after pt_tq.tail) and we're
   * finished early.
   */
  if (PR_INTERVAL_NO_TIMEOUT == op->timeout) {
    t_op = pt_tq.tail; /* put it at the end */
    goto done;
  }

  /*
   * The rest of this routine actually deals with timed ops.
   */

  if (NULL != pt_tq.op) {
    /*
     * To find where in the list to put the new operation, form
     * the absolute time the operations in question will expire.
     *
     * The new operation ('op') will expire at now() + op->timeout.
     *
     * The operation that will time out furthest in the future will
     * do so at pt_tq.epoch + pt_tq.op->timeout.
     *
     * Subsequently earlier timeouts are computed based on the latter
     * knowledge by subtracting the timeout deltas that are stored in
     * the operation list. There are operation[n]->timeout ticks
     * between the expiration of operation[n-1] and operation[n].
     *
     * Therefore, operation[n-1] will expire operation[n]->timeout
     * ticks prior to operation[n].
     *
     * This should be easy!
     */
    t_op = pt_tq.op;                      /* running pointer to queued op */
    op_tmo = now + op->timeout;           /* that's in absolute ticks */
    qd_tmo = pt_tq.epoch + t_op->timeout; /* likewise */

    do {
      /*
       * If 'op' expires later than t_op, then insert 'op' just
       * ahead of t_op. Otherwise, compute when operation[n-1]
       * expires and try again.
       *
       * The actual difference between the expiration of 'op'
       * and the current operation becomes the new operation's
       * timeout interval. That interval is also subtracted from
       * the interval of the operation immediately following where
       * we stick 'op' (unless the next one isn't timed). The new
       * timeout assigned to 'op' takes into account the values of
       * now() and when the previous intervals were computed.
       */
      delta = op_tmo - qd_tmo;
      if (delta >= 0) {
        op->timeout += (now - pt_tq.epoch);
        goto done;
      }

      qd_tmo -= t_op->timeout; /* previous operation's expiration */
      t_op = t_op->prev;       /* point to previous operation */
      if (NULL != t_op) {
        qd_tmo += t_op->timeout;
      }
    } while (NULL != t_op);

    /*
     * If we got here we backed off the head of the list. That means that
     * this timed entry has to go at the head of the list. This is just
     * about like having an empty timer list.
     */
    delta = op->timeout; /* $$$ is this right? */
  }

done:

  /*
   * Insert 'op' into the queue just after t_op or, if t_op is null,
   * at the head of the list.
   *
   * If t_op is NULL, the list is currently empty and this is pretty
   * easy.
   */
  if (NULL == t_op) {
    op->prev = NULL;
    op->next = pt_tq.head;
    pt_tq.head = op;
    if (NULL == pt_tq.tail) {
      pt_tq.tail = op;
    } else {
      op->next->prev = op;
    }
  } else {
    op->prev = t_op;
    op->next = t_op->next;
    if (NULL != op->prev) {
      op->prev->next = op;
    }
    if (NULL != op->next) {
      op->next->prev = op;
    }
    if (t_op == pt_tq.tail) {
      pt_tq.tail = op;
    }
  }

  /*
   * Are we adjusting our epoch, etc? Are we replacing
   * what was previously the element due to expire furthest
   * out in the future? Is this even a timed operation?
   */
  if (PR_INTERVAL_NO_TIMEOUT != op->timeout) {
    if ((NULL == pt_tq.op)     /* we're the one and only */
        || (t_op == pt_tq.op)) /* we're replacing */
    {
      pt_tq.op = op;
      pt_tq.epoch = now;
    }
  }

  pt_tq.op_count += 1;

} /* pt_InsertTimedInternal */
   4034 
   4035 /*
   4036 * function: pt_FinishTimed
   4037 *
   4038 * Takes the finished operation out of the timed queue. It
   4039 * notifies the initiating thread that the opertions is
   4040 * complete and returns to the caller the value of the next
   4041 * operation in the list (or NULL).
   4042 */
static pt_Continuation* pt_FinishTimedInternal(pt_Continuation* op) {
  pt_Continuation* next;

  /* remove this one from the list */
  if (NULL == op->prev) {
    pt_tq.head = op->next; /* op was the head */
  } else {
    op->prev->next = op->next;
  }
  if (NULL == op->next) {
    pt_tq.tail = op->prev; /* op was the tail */
  } else {
    op->next->prev = op->prev;
  }

  /* did we happen to hit the timed op? */
  if (op == pt_tq.op) {
    pt_tq.op = op->prev;
  }

  /* op's links are still intact here, so the successor survives below */
  next = op->next;
  op->next = op->prev = NULL;
  op->status = pt_continuation_done;

  pt_tq.op_count -= 1;
#if defined(DEBUG)
  pt_debug.continuationsServed += 1;
#endif
  /* wake the thread that submitted this operation (see pt_Continue) */
  PR_NotifyCondVar(op->complete);

  return next;
} /* pt_FinishTimedInternal */
   4075 
   4076 static void ContinuationThread(void* arg) {
   4077  /* initialization */
   4078  fd_set readSet, writeSet, exceptSet;
   4079  struct timeval tv;
   4080  SOCKET* pollingList = 0;          /* list built for polling */
   4081  PRIntn pollingListUsed;           /* # entries used in the list */
   4082  PRIntn pollingListNeeded;         /* # entries needed this time */
   4083  PRIntn pollingSlotsAllocated = 0; /* # entries available in list */
   4084  PRIntervalTime mx_select_ticks =
   4085      PR_MillisecondsToInterval(PT_DEFAULT_SELECT_MSEC);
   4086 
   4087  /* do some real work */
   4088  while (1) {
   4089    PRIntn rv;
   4090    PRStatus status;
   4091    PRIntn pollIndex;
   4092    pt_Continuation* op;
   4093    PRIntervalTime now = PR_IntervalNow();
   4094    PRIntervalTime timeout = PR_INTERVAL_NO_TIMEOUT;
   4095 
   4096    PR_Lock(pt_tq.ml);
   4097    while (NULL == pt_tq.head) {
   4098      status = PR_WaitCondVar(pt_tq.new_op, PR_INTERVAL_NO_TIMEOUT);
   4099      if ((PR_FAILURE == status) &&
   4100          (PR_PENDING_INTERRUPT_ERROR == PR_GetError())) {
   4101        break;
   4102      }
   4103    }
   4104    pollingListNeeded = pt_tq.op_count;
   4105    PR_Unlock(pt_tq.ml);
   4106 
   4107    /* Okay. We're history */
   4108    if ((PR_FAILURE == status) &&
   4109        (PR_PENDING_INTERRUPT_ERROR == PR_GetError())) {
   4110      break;
   4111    }
   4112 
   4113    /*
   4114     * We are not holding the pt_tq.ml lock now, so more items may
   4115     * get added to pt_tq during this window of time.  We hope
   4116     * that 10 more spaces in the polling list should be enough.
   4117     */
   4118 
   4119    FD_ZERO(&readSet);
   4120    FD_ZERO(&writeSet);
   4121    FD_ZERO(&exceptSet);
   4122    pollingListNeeded += 10;
   4123    if (pollingListNeeded > pollingSlotsAllocated) {
   4124      if (NULL != pollingList) {
   4125        PR_DELETE(pollingList);
   4126      }
   4127      pollingList = PR_MALLOC(pollingListNeeded * sizeof(PRPollDesc));
   4128      PR_ASSERT(NULL != pollingList);
   4129      pollingSlotsAllocated = pollingListNeeded;
   4130    }
   4131 
   4132 #if defined(DEBUG)
   4133    if (pollingListNeeded > pt_debug.pollingListMax) {
   4134      pt_debug.pollingListMax = pollingListUsed;
   4135    }
   4136 #endif
   4137 
   4138    /*
   4139     * Build up a polling list.
   4140     * This list is sorted on time. Operations that have been
   4141     * interrupted are completed and not included in the list.
   4142     * There is an assertion that the operation is in progress.
   4143     */
   4144    pollingListUsed = 0;
   4145    PR_Lock(pt_tq.ml);
   4146 
   4147    for (op = pt_tq.head; NULL != op;) {
   4148      if (pt_continuation_abort == op->status) {
   4149        op->result.code = -1;
   4150        op->syserrno = WSAEINTR;
   4151        op = pt_FinishTimedInternal(op);
   4152      } else {
   4153        PR_ASSERT(pt_continuation_done != op->status);
   4154        op->status = pt_continuation_inprogress;
   4155        if (op->event & PR_POLL_READ) {
   4156          FD_SET(op->arg1.osfd, &readSet);
   4157        }
   4158        if (op->event & PR_POLL_WRITE) {
   4159          FD_SET(op->arg1.osfd, &writeSet);
   4160        }
   4161        if (op->event & PR_POLL_EXCEPT) {
   4162          FD_SET(op->arg1.osfd, &exceptSet);
   4163        }
   4164        pollingList[pollingListUsed] = op->arg1.osfd;
   4165        pollingListUsed += 1;
   4166        if (pollingListUsed == pollingSlotsAllocated) {
   4167          break;
   4168        }
   4169        op = op->next;
   4170      }
   4171    }
   4172 
   4173    PR_Unlock(pt_tq.ml);
   4174 
   4175    /*
   4176     * If 'op' isn't NULL at this point, then we didn't get to
   4177     * the end of the list. That means that more items got added
   4178     * to the list than we anticipated. So, forget this iteration,
   4179     * go around the horn again.
   4180     * One would hope this doesn't happen all that often.
   4181     */
   4182    if (NULL != op) {
   4183 #if defined(DEBUG)
   4184      pt_debug.predictionsFoiled += 1; /* keep track */
   4185 #endif
   4186      continue; /* make it rethink things */
   4187    }
   4188 
   4189    /* there's a chance that all ops got blown away */
   4190    if (NULL == pt_tq.head) {
   4191      continue;
   4192    }
   4193    /* if not, we know this is the shortest timeout */
   4194    timeout = pt_tq.head->timeout;
   4195 
   4196    /*
   4197     * We don't want to wait forever on this poll. So keep
   4198     * the interval down. The operations, if they are timed,
   4199     * still have to timeout, while those that are not timed
   4200     * should persist forever. But they may be aborted. That's
   4201     * what this anxiety is all about.
   4202     */
   4203    if (timeout > mx_select_ticks) {
   4204      timeout = mx_select_ticks;
   4205    }
   4206 
   4207    if (PR_INTERVAL_NO_TIMEOUT != pt_tq.head->timeout) {
   4208      pt_tq.head->timeout -= timeout;
   4209    }
   4210    tv.tv_sec = PR_IntervalToSeconds(timeout);
   4211    tv.tv_usec = PR_IntervalToMicroseconds(timeout) % PR_USEC_PER_SEC;
   4212 
   4213    rv = select(0, &readSet, &writeSet, &exceptSet, &tv);
   4214 
   4215    if (0 == rv) /* poll timed out - what about leading op? */
   4216    {
   4217      if (0 == pt_tq.head->timeout) {
   4218        /*
   4219         * The leading element of the timed queue has timed
   4220         * out. Get rid of it. In any case go around the
   4221         * loop again, computing the polling list, checking
   4222         * for interrupted operations.
   4223         */
   4224        PR_Lock(pt_tq.ml);
   4225        do {
   4226          pt_tq.head->result.code = -1;
   4227          pt_tq.head->syserrno = WSAETIMEDOUT;
   4228          op = pt_FinishTimedInternal(pt_tq.head);
   4229        } while ((NULL != op) && (0 == op->timeout));
   4230        PR_Unlock(pt_tq.ml);
   4231      }
   4232      continue;
   4233    }
   4234 
   4235    if (-1 == rv && (WSAGetLastError() == WSAEINTR ||
   4236                     WSAGetLastError() == WSAEINPROGRESS)) {
   4237      continue; /* go around the loop again */
   4238    }
   4239 
   4240    /*
   4241     * select() says that something in our list is ready for some more
   4242     * action or is an invalid fd. Find it, load up the operation and
   4243     * see what happens.
   4244     */
   4245 
   4246    PR_ASSERT(rv > 0 || WSAGetLastError() == WSAENOTSOCK);
   4247 
   4248    /*
   4249     * $$$ There's a problem here. I'm running the operations list
   4250     * and I'm not holding any locks. I don't want to hold the lock
   4251     * and do the operation, so this is really messed up..
   4252     *
   4253     * This may work out okay. The rule is that only this thread,
   4254     * the continuation thread, can remove elements from the list.
   4255     * Therefore, the list is at worst, longer than when we built
   4256     * the polling list.
   4257     */
   4258    op = pt_tq.head;
   4259    for (pollIndex = 0; pollIndex < pollingListUsed; ++pollIndex) {
   4260      PRInt16 revents = 0;
   4261 
   4262      PR_ASSERT(NULL != op);
   4263 
   4264      /*
   4265       * This one wants attention. Redo the operation.
   4266       * We know that there can only be more elements
   4267       * in the op list than we knew about when we created
   4268       * the poll list. Therefore, we might have to skip
   4269       * a few ops to find the right one to operation on.
   4270       */
   4271      while (pollingList[pollIndex] != op->arg1.osfd) {
   4272        op = op->next;
   4273        PR_ASSERT(NULL != op);
   4274      }
   4275 
   4276      if (FD_ISSET(op->arg1.osfd, &readSet)) {
   4277        revents |= PR_POLL_READ;
   4278      }
   4279      if (FD_ISSET(op->arg1.osfd, &writeSet)) {
   4280        revents |= PR_POLL_WRITE;
   4281      }
   4282      if (FD_ISSET(op->arg1.osfd, &exceptSet)) {
   4283        revents |= PR_POLL_EXCEPT;
   4284      }
   4285 
   4286      /*
   4287       * Sip over all those not in progress. They'll be
   4288       * pruned next time we build a polling list. Call
   4289       * the continuation function. If it reports completion,
   4290       * finish off the operation.
   4291       */
   4292      if (revents && (pt_continuation_inprogress == op->status) &&
   4293          (op->function(op, revents))) {
   4294        PR_Lock(pt_tq.ml);
   4295        op = pt_FinishTimedInternal(op);
   4296        PR_Unlock(pt_tq.ml);
   4297      }
   4298    }
   4299  }
   4300  if (NULL != pollingList) {
   4301    PR_DELETE(pollingList);
   4302  }
   4303 } /* ContinuationThread */
   4304 
/*
 * Submit a continuation operation to the continuation thread and block
 * the calling thread until it completes (or is aborted).  Returns
 * op->result.code; on failure op->syserrno carries the Winsock error.
 */
static int pt_Continue(pt_Continuation* op) {
  PRStatus rv;
  /* Finish filling in the blank slots */
  op->status = pt_continuation_sumbitted; /* [sic] enum spelling */
  /* NOTE(review): PR_NewCondVar may return NULL on allocation failure;
   * the result is not checked here -- TODO confirm intended policy. */
  op->complete = PR_NewCondVar(pt_tq.ml);

  PR_Lock(pt_tq.ml); /* we provide the locking */

  pt_InsertTimedInternal(op); /* insert in the structure */

  PR_NotifyCondVar(pt_tq.new_op); /* notify the continuation thread */

  while (pt_continuation_done != op->status) /* wait for completion */
  {
    rv = PR_WaitCondVar(op->complete, PR_INTERVAL_NO_TIMEOUT);
    /*
     * If we get interrupted, we set state the continuation thread will
     * see and allow it to finish the I/O operation w/ error. That way
     * the rule that only the continuation thread is removing elements
     * from the list is still valid.
     *
     * Don't call interrupt on the continuation thread. That'll just
     * piss him off. He's cycling around at least every mx_select_ticks
     * anyhow and should notice the request in there.
     */
    if ((PR_FAILURE == rv) && (PR_PENDING_INTERRUPT_ERROR == PR_GetError())) {
      op->status = pt_continuation_abort; /* our status */
    }
  }

  PR_Unlock(pt_tq.ml); /* we provide the locking */

  PR_DestroyCondVar(op->complete);

  return op->result.code; /* and the primary answer */
} /* pt_Continue */
   4341 
   4342 static PRBool pt_sendto_cont(pt_Continuation* op, PRInt16 revents) {
   4343  PRIntn bytes =
   4344      sendto(op->arg1.osfd, op->arg2.buffer, op->arg3.amount, op->arg4.flags,
   4345             (struct sockaddr*)op->arg5.addr, sizeof(*(op->arg5.addr)));
   4346  op->syserrno = WSAGetLastError();
   4347  if (bytes > 0) /* this is progress */
   4348  {
   4349    char* bp = op->arg2.buffer;
   4350    bp += bytes; /* adjust the buffer pointer */
   4351    op->arg2.buffer = bp;
   4352    op->result.code += bytes; /* accumulate the number sent */
   4353    op->arg3.amount -= bytes; /* and reduce the required count */
   4354    return (0 == op->arg3.amount) ? PR_TRUE : PR_FALSE;
   4355  } else
   4356    return ((-1 == bytes) && (WSAEWOULDBLOCK == op->syserrno)) ? PR_FALSE
   4357                                                               : PR_TRUE;
   4358 } /* pt_sendto_cont */
   4359 
   4360 static PRBool pt_recvfrom_cont(pt_Continuation* op, PRInt16 revents) {
   4361  PRIntn addr_len = sizeof(*(op->arg5.addr));
   4362  op->result.code =
   4363      recvfrom(op->arg1.osfd, op->arg2.buffer, op->arg3.amount, op->arg4.flags,
   4364               (struct sockaddr*)op->arg5.addr, &addr_len);
   4365  op->syserrno = WSAGetLastError();
   4366  return ((-1 == op->result.code) && (WSAEWOULDBLOCK == op->syserrno))
   4367             ? PR_FALSE
   4368             : PR_TRUE;
   4369 } /* pt_recvfrom_cont */
   4370 
   4371 static PRInt32 pt_SendTo(SOCKET osfd, const void* buf, PRInt32 amount,
   4372                         PRInt32 flags, const PRNetAddr* addr, PRIntn addrlen,
   4373                         PRIntervalTime timeout) {
   4374  PRInt32 bytes = -1, err;
   4375  PRBool fNeedContinue = PR_FALSE;
   4376 
   4377  bytes = sendto(osfd, buf, amount, flags, (struct sockaddr*)addr,
   4378                 PR_NETADDR_SIZE(addr));
   4379  if (bytes == -1) {
   4380    if ((err = WSAGetLastError()) == WSAEWOULDBLOCK) {
   4381      fNeedContinue = PR_TRUE;
   4382    } else {
   4383      _PR_MD_MAP_SENDTO_ERROR(err);
   4384    }
   4385  }
   4386  if (fNeedContinue == PR_TRUE) {
   4387    pt_Continuation op;
   4388    op.arg1.osfd = osfd;
   4389    op.arg2.buffer = (void*)buf;
   4390    op.arg3.amount = amount;
   4391    op.arg4.flags = flags;
   4392    op.arg5.addr = (PRNetAddr*)addr;
   4393    op.timeout = timeout;
   4394    op.result.code = 0; /* initialize the number sent */
   4395    op.function = pt_sendto_cont;
   4396    op.event = PR_POLL_WRITE | PR_POLL_EXCEPT;
   4397    bytes = pt_Continue(&op);
   4398    if (bytes < 0) {
   4399      WSASetLastError(op.syserrno);
   4400      _PR_MD_MAP_SENDTO_ERROR(op.syserrno);
   4401    }
   4402  }
   4403  return bytes;
   4404 } /* pt_SendTo */
   4405 
   4406 static PRInt32 pt_RecvFrom(SOCKET osfd, void* buf, PRInt32 amount,
   4407                           PRInt32 flags, PRNetAddr* addr, PRIntn* addr_len,
   4408                           PRIntervalTime timeout) {
   4409  PRInt32 bytes = -1, err;
   4410  PRBool fNeedContinue = PR_FALSE;
   4411 
   4412  bytes = recvfrom(osfd, buf, amount, flags, (struct sockaddr*)addr, addr_len);
   4413  if (bytes == -1) {
   4414    if ((err = WSAGetLastError()) == WSAEWOULDBLOCK) {
   4415      fNeedContinue = PR_TRUE;
   4416    } else {
   4417      _PR_MD_MAP_RECVFROM_ERROR(err);
   4418    }
   4419  }
   4420 
   4421  if (fNeedContinue == PR_TRUE) {
   4422    pt_Continuation op;
   4423    op.arg1.osfd = osfd;
   4424    op.arg2.buffer = buf;
   4425    op.arg3.amount = amount;
   4426    op.arg4.flags = flags;
   4427    op.arg5.addr = addr;
   4428    op.timeout = timeout;
   4429    op.function = pt_recvfrom_cont;
   4430    op.event = PR_POLL_READ | PR_POLL_EXCEPT;
   4431    bytes = pt_Continue(&op);
   4432    if (bytes < 0) {
   4433      WSASetLastError(op.syserrno);
   4434      _PR_MD_MAP_RECVFROM_ERROR(op.syserrno);
   4435    }
   4436  }
   4437  return bytes;
   4438 } /* pt_RecvFrom */