]> granicus.if.org Git - postgresql/blob - src/backend/port/win32/socket.c
Attempt to fix some issues in our Windows socket code.
[postgresql] / src / backend / port / win32 / socket.c
1 /*-------------------------------------------------------------------------
2  *
3  * socket.c
4  *        Microsoft Windows Win32 Socket Functions
5  *
6  * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *        src/backend/port/win32/socket.c
10  *
11  *-------------------------------------------------------------------------
12  */
13
14 #include "postgres.h"
15
16 /*
17  * Indicate if pgwin32_recv() and pgwin32_send() should operate
18  * in non-blocking mode.
19  *
20  * Since the socket emulation layer always sets the actual socket to
21  * non-blocking mode in order to be able to deliver signals, we must
22  * specify this in a separate flag if we actually need non-blocking
23  * operation.
24  *
25  * This flag changes the behaviour *globally* for all socket operations,
26  * so it should only be set for very short periods of time.
27  */
28 int                     pgwin32_noblock = 0;
29
30 #undef socket
31 #undef accept
32 #undef connect
33 #undef select
34 #undef recv
35 #undef send
36
37 /*
38  * Blocking socket functions implemented so they listen on both
39  * the socket and the signal event, required for signal handling.
40  */
41
/*
 * Map the most recent Winsock error (as reported by WSAGetLastError)
 * onto errno.
 *
 * Several Winsock codes are folded onto a single errno value.  A code
 * that is not listed here is reported at NOTICE level and mapped to
 * EINVAL.
 */
static void
TranslateSocketError(void)
{
	int			wsaerr = WSAGetLastError();

	switch (wsaerr)
	{
			/* Codes with a direct errno counterpart */
		case WSAEAFNOSUPPORT:
			errno = EAFNOSUPPORT;
			return;
		case WSAEMFILE:
			errno = EMFILE;
			return;
		case WSAENOBUFS:
			errno = ENOBUFS;
			return;
		case WSAECONNREFUSED:
			errno = ECONNREFUSED;
			return;
		case WSAEINTR:
			errno = EINTR;
			return;
		case WSAENOTSOCK:
			errno = EBADFD;
			return;
		case WSAEOPNOTSUPP:
			errno = EOPNOTSUPP;
			return;
		case WSAEWOULDBLOCK:
			errno = EWOULDBLOCK;
			return;
		case WSAEACCES:
			errno = EACCES;
			return;

			/* Protocol problems */
		case WSAEPROTONOSUPPORT:
		case WSAEPROTOTYPE:
			errno = EPROTONOSUPPORT;
			return;

			/* Everything that is reported as a plain invalid-argument error */
		case WSANOTINITIALISED:
		case WSAENETDOWN:
		case WSAEINPROGRESS:
		case WSAEINVAL:
		case WSAESOCKTNOSUPPORT:
		case WSAEFAULT:
		case WSAEINVALIDPROVIDER:
		case WSAEINVALIDPROCTABLE:
		case WSAEMSGSIZE:
			errno = EINVAL;
			return;

			/* Lost or missing connection */
		case WSAENOTCONN:
		case WSAENETRESET:
		case WSAECONNRESET:
		case WSAESHUTDOWN:
		case WSAECONNABORTED:
		case WSAEDISCON:
			errno = ECONNREFUSED;		/* ENOTCONN? */
			return;

		default:
			ereport(NOTICE,
					(errmsg_internal("unrecognized win32 socket error code: %d", wsaerr)));
			errno = EINVAL;
			return;
	}
}
106
/*
 * Dispatch queued signals if UNBLOCKED_SIGNAL_QUEUE() reports any.
 *
 * Returns 1 with errno set to EINTR when signals were dispatched,
 * otherwise 0 (errno untouched).
 */
static int
pgwin32_poll_signals(void)
{
	if (!UNBLOCKED_SIGNAL_QUEUE())
		return 0;

	pgwin32_dispatch_queued_signals();
	errno = EINTR;
	return 1;
}
118
119 static int
120 isDataGram(SOCKET s)
121 {
122         int                     type;
123         int                     typelen = sizeof(type);
124
125         if (getsockopt(s, SOL_SOCKET, SO_TYPE, (char *) &type, &typelen))
126                 return 1;
127
128         return (type == SOCK_DGRAM) ? 1 : 0;
129 }
130
131 int
132 pgwin32_waitforsinglesocket(SOCKET s, int what, int timeout)
133 {
134         static HANDLE waitevent = INVALID_HANDLE_VALUE;
135         static SOCKET current_socket = -1;
136         static int      isUDP = 0;
137         HANDLE          events[2];
138         int                     r;
139
140         /* Create an event object just once and use it on all future calls */
141         if (waitevent == INVALID_HANDLE_VALUE)
142         {
143                 waitevent = CreateEvent(NULL, TRUE, FALSE, NULL);
144
145                 if (waitevent == INVALID_HANDLE_VALUE)
146                         ereport(ERROR,
147                                         (errmsg_internal("could not create socket waiting event: error code %lu", GetLastError())));
148         }
149         else if (!ResetEvent(waitevent))
150                 ereport(ERROR,
151                                 (errmsg_internal("could not reset socket waiting event: error code %lu", GetLastError())));
152
153         /*
154          * Track whether socket is UDP or not.  (NB: most likely, this is both
155          * useless and wrong; there is no reason to think that the behavior of
156          * WSAEventSelect is different for TCP and UDP.)
157          */
158         if (current_socket != s)
159                 isUDP = isDataGram(s);
160         current_socket = s;
161
162         /*
163          * Attach event to socket.  NOTE: we must detach it again before returning,
164          * since other bits of code may try to attach other events to the socket.
165          */
166         if (WSAEventSelect(s, waitevent, what) != 0)
167         {
168                 TranslateSocketError();
169                 return 0;
170         }
171
172         events[0] = pgwin32_signal_event;
173         events[1] = waitevent;
174
175         /*
176          * Just a workaround of unknown locking problem with writing in UDP socket
177          * under high load: Client's pgsql backend sleeps infinitely in
178          * WaitForMultipleObjectsEx, pgstat process sleeps in pgwin32_select().
179          * So, we will wait with small timeout(0.1 sec) and if sockect is still
180          * blocked, try WSASend (see comments in pgwin32_select) and wait again.
181          */
182         if ((what & FD_WRITE) && isUDP)
183         {
184                 for (;;)
185                 {
186                         r = WaitForMultipleObjectsEx(2, events, FALSE, 100, TRUE);
187
188                         if (r == WAIT_TIMEOUT)
189                         {
190                                 char            c;
191                                 WSABUF          buf;
192                                 DWORD           sent;
193
194                                 buf.buf = &c;
195                                 buf.len = 0;
196
197                                 r = WSASend(s, &buf, 1, &sent, 0, NULL, NULL);
198                                 if (r == 0)             /* Completed - means things are fine! */
199                                 {
200                                         WSAEventSelect(s, NULL, 0);
201                                         return 1;
202                                 }
203                                 else if (WSAGetLastError() != WSAEWOULDBLOCK)
204                                 {
205                                         TranslateSocketError();
206                                         WSAEventSelect(s, NULL, 0);
207                                         return 0;
208                                 }
209                         }
210                         else
211                                 break;
212                 }
213         }
214         else
215                 r = WaitForMultipleObjectsEx(2, events, FALSE, timeout, TRUE);
216
217         WSAEventSelect(s, NULL, 0);
218
219         if (r == WAIT_OBJECT_0 || r == WAIT_IO_COMPLETION)
220         {
221                 pgwin32_dispatch_queued_signals();
222                 errno = EINTR;
223                 return 0;
224         }
225         if (r == WAIT_OBJECT_0 + 1)
226                 return 1;
227         if (r == WAIT_TIMEOUT)
228         {
229                 errno = EWOULDBLOCK;
230                 return 0;
231         }
232         ereport(ERROR,
233                         (errmsg_internal("unrecognized return value from WaitForMultipleObjects: %d (error code %lu)", r, GetLastError())));
234         return 0;
235 }
236
237 /*
238  * Create a socket, setting it to overlapped and non-blocking
239  */
240 SOCKET
241 pgwin32_socket(int af, int type, int protocol)
242 {
243         SOCKET          s;
244         unsigned long on = 1;
245
246         s = WSASocket(af, type, protocol, NULL, 0, WSA_FLAG_OVERLAPPED);
247         if (s == INVALID_SOCKET)
248         {
249                 TranslateSocketError();
250                 return INVALID_SOCKET;
251         }
252
253         if (ioctlsocket(s, FIONBIO, &on))
254         {
255                 TranslateSocketError();
256                 return INVALID_SOCKET;
257         }
258         errno = 0;
259
260         return s;
261 }
262
263
264 SOCKET
265 pgwin32_accept(SOCKET s, struct sockaddr * addr, int *addrlen)
266 {
267         SOCKET          rs;
268
269         /*
270          * Poll for signals, but don't return with EINTR, since we don't handle
271          * that in pqcomm.c
272          */
273         pgwin32_poll_signals();
274
275         rs = WSAAccept(s, addr, addrlen, NULL, 0);
276         if (rs == INVALID_SOCKET)
277         {
278                 TranslateSocketError();
279                 return INVALID_SOCKET;
280         }
281         return rs;
282 }
283
284
285 /* No signal delivery during connect. */
286 int
287 pgwin32_connect(SOCKET s, const struct sockaddr * addr, int addrlen)
288 {
289         int                     r;
290
291         r = WSAConnect(s, addr, addrlen, NULL, NULL, NULL, NULL);
292         if (r == 0)
293                 return 0;
294
295         if (WSAGetLastError() != WSAEWOULDBLOCK)
296         {
297                 TranslateSocketError();
298                 return -1;
299         }
300
301         while (pgwin32_waitforsinglesocket(s, FD_CONNECT, INFINITE) == 0)
302         {
303                 /* Loop endlessly as long as we are just delivering signals */
304         }
305
306         return 0;
307 }
308
309 int
310 pgwin32_recv(SOCKET s, char *buf, int len, int f)
311 {
312         WSABUF          wbuf;
313         int                     r;
314         DWORD           b;
315         DWORD           flags = f;
316         int                     n;
317
318         if (pgwin32_poll_signals())
319                 return -1;
320
321         wbuf.len = len;
322         wbuf.buf = buf;
323
324         r = WSARecv(s, &wbuf, 1, &b, &flags, NULL, NULL);
325         if (r != SOCKET_ERROR && b > 0)
326                 /* Read succeeded right away */
327                 return b;
328
329         if (r == SOCKET_ERROR &&
330                 WSAGetLastError() != WSAEWOULDBLOCK)
331         {
332                 TranslateSocketError();
333                 return -1;
334         }
335
336         if (pgwin32_noblock)
337         {
338                 /*
339                  * No data received, and we are in "emulated non-blocking mode", so
340                  * return indicating that we'd block if we were to continue.
341                  */
342                 errno = EWOULDBLOCK;
343                 return -1;
344         }
345
346         /* No error, zero bytes (win2000+) or error+WSAEWOULDBLOCK (<=nt4) */
347
348         for (n = 0; n < 5; n++)
349         {
350                 if (pgwin32_waitforsinglesocket(s, FD_READ | FD_CLOSE | FD_ACCEPT,
351                                                                                 INFINITE) == 0)
352                         return -1;                      /* errno already set */
353
354                 r = WSARecv(s, &wbuf, 1, &b, &flags, NULL, NULL);
355                 if (r == SOCKET_ERROR)
356                 {
357                         if (WSAGetLastError() == WSAEWOULDBLOCK)
358                         {
359                                 /*
360                                  * There seem to be cases on win2k (at least) where WSARecv
361                                  * can return WSAEWOULDBLOCK even when
362                                  * pgwin32_waitforsinglesocket claims the socket is readable.
363                                  * In this case, just sleep for a moment and try again. We try
364                                  * up to 5 times - if it fails more than that it's not likely
365                                  * to ever come back.
366                                  */
367                                 pg_usleep(10000);
368                                 continue;
369                         }
370                         TranslateSocketError();
371                         return -1;
372                 }
373                 return b;
374         }
375         ereport(NOTICE,
376           (errmsg_internal("could not read from ready socket (after retries)")));
377         errno = EWOULDBLOCK;
378         return -1;
379 }
380
381 /*
382  * The second argument to send() is defined by SUS to be a "const void *"
383  * and so we use the same signature here to keep compilers happy when
384  * handling callers.
385  *
386  * But the buf member of a WSABUF struct is defined as "char *", so we cast
387  * the second argument to that here when assigning it, also to keep compilers
388  * happy.
389  */
390
391 int
392 pgwin32_send(SOCKET s, const void *buf, int len, int flags)
393 {
394         WSABUF          wbuf;
395         int                     r;
396         DWORD           b;
397
398         if (pgwin32_poll_signals())
399                 return -1;
400
401         wbuf.len = len;
402         wbuf.buf = (char *) buf;
403
404         /*
405          * Readiness of socket to send data to UDP socket may be not true: socket
406          * can become busy again! So loop until send or error occurs.
407          */
408         for (;;)
409         {
410                 r = WSASend(s, &wbuf, 1, &b, flags, NULL, NULL);
411                 if (r != SOCKET_ERROR && b > 0)
412                         /* Write succeeded right away */
413                         return b;
414
415                 if (r == SOCKET_ERROR &&
416                         WSAGetLastError() != WSAEWOULDBLOCK)
417                 {
418                         TranslateSocketError();
419                         return -1;
420                 }
421
422                 if (pgwin32_noblock)
423                 {
424                         /*
425                          * No data sent, and we are in "emulated non-blocking mode", so
426                          * return indicating that we'd block if we were to continue.
427                          */
428                         errno = EWOULDBLOCK;
429                         return -1;
430                 }
431
432                 /* No error, zero bytes (win2000+) or error+WSAEWOULDBLOCK (<=nt4) */
433
434                 if (pgwin32_waitforsinglesocket(s, FD_WRITE | FD_CLOSE, INFINITE) == 0)
435                         return -1;
436         }
437
438         return -1;
439 }
440
441
442 /*
443  * Wait for activity on one or more sockets.
444  * While waiting, allow signals to run
445  *
446  * NOTE! Currently does not implement exceptfds check,
447  * since it is not used in postgresql!
448  */
449 int
450 pgwin32_select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timeval * timeout)
451 {
452         WSAEVENT        events[FD_SETSIZE * 2]; /* worst case is readfds totally
453                                                                                  * different from writefds, so
454                                                                                  * 2*FD_SETSIZE sockets */
455         SOCKET          sockets[FD_SETSIZE * 2];
456         int                     numevents = 0;
457         int                     i;
458         int                     r;
459         DWORD           timeoutval = WSA_INFINITE;
460         FD_SET          outreadfds;
461         FD_SET          outwritefds;
462         int                     nummatches = 0;
463
464         Assert(exceptfds == NULL);
465
466         if (pgwin32_poll_signals())
467                 return -1;
468
469         FD_ZERO(&outreadfds);
470         FD_ZERO(&outwritefds);
471
472         /*
473          * Write FDs are different in the way that it is only flagged by
474          * WSASelectEvent() if we have tried to write to them first. So try an
475          * empty write
476          */
477         if (writefds)
478         {
479                 for (i = 0; i < writefds->fd_count; i++)
480                 {
481                         char            c;
482                         WSABUF          buf;
483                         DWORD           sent;
484
485                         buf.buf = &c;
486                         buf.len = 0;
487
488                         r = WSASend(writefds->fd_array[i], &buf, 1, &sent, 0, NULL, NULL);
489                         if (r == 0)                     /* Completed - means things are fine! */
490                                 FD_SET(writefds->fd_array[i], &outwritefds);
491
492                         else
493                         {                                       /* Not completed */
494                                 if (WSAGetLastError() != WSAEWOULDBLOCK)
495
496                                         /*
497                                          * Not completed, and not just "would block", so an error
498                                          * occurred
499                                          */
500                                         FD_SET(writefds->fd_array[i], &outwritefds);
501                         }
502                 }
503                 if (outwritefds.fd_count > 0)
504                 {
505                         memcpy(writefds, &outwritefds, sizeof(fd_set));
506                         if (readfds)
507                                 FD_ZERO(readfds);
508                         return outwritefds.fd_count;
509                 }
510         }
511
512
513         /* Now set up for an actual select */
514
515         if (timeout != NULL)
516         {
517                 /* timeoutval is in milliseconds */
518                 timeoutval = timeout->tv_sec * 1000 + timeout->tv_usec / 1000;
519         }
520
521         if (readfds != NULL)
522         {
523                 for (i = 0; i < readfds->fd_count; i++)
524                 {
525                         events[numevents] = WSACreateEvent();
526                         sockets[numevents] = readfds->fd_array[i];
527                         numevents++;
528                 }
529         }
530         if (writefds != NULL)
531         {
532                 for (i = 0; i < writefds->fd_count; i++)
533                 {
534                         if (!readfds ||
535                                 !FD_ISSET(writefds->fd_array[i], readfds))
536                         {
537                                 /* If the socket is not in the read list */
538                                 events[numevents] = WSACreateEvent();
539                                 sockets[numevents] = writefds->fd_array[i];
540                                 numevents++;
541                         }
542                 }
543         }
544
545         for (i = 0; i < numevents; i++)
546         {
547                 int                     flags = 0;
548
549                 if (readfds && FD_ISSET(sockets[i], readfds))
550                         flags |= FD_READ | FD_ACCEPT | FD_CLOSE;
551
552                 if (writefds && FD_ISSET(sockets[i], writefds))
553                         flags |= FD_WRITE | FD_CLOSE;
554
555                 if (WSAEventSelect(sockets[i], events[i], flags) != 0)
556                 {
557                         TranslateSocketError();
558                         /* release already-assigned event objects */
559                         while (--i >= 0)
560                                 WSAEventSelect(sockets[i], NULL, 0);
561                         for (i = 0; i < numevents; i++)
562                                 WSACloseEvent(events[i]);
563                         return -1;
564                 }
565         }
566
567         events[numevents] = pgwin32_signal_event;
568         r = WaitForMultipleObjectsEx(numevents + 1, events, FALSE, timeoutval, TRUE);
569         if (r != WAIT_TIMEOUT && r != WAIT_IO_COMPLETION && r != (WAIT_OBJECT_0 + numevents))
570         {
571                 /*
572                  * We scan all events, even those not signalled, in case more than one
573                  * event has been tagged but Wait.. can only return one.
574                  */
575                 WSANETWORKEVENTS resEvents;
576
577                 for (i = 0; i < numevents; i++)
578                 {
579                         ZeroMemory(&resEvents, sizeof(resEvents));
580                         if (WSAEnumNetworkEvents(sockets[i], events[i], &resEvents) != 0)
581                                 elog(ERROR, "failed to enumerate network events: error code %u",
582                                          WSAGetLastError());
583                         /* Read activity? */
584                         if (readfds && FD_ISSET(sockets[i], readfds))
585                         {
586                                 if ((resEvents.lNetworkEvents & FD_READ) ||
587                                         (resEvents.lNetworkEvents & FD_ACCEPT) ||
588                                         (resEvents.lNetworkEvents & FD_CLOSE))
589                                 {
590                                         FD_SET(sockets[i], &outreadfds);
591
592                                         nummatches++;
593                                 }
594                         }
595                         /* Write activity? */
596                         if (writefds && FD_ISSET(sockets[i], writefds))
597                         {
598                                 if ((resEvents.lNetworkEvents & FD_WRITE) ||
599                                         (resEvents.lNetworkEvents & FD_CLOSE))
600                                 {
601                                         FD_SET(sockets[i], &outwritefds);
602
603                                         nummatches++;
604                                 }
605                         }
606                 }
607         }
608
609         /* Clean up all the event objects */
610         for (i = 0; i < numevents; i++)
611         {
612                 WSAEventSelect(sockets[i], NULL, 0);
613                 WSACloseEvent(events[i]);
614         }
615
616         if (r == WSA_WAIT_TIMEOUT)
617         {
618                 if (readfds)
619                         FD_ZERO(readfds);
620                 if (writefds)
621                         FD_ZERO(writefds);
622                 return 0;
623         }
624
625         if (r == WAIT_OBJECT_0 + numevents)
626         {
627                 pgwin32_dispatch_queued_signals();
628                 errno = EINTR;
629                 if (readfds)
630                         FD_ZERO(readfds);
631                 if (writefds)
632                         FD_ZERO(writefds);
633                 return -1;
634         }
635
636         /* Overwrite socket sets with our resulting values */
637         if (readfds)
638                 memcpy(readfds, &outreadfds, sizeof(fd_set));
639         if (writefds)
640                 memcpy(writefds, &outwritefds, sizeof(fd_set));
641         return nummatches;
642 }
643
644
645 /*
646  * Return win32 error string, since strerror can't
647  * handle winsock codes
648  */
649 static char wserrbuf[256];
650 const char *
651 pgwin32_socket_strerror(int err)
652 {
653         static HANDLE handleDLL = INVALID_HANDLE_VALUE;
654
655         if (handleDLL == INVALID_HANDLE_VALUE)
656         {
657                 handleDLL = LoadLibraryEx("netmsg.dll", NULL, DONT_RESOLVE_DLL_REFERENCES | LOAD_LIBRARY_AS_DATAFILE);
658                 if (handleDLL == NULL)
659                         ereport(FATAL,
660                                         (errmsg_internal("could not load netmsg.dll: error code %lu", GetLastError())));
661         }
662
663         ZeroMemory(&wserrbuf, sizeof(wserrbuf));
664         if (FormatMessage(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_FROM_HMODULE,
665                                           handleDLL,
666                                           err,
667                                           MAKELANGID(LANG_ENGLISH, SUBLANG_DEFAULT),
668                                           wserrbuf,
669                                           sizeof(wserrbuf) - 1,
670                                           NULL) == 0)
671         {
672                 /* Failed to get id */
673                 sprintf(wserrbuf, "unrecognized winsock error %d", err);
674         }
675         return wserrbuf;
676 }