]> granicus.if.org Git - postgresql/blob - src/backend/port/win32/socket.c
Remove cvs keywords from all files.
[postgresql] / src / backend / port / win32 / socket.c
1 /*-------------------------------------------------------------------------
2  *
3  * socket.c
4  *        Microsoft Windows Win32 Socket Functions
5  *
6  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *        src/backend/port/win32/socket.c
10  *
11  *-------------------------------------------------------------------------
12  */
13
14 #include "postgres.h"
15
/*
 * pgwin32_noblock: when non-zero, pgwin32_recv() acts as a non-blocking
 * call and fails with EWOULDBLOCK instead of waiting for data.
 *
 * The socket emulation layer always puts the real socket into non-blocking
 * mode so that signals can be delivered while we wait.  A caller that truly
 * wants non-blocking behaviour therefore has to request it through this
 * separate flag.
 *
 * The flag is *global* and changes the behaviour of all socket operations,
 * so it should only be set for very short periods of time.
 */
int			pgwin32_noblock = 0;

/*
 * Remove any macro definitions of the plain socket call names for the rest
 * of this file.
 * NOTE(review): presumably a project header redirects these names to the
 * pgwin32_* wrappers defined below -- confirm against the port headers.
 */
#undef socket
#undef accept
#undef connect
#undef select
#undef recv
#undef send
35
36 /*
37  * Blocking socket functions implemented so they listen on both
38  * the socket and the signal event, required for signal handling.
39  */
40
/*
 * Set errno from the calling thread's last Winsock error.
 *
 * Several Winsock codes collapse onto one errno value.  A code that is not
 * listed here is reported with a NOTICE and mapped to EINVAL.
 */
static void
TranslateSocketError(void)
{
	int			wsaerr = WSAGetLastError();

	switch (wsaerr)
	{
			/* direct one-to-one translations */
		case WSAEAFNOSUPPORT:
			errno = EAFNOSUPPORT;
			break;
		case WSAEMFILE:
			errno = EMFILE;
			break;
		case WSAENOBUFS:
			errno = ENOBUFS;
			break;
		case WSAEINTR:
			errno = EINTR;
			break;
		case WSAENOTSOCK:
			errno = EBADFD;
			break;
		case WSAEOPNOTSUPP:
			errno = EOPNOTSUPP;
			break;
		case WSAEWOULDBLOCK:
			errno = EWOULDBLOCK;
			break;
		case WSAEACCES:
			errno = EACCES;
			break;

			/* protocol selection problems */
		case WSAEPROTONOSUPPORT:
		case WSAEPROTOTYPE:
			errno = EPROTONOSUPPORT;
			break;

			/* refused, lost or never-established connections */
		case WSAECONNREFUSED:
		case WSAENOTCONN:
		case WSAENETRESET:
		case WSAECONNRESET:
		case WSAESHUTDOWN:
		case WSAECONNABORTED:
		case WSAEDISCON:
			errno = ECONNREFUSED;		/* ENOTCONN? */
			break;

			/* everything that is reported as an invalid request */
		case WSANOTINITIALISED:
		case WSAENETDOWN:
		case WSAEINPROGRESS:
		case WSAEINVAL:
		case WSAESOCKTNOSUPPORT:
		case WSAEFAULT:
		case WSAEINVALIDPROVIDER:
		case WSAEINVALIDPROCTABLE:
		case WSAEMSGSIZE:
			errno = EINVAL;
			break;

		default:
			ereport(NOTICE,
					(errmsg_internal("Unknown win32 socket error code: %i", wsaerr)));
			errno = EINVAL;
			break;
	}
}
105
/*
 * Dispatch any queued, unblocked signals.
 *
 * Returns 1 with errno set to EINTR if signals were dispatched, otherwise 0
 * with errno untouched.
 */
static int
pgwin32_poll_signals(void)
{
	if (!UNBLOCKED_SIGNAL_QUEUE())
		return 0;

	pgwin32_dispatch_queued_signals();
	errno = EINTR;
	return 1;
}
117
118 static int
119 isDataGram(SOCKET s)
120 {
121         int                     type;
122         int                     typelen = sizeof(type);
123
124         if (getsockopt(s, SOL_SOCKET, SO_TYPE, (char *) &type, &typelen))
125                 return 1;
126
127         return (type == SOCK_DGRAM) ? 1 : 0;
128 }
129
130 int
131 pgwin32_waitforsinglesocket(SOCKET s, int what, int timeout)
132 {
133         static HANDLE waitevent = INVALID_HANDLE_VALUE;
134         static SOCKET current_socket = -1;
135         static int      isUDP = 0;
136         HANDLE          events[2];
137         int                     r;
138
139         if (waitevent == INVALID_HANDLE_VALUE)
140         {
141                 waitevent = CreateEvent(NULL, TRUE, FALSE, NULL);
142
143                 if (waitevent == INVALID_HANDLE_VALUE)
144                         ereport(ERROR,
145                                         (errmsg_internal("Failed to create socket waiting event: %i", (int) GetLastError())));
146         }
147         else if (!ResetEvent(waitevent))
148                 ereport(ERROR,
149                                 (errmsg_internal("Failed to reset socket waiting event: %i", (int) GetLastError())));
150
151         /*
152          * make sure we don't multiplex this kernel event object with a different
153          * socket from a previous call
154          */
155
156         if (current_socket != s)
157         {
158                 if (current_socket != -1)
159                         WSAEventSelect(current_socket, waitevent, 0);
160                 isUDP = isDataGram(s);
161         }
162
163         current_socket = s;
164
165         if (WSAEventSelect(s, waitevent, what) == SOCKET_ERROR)
166         {
167                 TranslateSocketError();
168                 return 0;
169         }
170
171         events[0] = pgwin32_signal_event;
172         events[1] = waitevent;
173
174         /*
175          * Just a workaround of unknown locking problem with writing in UDP socket
176          * under high load: Client's pgsql backend sleeps infinitely in
177          * WaitForMultipleObjectsEx, pgstat process sleeps in pgwin32_select().
178          * So, we will wait with small timeout(0.1 sec) and if sockect is still
179          * blocked, try WSASend (see comments in pgwin32_select) and wait again.
180          */
181         if ((what & FD_WRITE) && isUDP)
182         {
183                 for (;;)
184                 {
185                         r = WaitForMultipleObjectsEx(2, events, FALSE, 100, TRUE);
186
187                         if (r == WAIT_TIMEOUT)
188                         {
189                                 char            c;
190                                 WSABUF          buf;
191                                 DWORD           sent;
192
193                                 buf.buf = &c;
194                                 buf.len = 0;
195
196                                 r = WSASend(s, &buf, 1, &sent, 0, NULL, NULL);
197                                 if (r == 0)             /* Completed - means things are fine! */
198                                         return 1;
199                                 else if (WSAGetLastError() != WSAEWOULDBLOCK)
200                                 {
201                                         TranslateSocketError();
202                                         return 0;
203                                 }
204                         }
205                         else
206                                 break;
207                 }
208         }
209         else
210                 r = WaitForMultipleObjectsEx(2, events, FALSE, timeout, TRUE);
211
212         if (r == WAIT_OBJECT_0 || r == WAIT_IO_COMPLETION)
213         {
214                 pgwin32_dispatch_queued_signals();
215                 errno = EINTR;
216                 return 0;
217         }
218         if (r == WAIT_OBJECT_0 + 1)
219                 return 1;
220         if (r == WAIT_TIMEOUT)
221                 return 0;
222         ereport(ERROR,
223                         (errmsg_internal("Bad return from WaitForMultipleObjects: %i (%i)", r, (int) GetLastError())));
224         return 0;
225 }
226
227 /*
228  * Create a socket, setting it to overlapped and non-blocking
229  */
230 SOCKET
231 pgwin32_socket(int af, int type, int protocol)
232 {
233         SOCKET          s;
234         unsigned long on = 1;
235
236         s = WSASocket(af, type, protocol, NULL, 0, WSA_FLAG_OVERLAPPED);
237         if (s == INVALID_SOCKET)
238         {
239                 TranslateSocketError();
240                 return INVALID_SOCKET;
241         }
242
243         if (ioctlsocket(s, FIONBIO, &on))
244         {
245                 TranslateSocketError();
246                 return INVALID_SOCKET;
247         }
248         errno = 0;
249
250         return s;
251 }
252
253
254 SOCKET
255 pgwin32_accept(SOCKET s, struct sockaddr * addr, int *addrlen)
256 {
257         SOCKET          rs;
258
259         /*
260          * Poll for signals, but don't return with EINTR, since we don't handle
261          * that in pqcomm.c
262          */
263         pgwin32_poll_signals();
264
265         rs = WSAAccept(s, addr, addrlen, NULL, 0);
266         if (rs == INVALID_SOCKET)
267         {
268                 TranslateSocketError();
269                 return INVALID_SOCKET;
270         }
271         return rs;
272 }
273
274
275 /* No signal delivery during connect. */
276 int
277 pgwin32_connect(SOCKET s, const struct sockaddr * addr, int addrlen)
278 {
279         int                     r;
280
281         r = WSAConnect(s, addr, addrlen, NULL, NULL, NULL, NULL);
282         if (r == 0)
283                 return 0;
284
285         if (WSAGetLastError() != WSAEWOULDBLOCK)
286         {
287                 TranslateSocketError();
288                 return -1;
289         }
290
291         while (pgwin32_waitforsinglesocket(s, FD_CONNECT, INFINITE) == 0)
292         {
293                 /* Loop endlessly as long as we are just delivering signals */
294         }
295
296         return 0;
297 }
298
299 int
300 pgwin32_recv(SOCKET s, char *buf, int len, int f)
301 {
302         WSABUF          wbuf;
303         int                     r;
304         DWORD           b;
305         DWORD           flags = f;
306         int                     n;
307
308         if (pgwin32_poll_signals())
309                 return -1;
310
311         wbuf.len = len;
312         wbuf.buf = buf;
313
314         r = WSARecv(s, &wbuf, 1, &b, &flags, NULL, NULL);
315         if (r != SOCKET_ERROR && b > 0)
316                 /* Read succeeded right away */
317                 return b;
318
319         if (r == SOCKET_ERROR &&
320                 WSAGetLastError() != WSAEWOULDBLOCK)
321         {
322                 TranslateSocketError();
323                 return -1;
324         }
325
326         if (pgwin32_noblock)
327         {
328                 /*
329                  * No data received, and we are in "emulated non-blocking mode", so
330                  * return indicating that we'd block if we were to continue.
331                  */
332                 errno = EWOULDBLOCK;
333                 return -1;
334         }
335
336         /* No error, zero bytes (win2000+) or error+WSAEWOULDBLOCK (<=nt4) */
337
338         for (n = 0; n < 5; n++)
339         {
340                 if (pgwin32_waitforsinglesocket(s, FD_READ | FD_CLOSE | FD_ACCEPT,
341                                                                                 INFINITE) == 0)
342                         return -1;                      /* errno already set */
343
344                 r = WSARecv(s, &wbuf, 1, &b, &flags, NULL, NULL);
345                 if (r == SOCKET_ERROR)
346                 {
347                         if (WSAGetLastError() == WSAEWOULDBLOCK)
348                         {
349                                 /*
350                                  * There seem to be cases on win2k (at least) where WSARecv
351                                  * can return WSAEWOULDBLOCK even when
352                                  * pgwin32_waitforsinglesocket claims the socket is readable.
353                                  * In this case, just sleep for a moment and try again. We try
354                                  * up to 5 times - if it fails more than that it's not likely
355                                  * to ever come back.
356                                  */
357                                 pg_usleep(10000);
358                                 continue;
359                         }
360                         TranslateSocketError();
361                         return -1;
362                 }
363                 return b;
364         }
365         ereport(NOTICE,
366           (errmsg_internal("Failed to read from ready socket (after retries)")));
367         errno = EWOULDBLOCK;
368         return -1;
369 }
370
371 int
372 pgwin32_send(SOCKET s, char *buf, int len, int flags)
373 {
374         WSABUF          wbuf;
375         int                     r;
376         DWORD           b;
377
378         if (pgwin32_poll_signals())
379                 return -1;
380
381         wbuf.len = len;
382         wbuf.buf = buf;
383
384         /*
385          * Readiness of socket to send data to UDP socket may be not true: socket
386          * can become busy again! So loop until send or error occurs.
387          */
388         for (;;)
389         {
390                 r = WSASend(s, &wbuf, 1, &b, flags, NULL, NULL);
391                 if (r != SOCKET_ERROR && b > 0)
392                         /* Write succeeded right away */
393                         return b;
394
395                 if (r == SOCKET_ERROR &&
396                         WSAGetLastError() != WSAEWOULDBLOCK)
397                 {
398                         TranslateSocketError();
399                         return -1;
400                 }
401
402                 /* No error, zero bytes (win2000+) or error+WSAEWOULDBLOCK (<=nt4) */
403
404                 if (pgwin32_waitforsinglesocket(s, FD_WRITE | FD_CLOSE, INFINITE) == 0)
405                         return -1;
406         }
407
408         return -1;
409 }
410
411
412 /*
413  * Wait for activity on one or more sockets.
414  * While waiting, allow signals to run
415  *
416  * NOTE! Currently does not implement exceptfds check,
417  * since it is not used in postgresql!
418  */
419 int
420 pgwin32_select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timeval * timeout)
421 {
422         WSAEVENT        events[FD_SETSIZE * 2]; /* worst case is readfds totally
423                                                                                  * different from writefds, so
424                                                                                  * 2*FD_SETSIZE sockets */
425         SOCKET          sockets[FD_SETSIZE * 2];
426         int                     numevents = 0;
427         int                     i;
428         int                     r;
429         DWORD           timeoutval = WSA_INFINITE;
430         FD_SET          outreadfds;
431         FD_SET          outwritefds;
432         int                     nummatches = 0;
433
434         Assert(exceptfds == NULL);
435
436         if (pgwin32_poll_signals())
437                 return -1;
438
439         FD_ZERO(&outreadfds);
440         FD_ZERO(&outwritefds);
441
442         /*
443          * Write FDs are different in the way that it is only flagged by
444          * WSASelectEvent() if we have tried to write to them first. So try an
445          * empty write
446          */
447         if (writefds)
448         {
449                 for (i = 0; i < writefds->fd_count; i++)
450                 {
451                         char            c;
452                         WSABUF          buf;
453                         DWORD           sent;
454
455                         buf.buf = &c;
456                         buf.len = 0;
457
458                         r = WSASend(writefds->fd_array[i], &buf, 1, &sent, 0, NULL, NULL);
459                         if (r == 0)                     /* Completed - means things are fine! */
460                                 FD_SET(writefds->fd_array[i], &outwritefds);
461
462                         else
463                         {                                       /* Not completed */
464                                 if (WSAGetLastError() != WSAEWOULDBLOCK)
465
466                                         /*
467                                          * Not completed, and not just "would block", so an error
468                                          * occured
469                                          */
470                                         FD_SET(writefds->fd_array[i], &outwritefds);
471                         }
472                 }
473                 if (outwritefds.fd_count > 0)
474                 {
475                         memcpy(writefds, &outwritefds, sizeof(fd_set));
476                         if (readfds)
477                                 FD_ZERO(readfds);
478                         return outwritefds.fd_count;
479                 }
480         }
481
482
483         /* Now set up for an actual select */
484
485         if (timeout != NULL)
486         {
487                 /* timeoutval is in milliseconds */
488                 timeoutval = timeout->tv_sec * 1000 + timeout->tv_usec / 1000;
489         }
490
491         if (readfds != NULL)
492         {
493                 for (i = 0; i < readfds->fd_count; i++)
494                 {
495                         events[numevents] = WSACreateEvent();
496                         sockets[numevents] = readfds->fd_array[i];
497                         numevents++;
498                 }
499         }
500         if (writefds != NULL)
501         {
502                 for (i = 0; i < writefds->fd_count; i++)
503                 {
504                         if (!readfds ||
505                                 !FD_ISSET(writefds->fd_array[i], readfds))
506                         {
507                                 /* If the socket is not in the read list */
508                                 events[numevents] = WSACreateEvent();
509                                 sockets[numevents] = writefds->fd_array[i];
510                                 numevents++;
511                         }
512                 }
513         }
514
515         for (i = 0; i < numevents; i++)
516         {
517                 int                     flags = 0;
518
519                 if (readfds && FD_ISSET(sockets[i], readfds))
520                         flags |= FD_READ | FD_ACCEPT | FD_CLOSE;
521
522                 if (writefds && FD_ISSET(sockets[i], writefds))
523                         flags |= FD_WRITE | FD_CLOSE;
524
525                 if (WSAEventSelect(sockets[i], events[i], flags) == SOCKET_ERROR)
526                 {
527                         TranslateSocketError();
528                         for (i = 0; i < numevents; i++)
529                                 WSACloseEvent(events[i]);
530                         return -1;
531                 }
532         }
533
534         events[numevents] = pgwin32_signal_event;
535         r = WaitForMultipleObjectsEx(numevents + 1, events, FALSE, timeoutval, TRUE);
536         if (r != WAIT_TIMEOUT && r != WAIT_IO_COMPLETION && r != (WAIT_OBJECT_0 + numevents))
537         {
538                 /*
539                  * We scan all events, even those not signalled, in case more than one
540                  * event has been tagged but Wait.. can only return one.
541                  */
542                 WSANETWORKEVENTS resEvents;
543
544                 for (i = 0; i < numevents; i++)
545                 {
546                         ZeroMemory(&resEvents, sizeof(resEvents));
547                         if (WSAEnumNetworkEvents(sockets[i], events[i], &resEvents) == SOCKET_ERROR)
548                                 ereport(FATAL,
549                                                 (errmsg_internal("failed to enumerate network events: %i", (int) GetLastError())));
550                         /* Read activity? */
551                         if (readfds && FD_ISSET(sockets[i], readfds))
552                         {
553                                 if ((resEvents.lNetworkEvents & FD_READ) ||
554                                         (resEvents.lNetworkEvents & FD_ACCEPT) ||
555                                         (resEvents.lNetworkEvents & FD_CLOSE))
556                                 {
557                                         FD_SET(sockets[i], &outreadfds);
558
559                                         nummatches++;
560                                 }
561                         }
562                         /* Write activity? */
563                         if (writefds && FD_ISSET(sockets[i], writefds))
564                         {
565                                 if ((resEvents.lNetworkEvents & FD_WRITE) ||
566                                         (resEvents.lNetworkEvents & FD_CLOSE))
567                                 {
568                                         FD_SET(sockets[i], &outwritefds);
569
570                                         nummatches++;
571                                 }
572                         }
573                 }
574         }
575
576         /* Clean up all handles */
577         for (i = 0; i < numevents; i++)
578         {
579                 WSAEventSelect(sockets[i], events[i], 0);
580                 WSACloseEvent(events[i]);
581         }
582
583         if (r == WSA_WAIT_TIMEOUT)
584         {
585                 if (readfds)
586                         FD_ZERO(readfds);
587                 if (writefds)
588                         FD_ZERO(writefds);
589                 return 0;
590         }
591
592         if (r == WAIT_OBJECT_0 + numevents)
593         {
594                 pgwin32_dispatch_queued_signals();
595                 errno = EINTR;
596                 if (readfds)
597                         FD_ZERO(readfds);
598                 if (writefds)
599                         FD_ZERO(writefds);
600                 return -1;
601         }
602
603         /* Overwrite socket sets with our resulting values */
604         if (readfds)
605                 memcpy(readfds, &outreadfds, sizeof(fd_set));
606         if (writefds)
607                 memcpy(writefds, &outwritefds, sizeof(fd_set));
608         return nummatches;
609 }
610
611
612 /*
613  * Return win32 error string, since strerror can't
614  * handle winsock codes
615  */
616 static char wserrbuf[256];
617 const char *
618 pgwin32_socket_strerror(int err)
619 {
620         static HANDLE handleDLL = INVALID_HANDLE_VALUE;
621
622         if (handleDLL == INVALID_HANDLE_VALUE)
623         {
624                 handleDLL = LoadLibraryEx("netmsg.dll", NULL, DONT_RESOLVE_DLL_REFERENCES | LOAD_LIBRARY_AS_DATAFILE);
625                 if (handleDLL == NULL)
626                         ereport(FATAL,
627                                         (errmsg_internal("Failed to load netmsg.dll: %i", (int) GetLastError())));
628         }
629
630         ZeroMemory(&wserrbuf, sizeof(wserrbuf));
631         if (FormatMessage(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_FROM_HMODULE,
632                                           handleDLL,
633                                           err,
634                                           MAKELANGID(LANG_ENGLISH, SUBLANG_DEFAULT),
635                                           wserrbuf,
636                                           sizeof(wserrbuf) - 1,
637                                           NULL) == 0)
638         {
639                 /* Failed to get id */
640                 sprintf(wserrbuf, "Unknown winsock error %i", err);
641         }
642         return wserrbuf;
643 }