]> granicus.if.org Git - postgresql/blob - src/backend/port/win32/socket.c
Fix infinite sleep and failures of send in Win32.
[postgresql] / src / backend / port / win32 / socket.c
1 /*-------------------------------------------------------------------------
2  *
3  * socket.c
4  *        Microsoft Windows Win32 Socket Functions
5  *
6  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *        $PostgreSQL: pgsql/src/backend/port/win32/socket.c,v 1.14 2006/10/13 13:59:47 teodor Exp $
10  *
11  *-------------------------------------------------------------------------
12  */
13
14 #include "postgres.h"
15
16 #undef socket
17 #undef accept
18 #undef connect
19 #undef select
20 #undef recv
21 #undef send
22
23 /*
24  * Blocking socket functions implemented so they listen on both
25  * the socket and the signal event, required for signal handling.
26  */
27
/*
 * Translate the most recent Winsock error (as reported by
 * WSAGetLastError()) into an errno value and store it in errno.
 *
 * Codes that have no entry in the table below are reported at NOTICE
 * level and mapped to EINVAL.
 */
static void
TranslateSocketError(void)
{
	static const struct
	{
		int			winsock_code;
		int			errno_value;
	}			error_map[] =
	{
		/* generic failures: all reported as "invalid argument" */
		{WSANOTINITIALISED, EINVAL},
		{WSAENETDOWN, EINVAL},
		{WSAEINPROGRESS, EINVAL},
		{WSAEINVAL, EINVAL},
		{WSAESOCKTNOSUPPORT, EINVAL},
		{WSAEFAULT, EINVAL},
		{WSAEINVALIDPROVIDER, EINVAL},
		{WSAEINVALIDPROCTABLE, EINVAL},
		{WSAEMSGSIZE, EINVAL},
		/* codes with a direct errno counterpart */
		{WSAEAFNOSUPPORT, EAFNOSUPPORT},
		{WSAEMFILE, EMFILE},
		{WSAENOBUFS, ENOBUFS},
		{WSAEPROTONOSUPPORT, EPROTONOSUPPORT},
		{WSAEPROTOTYPE, EPROTONOSUPPORT},
		{WSAECONNREFUSED, ECONNREFUSED},
		{WSAEINTR, EINTR},
		{WSAENOTSOCK, EBADFD},
		{WSAEOPNOTSUPP, EOPNOTSUPP},
		{WSAEWOULDBLOCK, EWOULDBLOCK},
		{WSAEACCES, EACCES},
		/* connection-state failures; ENOTCONN might be a better fit */
		{WSAENOTCONN, ECONNREFUSED},
		{WSAENETRESET, ECONNREFUSED},
		{WSAECONNRESET, ECONNREFUSED},
		{WSAESHUTDOWN, ECONNREFUSED},
		{WSAECONNABORTED, ECONNREFUSED},
		{WSAEDISCON, ECONNREFUSED}
	};
	const int	n_entries = (int) (sizeof(error_map) / sizeof(error_map[0]));
	int			wsa_code = WSAGetLastError();
	int			idx;

	for (idx = 0; idx < n_entries; idx++)
	{
		if (error_map[idx].winsock_code == wsa_code)
		{
			errno = error_map[idx].errno_value;
			return;
		}
	}

	/* No mapping known for this code */
	ereport(NOTICE,
			(errmsg_internal("Unknown win32 socket error code: %i", wsa_code)));
	errno = EINVAL;
}
92
/*
 * Dispatch queued signals if UNBLOCKED_SIGNAL_QUEUE() reports any.
 *
 * Returns 1 (with errno set to EINTR) when the dispatcher was invoked,
 * 0 when there was nothing to do.
 */
static int
pgwin32_poll_signals(void)
{
	if (!UNBLOCKED_SIGNAL_QUEUE())
		return 0;

	pgwin32_dispatch_queued_signals();
	errno = EINTR;
	return 1;
}
104
105 static int
106 isDataGram(SOCKET s) {
107         int type;
108         int typelen = sizeof(type);
109
110         if ( getsockopt(s, SOL_SOCKET, SO_TYPE, (char*)&type, &typelen) )
111                 return 1;
112
113         return ( type == SOCK_DGRAM ) ? 1 : 0;
114 }
115
116 int
117 pgwin32_waitforsinglesocket(SOCKET s, int what)
118 {
119         static HANDLE waitevent = INVALID_HANDLE_VALUE;
120         static SOCKET current_socket = -1;
121         static int    isUDP = 0;
122         HANDLE          events[2];
123         int                     r;
124
125         if (waitevent == INVALID_HANDLE_VALUE)
126         {
127                 waitevent = CreateEvent(NULL, TRUE, FALSE, NULL);
128
129                 if (waitevent == INVALID_HANDLE_VALUE)
130                         ereport(ERROR,
131                                         (errmsg_internal("Failed to create socket waiting event: %i", (int) GetLastError())));
132         }
133         else if (!ResetEvent(waitevent))
134                 ereport(ERROR,
135                                 (errmsg_internal("Failed to reset socket waiting event: %i", (int) GetLastError())));
136
137         /*
138          * make sure we don't multiplex this kernel event object with a different
139          * socket from a previous call
140          */
141
142         if (current_socket != s) 
143         {
144                 if ( current_socket != -1 )
145                         WSAEventSelect(current_socket, waitevent, 0);
146                 isUDP = isDataGram(s);
147         }
148
149         current_socket = s;
150
151         if (WSAEventSelect(s, waitevent, what) == SOCKET_ERROR)
152         {
153                 TranslateSocketError();
154                 return 0;
155         }
156
157         events[0] = pgwin32_signal_event;
158         events[1] = waitevent;
159
160         /* 
161          * Just a workaround of unknown locking problem with writing
162          * in UDP socket under high load: 
163          * Client's pgsql backend sleeps infinitely in 
164          * WaitForMultipleObjectsEx, pgstat process sleeps in 
165          * pgwin32_select().  So, we will wait with small 
166          * timeout(0.1 sec) and if sockect is still blocked, 
167          * try WSASend (see comments in pgwin32_select) and wait again.
168          */
169         if ((what & FD_WRITE) && isUDP)
170         {
171                 for(;;)
172                 {
173                         r = WaitForMultipleObjectsEx(2, events, FALSE, 100, TRUE);
174
175                         if ( r == WAIT_TIMEOUT )
176                         {
177                                 char        c;
178                                 WSABUF      buf;
179                                 DWORD       sent;
180
181                                 buf.buf = &c;
182                                 buf.len = 0;
183
184                                 r = WSASend(s, &buf, 1, &sent, 0, NULL, NULL);
185                                 if (r == 0)         /* Completed - means things are fine! */
186                                         return 1;
187                                 else if ( WSAGetLastError() != WSAEWOULDBLOCK )
188                                 {
189                                         TranslateSocketError();
190                                         return 0;
191                                 }
192                         }
193                         else
194                                 break;
195                 }
196         }
197         else
198                 r = WaitForMultipleObjectsEx(2, events, FALSE, INFINITE, TRUE);
199
200         if (r == WAIT_OBJECT_0 || r == WAIT_IO_COMPLETION)
201         {
202                 pgwin32_dispatch_queued_signals();
203                 errno = EINTR;
204                 return 0;
205         }
206         if (r == WAIT_OBJECT_0 + 1)
207                 return 1;
208         ereport(ERROR,
209                         (errmsg_internal("Bad return from WaitForMultipleObjects: %i (%i)", r, (int) GetLastError())));
210         return 0;
211 }
212
213 /*
214  * Create a socket, setting it to overlapped and non-blocking
215  */
216 SOCKET
217 pgwin32_socket(int af, int type, int protocol)
218 {
219         SOCKET          s;
220         unsigned long on = 1;
221
222         s = WSASocket(af, type, protocol, NULL, 0, WSA_FLAG_OVERLAPPED);
223         if (s == INVALID_SOCKET)
224         {
225                 TranslateSocketError();
226                 return INVALID_SOCKET;
227         }
228
229         if (ioctlsocket(s, FIONBIO, &on))
230         {
231                 TranslateSocketError();
232                 return INVALID_SOCKET;
233         }
234         errno = 0;
235
236         return s;
237 }
238
239
240 SOCKET
241 pgwin32_accept(SOCKET s, struct sockaddr * addr, int *addrlen)
242 {
243         SOCKET          rs;
244
245         /*
246          * Poll for signals, but don't return with EINTR, since we don't handle
247          * that in pqcomm.c
248          */
249         pgwin32_poll_signals();
250
251         rs = WSAAccept(s, addr, addrlen, NULL, 0);
252         if (rs == INVALID_SOCKET)
253         {
254                 TranslateSocketError();
255                 return INVALID_SOCKET;
256         }
257         return rs;
258 }
259
260
261 /* No signal delivery during connect. */
262 int
263 pgwin32_connect(SOCKET s, const struct sockaddr * addr, int addrlen)
264 {
265         int                     r;
266
267         r = WSAConnect(s, addr, addrlen, NULL, NULL, NULL, NULL);
268         if (r == 0)
269                 return 0;
270
271         if (WSAGetLastError() != WSAEWOULDBLOCK)
272         {
273                 TranslateSocketError();
274                 return -1;
275         }
276
277         while (pgwin32_waitforsinglesocket(s, FD_CONNECT) == 0)
278         {
279                 /* Loop endlessly as long as we are just delivering signals */
280         }
281
282         return 0;
283 }
284
285 int
286 pgwin32_recv(SOCKET s, char *buf, int len, int f)
287 {
288         WSABUF          wbuf;
289         int                     r;
290         DWORD           b;
291         DWORD           flags = f;
292
293         if (pgwin32_poll_signals())
294                 return -1;
295
296         wbuf.len = len;
297         wbuf.buf = buf;
298
299         r = WSARecv(s, &wbuf, 1, &b, &flags, NULL, NULL);
300         if (r != SOCKET_ERROR && b > 0)
301                 /* Read succeeded right away */
302                 return b;
303
304         if (r == SOCKET_ERROR &&
305                 WSAGetLastError() != WSAEWOULDBLOCK)
306         {
307                 TranslateSocketError();
308                 return -1;
309         }
310
311         /* No error, zero bytes (win2000+) or error+WSAEWOULDBLOCK (<=nt4) */
312
313         if (pgwin32_waitforsinglesocket(s, FD_READ | FD_CLOSE | FD_ACCEPT) == 0)
314                 return -1;
315
316         r = WSARecv(s, &wbuf, 1, &b, &flags, NULL, NULL);
317         if (r == SOCKET_ERROR)
318         {
319                 TranslateSocketError();
320                 return -1;
321         }
322         return b;
323 }
324
325 int
326 pgwin32_send(SOCKET s, char *buf, int len, int flags)
327 {
328         WSABUF          wbuf;
329         int                     r;
330         DWORD           b;
331
332         if (pgwin32_poll_signals())
333                 return -1;
334
335         wbuf.len = len;
336         wbuf.buf = buf;
337
338         /*
339          * Readiness of socket to send data to UDP socket 
340          * may be not true: socket can become busy again! So loop
341          * until send or error occurs.
342          */
343         for(;;) {
344                 r = WSASend(s, &wbuf, 1, &b, flags, NULL, NULL);
345                 if (r != SOCKET_ERROR && b > 0)
346                         /* Write succeeded right away */
347                         return b;
348
349                 if (r == SOCKET_ERROR &&
350                         WSAGetLastError() != WSAEWOULDBLOCK)
351                 {
352                         TranslateSocketError();
353                         return -1;
354                 }
355
356                 /* No error, zero bytes (win2000+) or error+WSAEWOULDBLOCK (<=nt4) */
357
358                 if (pgwin32_waitforsinglesocket(s, FD_WRITE | FD_CLOSE) == 0)
359                         return -1;
360         }
361
362         return -1;
363 }
364
365
366 /*
367  * Wait for activity on one or more sockets.
368  * While waiting, allow signals to run
369  *
370  * NOTE! Currently does not implement exceptfds check,
371  * since it is not used in postgresql!
372  */
373 int
374 pgwin32_select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timeval * timeout)
375 {
376         WSAEVENT        events[FD_SETSIZE * 2]; /* worst case is readfds totally
377                                                                                  * different from writefds, so
378                                                                                  * 2*FD_SETSIZE sockets */
379         SOCKET          sockets[FD_SETSIZE * 2];
380         int                     numevents = 0;
381         int                     i;
382         int                     r;
383         DWORD           timeoutval = WSA_INFINITE;
384         FD_SET          outreadfds;
385         FD_SET          outwritefds;
386         int                     nummatches = 0;
387
388         Assert(exceptfds == NULL);
389
390         if (pgwin32_poll_signals())
391                 return -1;
392
393         FD_ZERO(&outreadfds);
394         FD_ZERO(&outwritefds);
395
396         /*
397          * Write FDs are different in the way that it is only flagged by
398          * WSASelectEvent() if we have tried to write to them first. So try an
399          * empty write
400          */
401         if (writefds)
402         {
403                 for (i = 0; i < writefds->fd_count; i++)
404                 {
405                         char            c;
406                         WSABUF          buf;
407                         DWORD           sent;
408
409                         buf.buf = &c;
410                         buf.len = 0;
411
412                         r = WSASend(writefds->fd_array[i], &buf, 1, &sent, 0, NULL, NULL);
413                         if (r == 0)                     /* Completed - means things are fine! */
414                                 FD_SET(writefds->fd_array[i], &outwritefds);
415                         else
416                         {                                       /* Not completed */
417                                 if (WSAGetLastError() != WSAEWOULDBLOCK)
418
419                                         /*
420                                          * Not completed, and not just "would block", so an error
421                                          * occured
422                                          */
423                                         FD_SET(writefds->fd_array[i], &outwritefds);
424                         }
425                 }
426                 if (outwritefds.fd_count > 0)
427                 {
428                         memcpy(writefds, &outwritefds, sizeof(fd_set));
429                         if (readfds)
430                                 FD_ZERO(readfds);
431                         return outwritefds.fd_count;
432                 }
433         }
434
435
436         /* Now set up for an actual select */
437
438         if (timeout != NULL)
439         {
440                 /* timeoutval is in milliseconds */
441                 timeoutval = timeout->tv_sec * 1000 + timeout->tv_usec / 1000;
442         }
443
444         if (readfds != NULL)
445         {
446                 for (i = 0; i < readfds->fd_count; i++)
447                 {
448                         events[numevents] = WSACreateEvent();
449                         sockets[numevents] = readfds->fd_array[i];
450                         numevents++;
451                 }
452         }
453         if (writefds != NULL)
454         {
455                 for (i = 0; i < writefds->fd_count; i++)
456                 {
457                         if (!readfds ||
458                                 !FD_ISSET(writefds->fd_array[i], readfds))
459                         {
460                                 /* If the socket is not in the read list */
461                                 events[numevents] = WSACreateEvent();
462                                 sockets[numevents] = writefds->fd_array[i];
463                                 numevents++;
464                         }
465                 }
466         }
467
468         for (i = 0; i < numevents; i++)
469         {
470                 int                     flags = 0;
471
472                 if (readfds && FD_ISSET(sockets[i], readfds))
473                         flags |= FD_READ | FD_ACCEPT | FD_CLOSE;
474
475                 if (writefds && FD_ISSET(sockets[i], writefds))
476                         flags |= FD_WRITE | FD_CLOSE;
477
478                 if (WSAEventSelect(sockets[i], events[i], flags) == SOCKET_ERROR)
479                 {
480                         TranslateSocketError();
481                         for (i = 0; i < numevents; i++)
482                                 WSACloseEvent(events[i]);
483                         return -1;
484                 }
485         }
486
487         events[numevents] = pgwin32_signal_event;
488         r = WaitForMultipleObjectsEx(numevents + 1, events, FALSE, timeoutval, TRUE);
489         if (r != WAIT_TIMEOUT && r != WAIT_IO_COMPLETION && r != (WAIT_OBJECT_0 + numevents))
490         {
491                 /*
492                  * We scan all events, even those not signalled, in case more than one
493                  * event has been tagged but Wait.. can only return one.
494                  */
495                 WSANETWORKEVENTS resEvents;
496
497                 for (i = 0; i < numevents; i++)
498                 {
499                         ZeroMemory(&resEvents, sizeof(resEvents));
500                         if (WSAEnumNetworkEvents(sockets[i], events[i], &resEvents) == SOCKET_ERROR)
501                                 ereport(FATAL,
502                                                 (errmsg_internal("failed to enumerate network events: %i", (int) GetLastError())));
503                         /* Read activity? */
504                         if (readfds && FD_ISSET(sockets[i], readfds))
505                         {
506                                 if ((resEvents.lNetworkEvents & FD_READ) ||
507                                         (resEvents.lNetworkEvents & FD_ACCEPT) ||
508                                         (resEvents.lNetworkEvents & FD_CLOSE))
509                                 {
510                                         FD_SET(sockets[i], &outreadfds);
511                                         nummatches++;
512                                 }
513                         }
514                         /* Write activity? */
515                         if (writefds && FD_ISSET(sockets[i], writefds))
516                         {
517                                 if ((resEvents.lNetworkEvents & FD_WRITE) ||
518                                         (resEvents.lNetworkEvents & FD_CLOSE))
519                                 {
520                                         FD_SET(sockets[i], &outwritefds);
521                                         nummatches++;
522                                 }
523                         }
524                 }
525         }
526
527         /* Clean up all handles */
528         for (i = 0; i < numevents; i++)
529         {
530                 WSAEventSelect(sockets[i], events[i], 0);
531                 WSACloseEvent(events[i]);
532         }
533
534         if (r == WSA_WAIT_TIMEOUT)
535         {
536                 if (readfds)
537                         FD_ZERO(readfds);
538                 if (writefds)
539                         FD_ZERO(writefds);
540                 return 0;
541         }
542
543         if (r == WAIT_OBJECT_0 + numevents)
544         {
545                 pgwin32_dispatch_queued_signals();
546                 errno = EINTR;
547                 if (readfds)
548                         FD_ZERO(readfds);
549                 if (writefds)
550                         FD_ZERO(writefds);
551                 return -1;
552         }
553
554         /* Overwrite socket sets with our resulting values */
555         if (readfds)
556                 memcpy(readfds, &outreadfds, sizeof(fd_set));
557         if (writefds)
558                 memcpy(writefds, &outwritefds, sizeof(fd_set));
559         return nummatches;
560 }
561
562
563 /*
564  * Return win32 error string, since strerror can't
565  * handle winsock codes
566  */
567 static char wserrbuf[256];
568 const char *
569 pgwin32_socket_strerror(int err)
570 {
571         static HANDLE handleDLL = INVALID_HANDLE_VALUE;
572
573         if (handleDLL == INVALID_HANDLE_VALUE)
574         {
575                 handleDLL = LoadLibraryEx("netmsg.dll", NULL, DONT_RESOLVE_DLL_REFERENCES | LOAD_LIBRARY_AS_DATAFILE);
576                 if (handleDLL == NULL)
577                         ereport(FATAL,
578                                         (errmsg_internal("Failed to load netmsg.dll: %i", (int) GetLastError())));
579         }
580
581         ZeroMemory(&wserrbuf, sizeof(wserrbuf));
582         if (FormatMessage(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_FROM_HMODULE,
583                                           handleDLL,
584                                           err,
585                                           MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
586                                           wserrbuf,
587                                           sizeof(wserrbuf) - 1,
588                                           NULL) == 0)
589         {
590                 /* Failed to get id */
591                 sprintf(wserrbuf, "Unknown winsock error %i", err);
592         }
593         return wserrbuf;
594 }