granicus.if.org Git - apache/blob - modules/metadata/mod_unique_id.c

   1 /* ====================================================================
   2  * The Apache Software License, Version 1.1
   3  *
   4  * Copyright (c) 2000 The Apache Software Foundation.  All rights
   5  * reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  *
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in
  16  *    the documentation and/or other materials provided with the
  17  *    distribution.
  18  *
  19  * 3. The end-user documentation included with the redistribution,
  20  *    if any, must include the following acknowledgment:
  21  *       "This product includes software developed by the
  22  *        Apache Software Foundation (http://www.apache.org/)."
  23  *    Alternately, this acknowledgment may appear in the software itself,
  24  *    if and wherever such third-party acknowledgments normally appear.
  25  *
  26  * 4. The names "Apache" and "Apache Software Foundation" must
  27  *    not be used to endorse or promote products derived from this
  28  *    software without prior written permission. For written
  29  *    permission, please contact apache@apache.org.
  30  *
  31  * 5. Products derived from this software may not be called "Apache",
  32  *    nor may "Apache" appear in their name, without prior written
  33  *    permission of the Apache Software Foundation.
  34  *
  35  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  36  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  37  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  38  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  39  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  40  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  41  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  42  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  43  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  44  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  45  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  46  * SUCH DAMAGE.
  47  * ====================================================================
  48  *
  49  * This software consists of voluntary contributions made by many
  50  * individuals on behalf of the Apache Software Foundation.  For more
  51  * information on the Apache Software Foundation, please see
  52  * <http://www.apache.org/>.
  53  *
  54  * Portions of this software are based upon public domain software
  55  * originally written at the National Center for Supercomputing Applications,
  56  * University of Illinois, Urbana-Champaign.
  57  */
  58
  59 /*
  60  * mod_unique_id.c: generate a unique identifier for each request
  61  *
  62  * Original author: Dean Gaudet <dgaudet@arctic.org>
  63  * UUencoding modified by: Alvaro Martinez Echevarria <alvaro@lander.es>
  64  */
  65
  66 #include "httpd.h"
  67 #include "http_config.h"
  68 #include "http_log.h"
  69
  70 typedef struct {
  71     unsigned int stamp;
  72     unsigned int in_addr;
  73     unsigned int pid;
  74     unsigned short counter;
  75     unsigned int thread_index;
  76 } unique_id_rec;
  77
   78 /* We are using thread_index (the index into the scoreboard), because we
   79  * cannot guarantee the thread_id will be an integer.
   80  *
   81  * This code looks like it won't give a unique ID with the new thread logic,
   82  * because we don't increment the counter in a thread-safe manner.  It will:
   83  * the thread_index is also part of the unique ID now, so this does not
   84  * matter.  In order for the id to not be unique, the same thread would
   85  * have to get the same counter twice in the same second.
   86  */
  87
  88 /* Comments:
  89  *
  90  * We want an identifier which is unique across all hits, everywhere.
  91  * "everywhere" includes multiple httpd instances on the same machine, or on
  92  * multiple machines.  Essentially "everywhere" should include all possible
  93  * httpds across all servers at a particular "site".  We make some assumptions
  94  * that if the site has a cluster of machines then their time is relatively
  95  * synchronized.  We also assume that the first address returned by a
  96  * gethostbyname (gethostname()) is unique across all the machines at the
  97  * "site".
  98  *
  99  * We also further assume that pids fit in 32-bits.  If something uses more
 100  * than 32-bits, the fix is trivial, but it requires the unrolled uuencoding
  101  * loop to be extended.  A similar fix is needed to support multithreaded
 102  * servers, using a pid/tid combo.
 103  *
 104  * Together, the in_addr and pid are assumed to absolutely uniquely identify
 105  * this one child from all other currently running children on all servers
 106  * (including this physical server if it is running multiple httpds) from each
 107  * other.
 108  *
 109  * The stamp and counter are used to distinguish all hits for a particular
 110  * (in_addr,pid) pair.  The stamp is updated using r->request_time,
 111  * saving cpu cycles.  The counter is never reset, and is used to permit up to
 112  * 64k requests in a single second by a single child.
 113  *
  114  * The 144 bits (five fields, 18 bytes) of unique_id_rec are encoded using the alphabet
  115  * [A-Za-z0-9@-], resulting in 24 bytes of printable characters.  That is then
 116  * stuffed into the environment variable UNIQUE_ID so that it is available to
 117  * other modules.  The alphabet choice differs from normal base64 encoding
 118  * [A-Za-z0-9+/] because + and / are special characters in URLs and we want to
 119  * make it easy to use UNIQUE_ID in URLs.
 120  *
 121  * Note that UNIQUE_ID should be considered an opaque token by other
 122  * applications.  No attempt should be made to dissect its internal components.
 123  * It is an abstraction that may change in the future as the needs of this
 124  * module change.
 125  *
 126  * It is highly desirable that identifiers exist for "eternity".  But future
 127  * needs (such as much faster webservers, moving to 64-bit pids, or moving to a
 128  * multithreaded server) may dictate a need to change the contents of
 129  * unique_id_rec.  Such a future implementation should ensure that the first
 130  * field is still a time_t stamp.  By doing that, it is possible for a site to
 131  * have a "flag second" in which they stop all of their old-format servers,
 132  * wait one entire second, and then start all of their new-servers.  This
 133  * procedure will ensure that the new space of identifiers is completely unique
 134  * from the old space.  (Since the first four unencoded bytes always differ.)
 135  */
 136 /*
 137  * Sun Jun  7 05:43:49 CEST 1998 -- Alvaro
 138  * More comments:
  139  * 1) The UUencoding procedure is now done in a general way, avoiding the problems
 140  * with sizes and paddings that can arise depending on the architecture. Now the
 141  * offsets and sizes of the elements of the unique_id_rec structure are calculated
 142  * in unique_id_global_init; and then used to duplicate the structure without the
 143  * paddings that might exist. The multithreaded server fix should be now very easy:
 144  * just add a new "tid" field to the unique_id_rec structure, and increase by one
 145  * UNIQUE_ID_REC_MAX.
 146  * 2) unique_id_rec.stamp has been changed from "time_t" to "unsigned int", because
 147  * its size is 64bits on some platforms (linux/alpha), and this caused problems with
 148  * htonl/ntohl. Well, this shouldn't be a problem till year 2106.
 149  */
 150
 151 static unsigned global_in_addr;
 152
 153 static unique_id_rec cur_unique_id;
 154
 155 /*
 156  * Number of elements in the structure unique_id_rec.
 157  */
 158 #define UNIQUE_ID_REC_MAX 5
 159
 160 static unsigned short unique_id_rec_offset[UNIQUE_ID_REC_MAX],
 161                       unique_id_rec_size[UNIQUE_ID_REC_MAX],
 162                       unique_id_rec_total_size,
 163                       unique_id_rec_size_uu;
 164
 165 static void unique_id_global_init(ap_pool_t *p, ap_pool_t *plog, ap_pool_t *ptemp, server_rec *main_server)
 166 {
 167 #ifndef MAXHOSTNAMELEN
 168 #define MAXHOSTNAMELEN 256
 169 #endif
 170     char str[MAXHOSTNAMELEN + 1];
 171     struct hostent *hent;
 172 #ifdef HAVE_GETTIMEOFDAY
 173     struct timeval tv;
 174 #endif
 175
 176     /*
 177      * Calculate the sizes and offsets in cur_unique_id.
 178      */
 179     unique_id_rec_offset[0] = XtOffsetOf(unique_id_rec, stamp);
 180     unique_id_rec_size[0] = sizeof(cur_unique_id.stamp);
 181     unique_id_rec_offset[1] = XtOffsetOf(unique_id_rec, in_addr);
 182     unique_id_rec_size[1] = sizeof(cur_unique_id.in_addr);
 183     unique_id_rec_offset[2] = XtOffsetOf(unique_id_rec, pid);
 184     unique_id_rec_size[2] = sizeof(cur_unique_id.pid);
 185     unique_id_rec_offset[3] = XtOffsetOf(unique_id_rec, counter);
 186     unique_id_rec_size[3] = sizeof(cur_unique_id.counter);
 187     unique_id_rec_offset[4] = XtOffsetOf(unique_id_rec, thread_index);
 188     unique_id_rec_size[4] = sizeof(cur_unique_id.thread_index);
 189     unique_id_rec_total_size = unique_id_rec_size[0] + unique_id_rec_size[1] +
 190                                unique_id_rec_size[2] + unique_id_rec_size[3] +
 191                                unique_id_rec_size[4];
 192
 193     /*
 194      * Calculate the size of the structure when encoded.
 195      */
 196     unique_id_rec_size_uu = (unique_id_rec_total_size*8+5)/6;
 197
 198     /*
 199      * Now get the global in_addr.  Note that it is not sufficient to use one
 200      * of the addresses from the main_server, since those aren't as likely to
 201      * be unique as the physical address of the machine
 202      */
 203     if (gethostname(str, sizeof(str) - 1) != 0) {
 204         ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_ALERT, errno, main_server,
 205           "gethostname: mod_unique_id requires the hostname of the server");
 206         exit(1);
 207     }
 208     str[sizeof(str) - 1] = '\0';
 209
 210     if ((hent = gethostbyname(str)) == NULL) {
 211         ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_ALERT, h_errno, main_server,
 212                     "mod_unique_id: unable to gethostbyname(\"%s\")", str);
 213         exit(1);
 214     }
 215
 216     global_in_addr = ((struct in_addr *) hent->h_addr_list[0])->s_addr;
 217
 218     ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_INFO, 0, main_server,
 219                 "mod_unique_id: using ip addr %s",
 220                 inet_ntoa(*(struct in_addr *) hent->h_addr_list[0]));
 221
 222     /*
 223      * If the server is pummelled with restart requests we could possibly end
 224      * up in a situation where we're starting again during the same second
 225      * that has been used in previous identifiers.  Avoid that situation.
 226      *
 227      * In truth, for this to actually happen not only would it have to restart
 228      * in the same second, but it would have to somehow get the same pids as
 229      * one of the other servers that was running in that second. Which would
 230      * mean a 64k wraparound on pids ... not very likely at all.
 231      *
 232      * But protecting against it is relatively cheap.  We just sleep into the
 233      * next second.
 234      */
 235 #ifndef HAVE_GETTIMEOFDAY
 236     sleep(1);
 237 #else
 238     if (gettimeofday(&tv, NULL) == -1) {
 239         sleep(1);
 240     }
 241     else if (tv.tv_usec) {
 242         tv.tv_sec = 0;
 243         tv.tv_usec = 1000000 - tv.tv_usec;
 244         select(0, NULL, NULL, NULL, &tv);
 245     }
 246 #endif
 247 }
 248
 249 static void unique_id_child_init(ap_pool_t *p, server_rec *s)
 250 {
 251     pid_t pid;
 252 #ifdef HAVE_GETTIMEOFDAY
 253     struct timeval tv;
 254 #endif
 255
 256     /*
 257      * Note that we use the pid because it's possible that on the same
 258      * physical machine there are multiple servers (i.e. using Listen). But
 259      * it's guaranteed that none of them will share the same pids between
 260      * children.
 261      *
 262      * XXX: for multithread this needs to use a pid/tid combo and probably
 263      * needs to be expanded to 32 bits
 264      */
 265     pid = getpid();
 266     cur_unique_id.pid = pid;
 267
 268     /*
 269      * Test our assumption that the pid is 32-bits.  It's possible that
 270      * 64-bit machines will declare pid_t to be 64 bits but only use 32
 271      * of them.  It would have been really nice to test this during
 272      * global_init ... but oh well.
 273      */
 274     if (cur_unique_id.pid != pid) {
 275         ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_CRIT, 0, s,
 276                     "oh no! pids are greater than 32-bits!  I'm broken!");
 277     }
 278
 279     cur_unique_id.in_addr = global_in_addr;
 280
 281     /*
 282      * If we use 0 as the initial counter we have a little less protection
 283      * against restart problems, and a little less protection against a clock
 284      * going backwards in time.
 285      */
 286 #ifdef HAVE_GETTIMEOFDAY
 287     if (gettimeofday(&tv, NULL) == -1) {
 288         cur_unique_id.counter = 0;
 289     }
 290     else {
 291         /* Some systems have very low variance on the low end of their
 292          * system counter, defend against that.
 293          */
 294         cur_unique_id.counter = tv.tv_usec / 10;
 295     }
 296 #else
 297     cur_unique_id.counter = 0;
 298 #endif
 299
 300     /*
 301      * We must always use network ordering for these bytes, so that
 302      * identifiers are comparable between machines of different byte
 303      * orderings.  Note in_addr is already in network order.
 304      */
 305     cur_unique_id.pid = htonl(cur_unique_id.pid);
 306     cur_unique_id.counter = htons(cur_unique_id.counter);
 307 }
 308
 309 /* NOTE: This is *NOT* the same encoding used by base64encode ... the last two
 310  * characters should be + and /.  But those two characters have very special
 311  * meanings in URLs, and we want to make it easy to use identifiers in
 312  * URLs.  So we replace them with @ and -.
 313  */
 314 static const char uuencoder[64] = {
 315     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
 316     'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
 317     'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
 318     'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
 319     '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '@', '-',
 320 };
 321
 322 static int gen_unique_id(request_rec *r)
 323 {
 324     char *str;
 325     /*
 326      * Buffer padded with two final bytes, used to copy the unique_id_red
 327      * structure without the internal paddings that it could have.
 328      */
 329     unique_id_rec new_unique_id;
 330     struct {
 331         unique_id_rec foo;
 332         unsigned char pad[2];
 333     } paddedbuf;
 334     unsigned char *x,*y;
 335     unsigned short counter;
 336     const char *e;
 337     int i,j,k;
 338
 339     /* copy the unique_id if this is an internal redirect (we're never
 340      * actually called for sub requests, so we don't need to test for
 341      * them) */
 342     if (r->prev && (e = ap_table_get(r->subprocess_env, "REDIRECT_UNIQUE_ID"))) {
 343         ap_table_setn(r->subprocess_env, "UNIQUE_ID", e);
 344         return DECLINED;
 345     }
 346
 347     new_unique_id.in_addr = cur_unique_id.in_addr;
 348     new_unique_id.pid = cur_unique_id.pid;
 349     new_unique_id.counter = cur_unique_id.counter;
 350
 351     new_unique_id.stamp = htonl((unsigned int)r->request_time);
 352     new_unique_id.thread_index = htonl((unsigned int)r->connection->id);
 353
 354     /* we'll use a temporal buffer to avoid uuencoding the possible internal
 355      * paddings of the original structure */
 356     x = (unsigned char *) &paddedbuf;
 357     y = (unsigned char *) &new_unique_id;
 358     k = 0;
 359     for (i = 0; i < UNIQUE_ID_REC_MAX; i++) {
 360         y = ((unsigned char *) &new_unique_id) + unique_id_rec_offset[i];
 361         for (j = 0; j < unique_id_rec_size[i]; j++, k++) {
 362             x[k] = y[j];
 363         }
 364     }
 365     /*
 366      * We reset two more bytes just in case padding is needed for the uuencoding.
 367      */
 368     x[k++] = '\0';
 369     x[k++] = '\0';
 370
 371     /* alloc str and do the uuencoding */
 372     str = (char *)ap_palloc(r->pool, unique_id_rec_size_uu + 1);
 373     k = 0;
 374     for (i = 0; i < unique_id_rec_total_size; i += 3) {
 375         y = x + i;
 376         str[k++] = uuencoder[y[0] >> 2];
 377         str[k++] = uuencoder[((y[0] & 0x03) << 4) | ((y[1] & 0xf0) >> 4)];
 378         if (k == unique_id_rec_size_uu) break;
 379         str[k++] = uuencoder[((y[1] & 0x0f) << 2) | ((y[2] & 0xc0) >> 6)];
 380         if (k == unique_id_rec_size_uu) break;
 381         str[k++] = uuencoder[y[2] & 0x3f];
 382     }
 383     str[k++] = '\0';
 384
 385     /* set the environment variable */
 386     ap_table_setn(r->subprocess_env, "UNIQUE_ID", str);
 387
 388     /* and increment the identifier for the next call */
 389
 390     counter = ntohs(new_unique_id.counter) + 1;
 391     cur_unique_id.counter = htons(counter);
 392
 393     return DECLINED;
 394 }
 395
 396 static void register_hooks(void)
 397 {
 398     ap_hook_post_config(unique_id_global_init, NULL, NULL, AP_HOOK_MIDDLE);
 399     ap_hook_child_init(unique_id_child_init, NULL, NULL, AP_HOOK_MIDDLE);
 400     ap_hook_post_read_request(gen_unique_id, NULL, NULL, AP_HOOK_MIDDLE);
 401 }
 402
 403 module MODULE_EXPORT_VAR unique_id_module = {
 404     STANDARD20_MODULE_STUFF,
 405     NULL,                       /* dir config creater */
 406     NULL,                       /* dir merger --- default is to override */
 407     NULL,                       /* server config */
 408     NULL,                       /* merge server configs */
 409     NULL,                       /* command ap_table_t */
 410     NULL,                       /* handlers */
 411     register_hooks              /* register hooks */
 412 };