granicus.if.org Git - apache/blob - modules/metadata/mod_unique_id.c

   1 /* Copyright 1999-2004 Apache Software Foundation
   2  *
   3  * Licensed under the Apache License, Version 2.0 (the "License");
   4  * you may not use this file except in compliance with the License.
   5  * You may obtain a copy of the License at
   6  *
   7  *     http://www.apache.org/licenses/LICENSE-2.0
   8  *
   9  * Unless required by applicable law or agreed to in writing, software
  10  * distributed under the License is distributed on an "AS IS" BASIS,
  11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12  * See the License for the specific language governing permissions and
  13  * limitations under the License.
  14  */
  15
  16 /*
  17  * mod_unique_id.c: generate a unique identifier for each request
  18  *
  19  * Original author: Dean Gaudet <dgaudet@arctic.org>
  20  * UUencoding modified by: Alvaro Martinez Echevarria <alvaro@lander.es>
  21  */
  22
  23 #define APR_WANT_BYTEFUNC   /* for htons() et al */
  24 #include "apr_want.h"
  25 #include "apr_general.h"    /* for APR_OFFSETOF                */
  26 #include "apr_network_io.h"
  27
  28 #include "httpd.h"
  29 #include "http_config.h"
  30 #include "http_log.h"
  31 #include "http_protocol.h"  /* for ap_hook_post_read_request */
  32
  33 #if APR_HAVE_UNISTD_H
  34 #include <unistd.h>         /* for getpid() */
  35 #endif
  36
  37 typedef struct {
  38     unsigned int stamp;
  39     unsigned int in_addr;
  40     unsigned int pid;
  41     unsigned short counter;
  42     unsigned int thread_index;
  43 } unique_id_rec;
  44
  45 /* We are using thread_index (the index into the scoreboard), because we
  46  * cannot guarantee the thread_id will be an integer.
  47  *
  48  * This code looks like it won't give a unique ID with the new thread logic.
  49  * It will.  The reason is, we don't increment the counter in a thread_safe
  50  * manner.  Because the thread_index is also in the unique ID now, this does
  51  * not matter.  In order for the id to not be unique, the same thread would
  52  * have to get the same counter twice in the same second.
  53  */
  54
  55 /* Comments:
  56  *
  57  * We want an identifier which is unique across all hits, everywhere.
  58  * "everywhere" includes multiple httpd instances on the same machine, or on
  59  * multiple machines.  Essentially "everywhere" should include all possible
  60  * httpds across all servers at a particular "site".  We make some assumptions
  61  * that if the site has a cluster of machines then their time is relatively
  62  * synchronized.  We also assume that the first address returned by a
  63  * gethostbyname (gethostname()) is unique across all the machines at the
  64  * "site".
  65  *
 * We also further assume that pids fit in 32-bits.  If something uses more
 * than 32-bits, the fix is trivial, but it requires the unrolled uuencoding
 * loop to be extended.  A similar fix is needed to support multithreaded
 * servers, using a pid/tid combo.
  70  *
  71  * Together, the in_addr and pid are assumed to absolutely uniquely identify
  72  * this one child from all other currently running children on all servers
  73  * (including this physical server if it is running multiple httpds) from each
  74  * other.
  75  *
  76  * The stamp and counter are used to distinguish all hits for a particular
  77  * (in_addr,pid) pair.  The stamp is updated using r->request_time,
  78  * saving cpu cycles.  The counter is never reset, and is used to permit up to
  79  * 64k requests in a single second by a single child.
  80  *
 * The 144-bits of unique_id_rec (stamp, in_addr, pid, counter and
 * thread_index, without padding) are encoded using the alphabet
 * [A-Za-z0-9@-], resulting in 24 bytes of printable characters.  That is then
 * stuffed into the environment variable UNIQUE_ID so that it is available to
 * other modules.  The alphabet choice differs from normal base64 encoding
 * [A-Za-z0-9+/] because + and / are special characters in URLs and we want to
 * make it easy to use UNIQUE_ID in URLs.
  87  *
  88  * Note that UNIQUE_ID should be considered an opaque token by other
  89  * applications.  No attempt should be made to dissect its internal components.
  90  * It is an abstraction that may change in the future as the needs of this
  91  * module change.
  92  *
  93  * It is highly desirable that identifiers exist for "eternity".  But future
  94  * needs (such as much faster webservers, moving to 64-bit pids, or moving to a
  95  * multithreaded server) may dictate a need to change the contents of
  96  * unique_id_rec.  Such a future implementation should ensure that the first
  97  * field is still a time_t stamp.  By doing that, it is possible for a site to
  98  * have a "flag second" in which they stop all of their old-format servers,
  99  * wait one entire second, and then start all of their new-servers.  This
 100  * procedure will ensure that the new space of identifiers is completely unique
 101  * from the old space.  (Since the first four unencoded bytes always differ.)
 102  */
 103 /*
 104  * Sun Jun  7 05:43:49 CEST 1998 -- Alvaro
 105  * More comments:
 * 1) The UUencoding procedure is now done in a general way, avoiding the problems
 * with sizes and paddings that can arise depending on the architecture. Now the
 * offsets and sizes of the elements of the unique_id_rec structure are calculated
 * in unique_id_global_init; and then used to duplicate the structure without the
 * paddings that might exist. The multithreaded server fix should be now very easy:
 * just add a new "tid" field to the unique_id_rec structure, and increase by one
 * UNIQUE_ID_REC_MAX.
 113  * 2) unique_id_rec.stamp has been changed from "time_t" to "unsigned int", because
 114  * its size is 64bits on some platforms (linux/alpha), and this caused problems with
 115  * htonl/ntohl. Well, this shouldn't be a problem till year 2106.
 116  */
 117
 118 static unsigned global_in_addr;
 119
 120 static unique_id_rec cur_unique_id;
 121
 122 /*
 123  * Number of elements in the structure unique_id_rec.
 124  */
 125 #define UNIQUE_ID_REC_MAX 5
 126
 127 static unsigned short unique_id_rec_offset[UNIQUE_ID_REC_MAX],
 128                       unique_id_rec_size[UNIQUE_ID_REC_MAX],
 129                       unique_id_rec_total_size,
 130                       unique_id_rec_size_uu;
 131
 132 static int unique_id_global_init(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *main_server)
 133 {
 134     char str[APRMAXHOSTLEN + 1];
 135     apr_status_t rv;
 136     char *ipaddrstr;
 137     apr_sockaddr_t *sockaddr;
 138
 139     /*
 140      * Calculate the sizes and offsets in cur_unique_id.
 141      */
 142     unique_id_rec_offset[0] = APR_OFFSETOF(unique_id_rec, stamp);
 143     unique_id_rec_size[0] = sizeof(cur_unique_id.stamp);
 144     unique_id_rec_offset[1] = APR_OFFSETOF(unique_id_rec, in_addr);
 145     unique_id_rec_size[1] = sizeof(cur_unique_id.in_addr);
 146     unique_id_rec_offset[2] = APR_OFFSETOF(unique_id_rec, pid);
 147     unique_id_rec_size[2] = sizeof(cur_unique_id.pid);
 148     unique_id_rec_offset[3] = APR_OFFSETOF(unique_id_rec, counter);
 149     unique_id_rec_size[3] = sizeof(cur_unique_id.counter);
 150     unique_id_rec_offset[4] = APR_OFFSETOF(unique_id_rec, thread_index);
 151     unique_id_rec_size[4] = sizeof(cur_unique_id.thread_index);
 152     unique_id_rec_total_size = unique_id_rec_size[0] + unique_id_rec_size[1] +
 153                                unique_id_rec_size[2] + unique_id_rec_size[3] +
 154                                unique_id_rec_size[4];
 155
 156     /*
 157      * Calculate the size of the structure when encoded.
 158      */
 159     unique_id_rec_size_uu = (unique_id_rec_total_size*8+5)/6;
 160
 161     /*
 162      * Now get the global in_addr.  Note that it is not sufficient to use one
 163      * of the addresses from the main_server, since those aren't as likely to
 164      * be unique as the physical address of the machine
 165      */
 166     if ((rv = apr_gethostname(str, sizeof(str) - 1, p)) != APR_SUCCESS) {
 167         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, main_server,
 168           "mod_unique_id: unable to find hostname of the server");
 169         return HTTP_INTERNAL_SERVER_ERROR;
 170     }
 171
 172     if ((rv = apr_sockaddr_info_get(&sockaddr, str, AF_INET, 0, 0, p)) == APR_SUCCESS) {
 173         global_in_addr = sockaddr->sa.sin.sin_addr.s_addr;
 174     }
 175     else {
 176         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, main_server,
 177                     "mod_unique_id: unable to find IPv4 address of \"%s\"", str);
 178 #if APR_HAVE_IPV6
 179         if ((rv = apr_sockaddr_info_get(&sockaddr, str, AF_INET6, 0, 0, p)) == APR_SUCCESS) {
 180             memcpy(&global_in_addr,
 181                    (char *)sockaddr->ipaddr_ptr + sockaddr->ipaddr_len - sizeof(global_in_addr),
 182                    sizeof(global_in_addr));
 183             ap_log_error(APLOG_MARK, APLOG_ALERT, rv, main_server,
 184                          "mod_unique_id: using low-order bits of IPv6 address "
 185                          "as if they were unique");
 186         }
 187         else
 188 #endif
 189         return HTTP_INTERNAL_SERVER_ERROR;
 190     }
 191
 192     apr_sockaddr_ip_get(&ipaddrstr, sockaddr);
 193     ap_log_error(APLOG_MARK, APLOG_INFO, 0, main_server,
 194                 "mod_unique_id: using ip addr %s",
 195                  ipaddrstr);
 196
 197     /*
 198      * If the server is pummelled with restart requests we could possibly end
 199      * up in a situation where we're starting again during the same second
 200      * that has been used in previous identifiers.  Avoid that situation.
 201      *
 202      * In truth, for this to actually happen not only would it have to restart
 203      * in the same second, but it would have to somehow get the same pids as
 204      * one of the other servers that was running in that second. Which would
 205      * mean a 64k wraparound on pids ... not very likely at all.
 206      *
 207      * But protecting against it is relatively cheap.  We just sleep into the
 208      * next second.
 209      */
 210     apr_sleep(apr_time_from_sec(1) - apr_time_usec(apr_time_now()));
 211     return OK;
 212 }
 213
 214 static void unique_id_child_init(apr_pool_t *p, server_rec *s)
 215 {
 216     pid_t pid;
 217     apr_time_t tv;
 218
 219     /*
 220      * Note that we use the pid because it's possible that on the same
 221      * physical machine there are multiple servers (i.e. using Listen). But
 222      * it's guaranteed that none of them will share the same pids between
 223      * children.
 224      *
 225      * XXX: for multithread this needs to use a pid/tid combo and probably
 226      * needs to be expanded to 32 bits
 227      */
 228     pid = getpid();
 229     cur_unique_id.pid = pid;
 230
 231     /*
 232      * Test our assumption that the pid is 32-bits.  It's possible that
 233      * 64-bit machines will declare pid_t to be 64 bits but only use 32
 234      * of them.  It would have been really nice to test this during
 235      * global_init ... but oh well.
 236      */
 237     if ((pid_t)cur_unique_id.pid != pid) {
 238         ap_log_error(APLOG_MARK, APLOG_CRIT, 0, s,
 239                     "oh no! pids are greater than 32-bits!  I'm broken!");
 240     }
 241
 242     cur_unique_id.in_addr = global_in_addr;
 243
 244     /*
 245      * If we use 0 as the initial counter we have a little less protection
 246      * against restart problems, and a little less protection against a clock
 247      * going backwards in time.
 248      */
 249     tv = apr_time_now();
 250     /* Some systems have very low variance on the low end of their system
 251      * counter, defend against that.
 252      */
 253     cur_unique_id.counter = (unsigned short)(apr_time_usec(tv) / 10);
 254
 255     /*
 256      * We must always use network ordering for these bytes, so that
 257      * identifiers are comparable between machines of different byte
 258      * orderings.  Note in_addr is already in network order.
 259      */
 260     cur_unique_id.pid = htonl(cur_unique_id.pid);
 261     cur_unique_id.counter = htons(cur_unique_id.counter);
 262 }
 263
 264 /* NOTE: This is *NOT* the same encoding used by base64encode ... the last two
 265  * characters should be + and /.  But those two characters have very special
 266  * meanings in URLs, and we want to make it easy to use identifiers in
 267  * URLs.  So we replace them with @ and -.
 268  */
 269 static const char uuencoder[64] = {
 270     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
 271     'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
 272     'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
 273     'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
 274     '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '@', '-',
 275 };
 276
 277 static int gen_unique_id(request_rec *r)
 278 {
 279     char *str;
 280     /*
 281      * Buffer padded with two final bytes, used to copy the unique_id_red
 282      * structure without the internal paddings that it could have.
 283      */
 284     unique_id_rec new_unique_id;
 285     struct {
 286         unique_id_rec foo;
 287         unsigned char pad[2];
 288     } paddedbuf;
 289     unsigned char *x,*y;
 290     unsigned short counter;
 291     const char *e;
 292     int i,j,k;
 293
 294     /* copy the unique_id if this is an internal redirect (we're never
 295      * actually called for sub requests, so we don't need to test for
 296      * them) */
 297     if (r->prev && (e = apr_table_get(r->subprocess_env, "REDIRECT_UNIQUE_ID"))) {
 298         apr_table_setn(r->subprocess_env, "UNIQUE_ID", e);
 299         return DECLINED;
 300     }
 301
 302     new_unique_id.in_addr = cur_unique_id.in_addr;
 303     new_unique_id.pid = cur_unique_id.pid;
 304     new_unique_id.counter = cur_unique_id.counter;
 305
 306     new_unique_id.stamp = htonl((unsigned int)r->request_time);
 307     new_unique_id.thread_index = htonl((unsigned int)r->connection->id);
 308
 309     /* we'll use a temporal buffer to avoid uuencoding the possible internal
 310      * paddings of the original structure */
 311     x = (unsigned char *) &paddedbuf;
 312     y = (unsigned char *) &new_unique_id;
 313     k = 0;
 314     for (i = 0; i < UNIQUE_ID_REC_MAX; i++) {
 315         y = ((unsigned char *) &new_unique_id) + unique_id_rec_offset[i];
 316         for (j = 0; j < unique_id_rec_size[i]; j++, k++) {
 317             x[k] = y[j];
 318         }
 319     }
 320     /*
 321      * We reset two more bytes just in case padding is needed for the uuencoding.
 322      */
 323     x[k++] = '\0';
 324     x[k++] = '\0';
 325
 326     /* alloc str and do the uuencoding */
 327     str = (char *)apr_palloc(r->pool, unique_id_rec_size_uu + 1);
 328     k = 0;
 329     for (i = 0; i < unique_id_rec_total_size; i += 3) {
 330         y = x + i;
 331         str[k++] = uuencoder[y[0] >> 2];
 332         str[k++] = uuencoder[((y[0] & 0x03) << 4) | ((y[1] & 0xf0) >> 4)];
 333         if (k == unique_id_rec_size_uu) break;
 334         str[k++] = uuencoder[((y[1] & 0x0f) << 2) | ((y[2] & 0xc0) >> 6)];
 335         if (k == unique_id_rec_size_uu) break;
 336         str[k++] = uuencoder[y[2] & 0x3f];
 337     }
 338     str[k++] = '\0';
 339
 340     /* set the environment variable */
 341     apr_table_setn(r->subprocess_env, "UNIQUE_ID", str);
 342
 343     /* and increment the identifier for the next call */
 344
 345     counter = ntohs(new_unique_id.counter) + 1;
 346     cur_unique_id.counter = htons(counter);
 347
 348     return DECLINED;
 349 }
 350
 351 static void register_hooks(apr_pool_t *p)
 352 {
 353     ap_hook_post_config(unique_id_global_init, NULL, NULL, APR_HOOK_MIDDLE);
 354     ap_hook_child_init(unique_id_child_init, NULL, NULL, APR_HOOK_MIDDLE);
 355     ap_hook_post_read_request(gen_unique_id, NULL, NULL, APR_HOOK_MIDDLE);
 356 }
 357
 358 module AP_MODULE_DECLARE_DATA unique_id_module = {
 359     STANDARD20_MODULE_STUFF,
 360     NULL,                       /* dir config creater */
 361     NULL,                       /* dir merger --- default is to override */
 362     NULL,                       /* server config */
 363     NULL,                       /* merge server configs */
 364     NULL,                       /* command apr_table_t */
 365     register_hooks              /* register hooks */
 366 };