1 /*-------------------------------------------------------------------------
4 * PostgreSQL generic bitmap set package
6 * A bitmap set can represent any set of nonnegative integers, although
7 * it is mainly intended for sets where the maximum value is not large,
8 * say at most a few hundred. By convention, a NULL pointer is always
9 * accepted by all operations to represent the empty set. (But beware
10 * that this is not the only representation of the empty set. Use
11 * bms_is_empty() in preference to testing for NULL.)
14 * Copyright (c) 2003, PostgreSQL Global Development Group
17 * $Header: /cvsroot/pgsql/src/backend/nodes/bitmapset.c,v 1.2 2003/06/29 23:05:04 tgl Exp $
19 *-------------------------------------------------------------------------
23 #include "nodes/bitmapset.h"
26 #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
27 #define BITNUM(x) ((x) % BITS_PER_BITMAPWORD)
29 #define BITMAPSET_SIZE(nwords) \
30 (offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword))
33 * This is a well-known cute trick for isolating the rightmost one-bit
34 * in a word. It assumes two's complement arithmetic. Consider any
35 * nonzero value, and focus attention on the rightmost one. The value is
38 * where x's are unspecified bits. The two's complement negative is formed
39 * by inverting all the bits and adding one. Inversion gives
41 * where each y is the inverse of the corresponding x. Incrementing gives
43 * and then ANDing with the original value gives
45 * This works for all cases except original value = zero, where of course
49 #define RIGHTMOST_ONE(x) ((signedbitmapword) (x) & -((signedbitmapword) (x)))
51 #define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x))
55 * Lookup tables to avoid need for bit-by-bit groveling
57 * rightmost_one_pos[x] gives the bit number (0-7) of the rightmost one bit
58 * in a nonzero byte value x. The entry for x=0 is never used.
60 * number_of_ones[x] gives the number of one-bits (0-8) in a byte value x.
62 * We could make these tables larger and reduce the number of iterations
63 * in the functions that use them, but bytewise shifts and masks are
64 * especially fast on many machines, so working a byte at a time seems best.
67 static const uint8 rightmost_one_pos[256] = {
68 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
69 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
70 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
71 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
72 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
73 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
74 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
75 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
76 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
77 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
78 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
79 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
80 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
81 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
82 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
83 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
86 static const uint8 number_of_ones[256] = {
87 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
88 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
89 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
91 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
92 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
93 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
94 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
95 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
96 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
97 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
99 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
100 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
101 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
107 * bms_copy - make a palloc'd copy of a bitmapset
110 bms_copy(const Bitmapset *a)
117 size = BITMAPSET_SIZE(a->nwords);
118 result = (Bitmapset *) palloc(size);
119 memcpy(result, a, size);
124 * bms_equal - are two bitmapsets equal?
126 * This is logical not physical equality; in particular, a NULL pointer will
127 * be reported as equal to a palloc'd value containing no members.
130 bms_equal(const Bitmapset *a, const Bitmapset *b)
132 const Bitmapset *shorter;
133 const Bitmapset *longer;
138 /* Handle cases where either input is NULL */
143 return bms_is_empty(b);
147 return bms_is_empty(a);
149 /* Identify shorter and longer input */
150 if (a->nwords <= b->nwords)
161 shortlen = shorter->nwords;
162 for (i = 0; i < shortlen; i++)
164 if (shorter->words[i] != longer->words[i])
167 longlen = longer->nwords;
168 for (; i < longlen; i++)
170 if (longer->words[i] != 0)
177 * bms_make_singleton - build a bitmapset containing a single member
180 bms_make_singleton(int x)
187 elog(ERROR, "bms_make_singleton: negative set member not allowed");
188 wordnum = WORDNUM(x);
190 result = (Bitmapset *) palloc0(BITMAPSET_SIZE(wordnum + 1));
191 result->nwords = wordnum + 1;
192 result->words[wordnum] = ((bitmapword) 1 << bitnum);
197 * bms_free - free a bitmapset
199 * Same as pfree except for allowing NULL input
202 bms_free(Bitmapset *a)
210 * These operations all make a freshly palloc'd result,
211 * leaving their inputs untouched
216 * bms_union - set union
219 bms_union(const Bitmapset *a, const Bitmapset *b)
222 const Bitmapset *other;
226 /* Handle cases where either input is NULL */
231 /* Identify shorter and longer input; copy the longer one */
232 if (a->nwords <= b->nwords)
234 result = bms_copy(b);
239 result = bms_copy(a);
242 /* And union the shorter input into the result */
243 otherlen = other->nwords;
244 for (i = 0; i < otherlen; i++)
246 result->words[i] |= other->words[i];
252 * bms_intersect - set intersection
255 bms_intersect(const Bitmapset *a, const Bitmapset *b)
258 const Bitmapset *other;
262 /* Handle cases where either input is NULL */
263 if (a == NULL || b == NULL)
265 /* Identify shorter and longer input; copy the shorter one */
266 if (a->nwords <= b->nwords)
268 result = bms_copy(a);
273 result = bms_copy(b);
276 /* And intersect the longer input with the result */
277 resultlen = result->nwords;
278 for (i = 0; i < resultlen; i++)
280 result->words[i] &= other->words[i];
286 * bms_difference - set difference (ie, A without members of B)
289 bms_difference(const Bitmapset *a, const Bitmapset *b)
295 /* Handle cases where either input is NULL */
300 /* Copy the left input */
301 result = bms_copy(a);
302 /* And remove b's bits from result */
303 shortlen = Min(a->nwords, b->nwords);
304 for (i = 0; i < shortlen; i++)
306 result->words[i] &= ~ b->words[i];
312 * bms_is_subset - is A a subset of B?
315 bms_is_subset(const Bitmapset *a, const Bitmapset *b)
321 /* Handle cases where either input is NULL */
323 return true; /* empty set is a subset of anything */
325 return bms_is_empty(a);
326 /* Check common words */
327 shortlen = Min(a->nwords, b->nwords);
328 for (i = 0; i < shortlen; i++)
330 if ((a->words[i] & ~ b->words[i]) != 0)
333 /* Check extra words */
334 if (a->nwords > b->nwords)
337 for (; i < longlen; i++)
339 if (a->words[i] != 0)
347 * bms_is_member - is X a member of A?
350 bms_is_member(int x, const Bitmapset *a)
355 /* XXX better to just return false for x<0 ? */
357 elog(ERROR, "bms_is_member: negative set member not allowed");
360 wordnum = WORDNUM(x);
362 if (wordnum >= a->nwords)
364 if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
370 * bms_overlap - do sets overlap (ie, have a nonempty intersection)?
373 bms_overlap(const Bitmapset *a, const Bitmapset *b)
378 /* Handle cases where either input is NULL */
379 if (a == NULL || b == NULL)
381 /* Check words in common */
382 shortlen = Min(a->nwords, b->nwords);
383 for (i = 0; i < shortlen; i++)
385 if ((a->words[i] & b->words[i]) != 0)
392 * bms_nonempty_difference - do sets have a nonempty difference?
395 bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
400 /* Handle cases where either input is NULL */
404 return !bms_is_empty(a);
405 /* Check words in common */
406 shortlen = Min(a->nwords, b->nwords);
407 for (i = 0; i < shortlen; i++)
409 if ((a->words[i] & ~ b->words[i]) != 0)
412 /* Check extra words in a */
413 for (; i < a->nwords; i++)
415 if (a->words[i] != 0)
422 * bms_singleton_member - return the sole integer member of set
424 * Raises error if |a| is not 1.
427 bms_singleton_member(const Bitmapset *a)
434 elog(ERROR, "bms_singleton_member: set is empty");
436 for (wordnum = 0; wordnum < nwords; wordnum++)
438 bitmapword w = a->words[wordnum];
442 if (result >= 0 || HAS_MULTIPLE_ONES(w))
443 elog(ERROR, "bms_singleton_member: set has multiple members");
444 result = wordnum * BITS_PER_BITMAPWORD;
445 while ((w & 255) == 0)
450 result += rightmost_one_pos[w & 255];
454 elog(ERROR, "bms_singleton_member: set is empty");
459 * bms_num_members - count members of set
462 bms_num_members(const Bitmapset *a)
471 for (wordnum = 0; wordnum < nwords; wordnum++)
473 bitmapword w = a->words[wordnum];
475 /* we assume here that bitmapword is an unsigned type */
478 result += number_of_ones[w & 255];
486 * bms_membership - does a set have zero, one, or multiple members?
488 * This is faster than making an exact count with bms_num_members().
491 bms_membership(const Bitmapset *a)
493 BMS_Membership result = BMS_EMPTY_SET;
498 return BMS_EMPTY_SET;
500 for (wordnum = 0; wordnum < nwords; wordnum++)
502 bitmapword w = a->words[wordnum];
506 if (result != BMS_EMPTY_SET || HAS_MULTIPLE_ONES(w))
508 result = BMS_SINGLETON;
515 * bms_is_empty - is a set empty?
517 * This is even faster than bms_membership().
520 bms_is_empty(const Bitmapset *a)
528 for (wordnum = 0; wordnum < nwords; wordnum++)
530 bitmapword w = a->words[wordnum];
540 * These operations all "recycle" their non-const inputs, ie, either
541 * return the modified input or pfree it if it can't hold the result.
543 * These should generally be used in the style
545 * foo = bms_add_member(foo, x);
550 * bms_add_member - add a specified member to set
552 * Input set is modified or recycled!
555 bms_add_member(Bitmapset *a, int x)
561 elog(ERROR, "bms_add_member: negative set member not allowed");
563 return bms_make_singleton(x);
564 wordnum = WORDNUM(x);
566 if (wordnum >= a->nwords)
568 /* Slow path: make a larger set and union the input set into it */
573 result = bms_make_singleton(x);
575 for (i = 0; i < nwords; i++)
577 result->words[i] |= a->words[i];
582 /* Fast path: x fits in existing set */
583 a->words[wordnum] |= ((bitmapword) 1 << bitnum);
588 * bms_del_member - remove a specified member from set
590 * No error if x is not currently a member of set
592 * Input set is modified in-place!
595 bms_del_member(Bitmapset *a, int x)
601 elog(ERROR, "bms_del_member: negative set member not allowed");
604 wordnum = WORDNUM(x);
606 if (wordnum < a->nwords)
608 a->words[wordnum] &= ~ ((bitmapword) 1 << bitnum);
614 * bms_add_members - like bms_union, but left input is recycled
617 bms_add_members(Bitmapset *a, const Bitmapset *b)
620 const Bitmapset *other;
624 /* Handle cases where either input is NULL */
629 /* Identify shorter and longer input; copy the longer one if needed */
630 if (a->nwords < b->nwords)
632 result = bms_copy(b);
640 /* And union the shorter input into the result */
641 otherlen = other->nwords;
642 for (i = 0; i < otherlen; i++)
644 result->words[i] |= other->words[i];
652 * bms_int_members - like bms_intersect, but left input is recycled
655 bms_int_members(Bitmapset *a, const Bitmapset *b)
660 /* Handle cases where either input is NULL */
668 /* Intersect b into a; we need never copy */
669 shortlen = Min(a->nwords, b->nwords);
670 for (i = 0; i < shortlen; i++)
672 a->words[i] &= b->words[i];
674 for (; i < a->nwords; i++)
682 * bms_del_members - like bms_difference, but left input is recycled
685 bms_del_members(Bitmapset *a, const Bitmapset *b)
690 /* Handle cases where either input is NULL */
695 /* Remove b's bits from a; we need never copy */
696 shortlen = Min(a->nwords, b->nwords);
697 for (i = 0; i < shortlen; i++)
699 a->words[i] &= ~ b->words[i];
705 * bms_join - like bms_union, but *both* inputs are recycled
708 bms_join(Bitmapset *a, Bitmapset *b)
715 /* Handle cases where either input is NULL */
720 /* Identify shorter and longer input; use longer one as result */
721 if (a->nwords < b->nwords)
731 /* And union the shorter input into the result */
732 otherlen = other->nwords;
733 for (i = 0; i < otherlen; i++)
735 result->words[i] |= other->words[i];
737 if (other != result) /* pure paranoia */
743 * bms_first_member - find and remove first member of a set
745 * Returns -1 if set is empty. NB: set is destructively modified!
747 * This is intended as support for iterating through the members of a set.
748 * The typical pattern is
750 * tmpset = bms_copy(inputset);
751 * while ((x = bms_first_member(tmpset)) >= 0)
759 bms_first_member(Bitmapset *a)
767 for (wordnum = 0; wordnum < nwords; wordnum++)
769 bitmapword w = a->words[wordnum];
775 w = RIGHTMOST_ONE(w);
776 a->words[wordnum] &= ~ w;
778 result = wordnum * BITS_PER_BITMAPWORD;
779 while ((w & 255) == 0)
784 result += rightmost_one_pos[w & 255];