1 /*-------------------------------------------------------------------------
4 * PostgreSQL generic bitmap set package
6 * A bitmap set can represent any set of nonnegative integers, although
7 * it is mainly intended for sets where the maximum value is not large,
8 * say at most a few hundred. By convention, a NULL pointer is always
9 * accepted by all operations to represent the empty set. (But beware
10 * that this is not the only representation of the empty set. Use
11 * bms_is_empty() in preference to testing for NULL.)
14 * Copyright (c) 2003-2012, PostgreSQL Global Development Group
17 * src/backend/nodes/bitmapset.c
19 *-------------------------------------------------------------------------
23 #include "access/hash.h"
26 #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
27 #define BITNUM(x) ((x) % BITS_PER_BITMAPWORD)
29 #define BITMAPSET_SIZE(nwords) \
30 (offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword))
33 * This is a well-known cute trick for isolating the rightmost one-bit
34 * in a word. It assumes two's complement arithmetic. Consider any
35 * nonzero value, and focus attention on the rightmost one. The value is
38 * where x's are unspecified bits. The two's complement negative is formed
39 * by inverting all the bits and adding one. Inversion gives
41 * where each y is the inverse of the corresponding x. Incrementing gives
43 * and then ANDing with the original value gives
45 * This works for all cases except original value = zero, where of course
49 #define RIGHTMOST_ONE(x) ((signedbitmapword) (x) & -((signedbitmapword) (x)))
51 #define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x))
55 * Lookup tables to avoid need for bit-by-bit groveling
57 * rightmost_one_pos[x] gives the bit number (0-7) of the rightmost one bit
58 * in a nonzero byte value x. The entry for x=0 is never used.
60 * number_of_ones[x] gives the number of one-bits (0-8) in a byte value x.
62 * We could make these tables larger and reduce the number of iterations
63 * in the functions that use them, but bytewise shifts and masks are
64 * especially fast on many machines, so working a byte at a time seems best.
67 static const uint8 rightmost_one_pos[256] = {
68 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
69 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
70 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
71 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
72 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
73 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
74 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
75 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
76 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
77 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
78 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
79 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
80 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
81 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
82 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
83 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
86 static const uint8 number_of_ones[256] = {
87 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
88 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
89 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
91 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
92 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
93 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
94 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
95 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
96 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
97 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
99 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
100 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
101 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
107 * bms_copy - make a palloc'd copy of a bitmapset
110 bms_copy(const Bitmapset *a)
117 size = BITMAPSET_SIZE(a->nwords);
118 result = (Bitmapset *) palloc(size);
119 memcpy(result, a, size);
124 * bms_equal - are two bitmapsets equal?
126 * This is logical not physical equality; in particular, a NULL pointer will
127 * be reported as equal to a palloc'd value containing no members.
130 bms_equal(const Bitmapset *a, const Bitmapset *b)
132 const Bitmapset *shorter;
133 const Bitmapset *longer;
138 /* Handle cases where either input is NULL */
143 return bms_is_empty(b);
146 return bms_is_empty(a);
147 /* Identify shorter and longer input */
148 if (a->nwords <= b->nwords)
159 shortlen = shorter->nwords;
160 for (i = 0; i < shortlen; i++)
162 if (shorter->words[i] != longer->words[i])
165 longlen = longer->nwords;
166 for (; i < longlen; i++)
168 if (longer->words[i] != 0)
175 * bms_make_singleton - build a bitmapset containing a single member
178 bms_make_singleton(int x)
185 elog(ERROR, "negative bitmapset member not allowed");
186 wordnum = WORDNUM(x);
188 result = (Bitmapset *) palloc0(BITMAPSET_SIZE(wordnum + 1));
189 result->nwords = wordnum + 1;
190 result->words[wordnum] = ((bitmapword) 1 << bitnum);
195 * bms_free - free a bitmapset
197 * Same as pfree except for allowing NULL input
200 bms_free(Bitmapset *a)
208 * These operations all make a freshly palloc'd result,
209 * leaving their inputs untouched
214 * bms_union - set union
217 bms_union(const Bitmapset *a, const Bitmapset *b)
220 const Bitmapset *other;
224 /* Handle cases where either input is NULL */
229 /* Identify shorter and longer input; copy the longer one */
230 if (a->nwords <= b->nwords)
232 result = bms_copy(b);
237 result = bms_copy(a);
240 /* And union the shorter input into the result */
241 otherlen = other->nwords;
242 for (i = 0; i < otherlen; i++)
243 result->words[i] |= other->words[i];
248 * bms_intersect - set intersection
251 bms_intersect(const Bitmapset *a, const Bitmapset *b)
254 const Bitmapset *other;
258 /* Handle cases where either input is NULL */
259 if (a == NULL || b == NULL)
261 /* Identify shorter and longer input; copy the shorter one */
262 if (a->nwords <= b->nwords)
264 result = bms_copy(a);
269 result = bms_copy(b);
272 /* And intersect the longer input with the result */
273 resultlen = result->nwords;
274 for (i = 0; i < resultlen; i++)
275 result->words[i] &= other->words[i];
280 * bms_difference - set difference (ie, A without members of B)
283 bms_difference(const Bitmapset *a, const Bitmapset *b)
289 /* Handle cases where either input is NULL */
294 /* Copy the left input */
295 result = bms_copy(a);
296 /* And remove b's bits from result */
297 shortlen = Min(a->nwords, b->nwords);
298 for (i = 0; i < shortlen; i++)
299 result->words[i] &= ~b->words[i];
304 * bms_is_subset - is A a subset of B?
307 bms_is_subset(const Bitmapset *a, const Bitmapset *b)
313 /* Handle cases where either input is NULL */
315 return true; /* empty set is a subset of anything */
317 return bms_is_empty(a);
318 /* Check common words */
319 shortlen = Min(a->nwords, b->nwords);
320 for (i = 0; i < shortlen; i++)
322 if ((a->words[i] & ~b->words[i]) != 0)
325 /* Check extra words */
326 if (a->nwords > b->nwords)
329 for (; i < longlen; i++)
331 if (a->words[i] != 0)
339 * bms_subset_compare - compare A and B for equality/subset relationships
341 * This is more efficient than testing bms_is_subset in both directions.
344 bms_subset_compare(const Bitmapset *a, const Bitmapset *b)
346 BMS_Comparison result;
351 /* Handle cases where either input is NULL */
356 return bms_is_empty(b) ? BMS_EQUAL : BMS_SUBSET1;
359 return bms_is_empty(a) ? BMS_EQUAL : BMS_SUBSET2;
360 /* Check common words */
361 result = BMS_EQUAL; /* status so far */
362 shortlen = Min(a->nwords, b->nwords);
363 for (i = 0; i < shortlen; i++)
365 bitmapword aword = a->words[i];
366 bitmapword bword = b->words[i];
368 if ((aword & ~bword) != 0)
370 /* a is not a subset of b */
371 if (result == BMS_SUBSET1)
372 return BMS_DIFFERENT;
373 result = BMS_SUBSET2;
375 if ((bword & ~aword) != 0)
377 /* b is not a subset of a */
378 if (result == BMS_SUBSET2)
379 return BMS_DIFFERENT;
380 result = BMS_SUBSET1;
383 /* Check extra words */
384 if (a->nwords > b->nwords)
387 for (; i < longlen; i++)
389 if (a->words[i] != 0)
391 /* a is not a subset of b */
392 if (result == BMS_SUBSET1)
393 return BMS_DIFFERENT;
394 result = BMS_SUBSET2;
398 else if (a->nwords < b->nwords)
401 for (; i < longlen; i++)
403 if (b->words[i] != 0)
405 /* b is not a subset of a */
406 if (result == BMS_SUBSET2)
407 return BMS_DIFFERENT;
408 result = BMS_SUBSET1;
416 * bms_is_member - is X a member of A?
419 bms_is_member(int x, const Bitmapset *a)
424 /* XXX better to just return false for x<0 ? */
426 elog(ERROR, "negative bitmapset member not allowed");
429 wordnum = WORDNUM(x);
431 if (wordnum >= a->nwords)
433 if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
439 * bms_overlap - do sets overlap (ie, have a nonempty intersection)?
442 bms_overlap(const Bitmapset *a, const Bitmapset *b)
447 /* Handle cases where either input is NULL */
448 if (a == NULL || b == NULL)
450 /* Check words in common */
451 shortlen = Min(a->nwords, b->nwords);
452 for (i = 0; i < shortlen; i++)
454 if ((a->words[i] & b->words[i]) != 0)
461 * bms_nonempty_difference - do sets have a nonempty difference?
464 bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
469 /* Handle cases where either input is NULL */
473 return !bms_is_empty(a);
474 /* Check words in common */
475 shortlen = Min(a->nwords, b->nwords);
476 for (i = 0; i < shortlen; i++)
478 if ((a->words[i] & ~b->words[i]) != 0)
481 /* Check extra words in a */
482 for (; i < a->nwords; i++)
484 if (a->words[i] != 0)
491 * bms_singleton_member - return the sole integer member of set
493 * Raises error if |a| is not 1.
496 bms_singleton_member(const Bitmapset *a)
503 elog(ERROR, "bitmapset is empty");
505 for (wordnum = 0; wordnum < nwords; wordnum++)
507 bitmapword w = a->words[wordnum];
511 if (result >= 0 || HAS_MULTIPLE_ONES(w))
512 elog(ERROR, "bitmapset has multiple members");
513 result = wordnum * BITS_PER_BITMAPWORD;
514 while ((w & 255) == 0)
519 result += rightmost_one_pos[w & 255];
523 elog(ERROR, "bitmapset is empty");
528 * bms_num_members - count members of set
531 bms_num_members(const Bitmapset *a)
540 for (wordnum = 0; wordnum < nwords; wordnum++)
542 bitmapword w = a->words[wordnum];
544 /* we assume here that bitmapword is an unsigned type */
547 result += number_of_ones[w & 255];
555 * bms_membership - does a set have zero, one, or multiple members?
557 * This is faster than making an exact count with bms_num_members().
560 bms_membership(const Bitmapset *a)
562 BMS_Membership result = BMS_EMPTY_SET;
567 return BMS_EMPTY_SET;
569 for (wordnum = 0; wordnum < nwords; wordnum++)
571 bitmapword w = a->words[wordnum];
575 if (result != BMS_EMPTY_SET || HAS_MULTIPLE_ONES(w))
577 result = BMS_SINGLETON;
584 * bms_is_empty - is a set empty?
586 * This is even faster than bms_membership().
589 bms_is_empty(const Bitmapset *a)
597 for (wordnum = 0; wordnum < nwords; wordnum++)
599 bitmapword w = a->words[wordnum];
609 * These operations all "recycle" their non-const inputs, ie, either
610 * return the modified input or pfree it if it can't hold the result.
612 * These should generally be used in the style
614 * foo = bms_add_member(foo, x);
619 * bms_add_member - add a specified member to set
621 * Input set is modified or recycled!
624 bms_add_member(Bitmapset *a, int x)
630 elog(ERROR, "negative bitmapset member not allowed");
632 return bms_make_singleton(x);
633 wordnum = WORDNUM(x);
635 if (wordnum >= a->nwords)
637 /* Slow path: make a larger set and union the input set into it */
642 result = bms_make_singleton(x);
644 for (i = 0; i < nwords; i++)
645 result->words[i] |= a->words[i];
649 /* Fast path: x fits in existing set */
650 a->words[wordnum] |= ((bitmapword) 1 << bitnum);
655 * bms_del_member - remove a specified member from set
657 * No error if x is not currently a member of set
659 * Input set is modified in-place!
662 bms_del_member(Bitmapset *a, int x)
668 elog(ERROR, "negative bitmapset member not allowed");
671 wordnum = WORDNUM(x);
673 if (wordnum < a->nwords)
674 a->words[wordnum] &= ~((bitmapword) 1 << bitnum);
679 * bms_add_members - like bms_union, but left input is recycled
682 bms_add_members(Bitmapset *a, const Bitmapset *b)
685 const Bitmapset *other;
689 /* Handle cases where either input is NULL */
694 /* Identify shorter and longer input; copy the longer one if needed */
695 if (a->nwords < b->nwords)
697 result = bms_copy(b);
705 /* And union the shorter input into the result */
706 otherlen = other->nwords;
707 for (i = 0; i < otherlen; i++)
708 result->words[i] |= other->words[i];
715 * bms_int_members - like bms_intersect, but left input is recycled
718 bms_int_members(Bitmapset *a, const Bitmapset *b)
723 /* Handle cases where either input is NULL */
731 /* Intersect b into a; we need never copy */
732 shortlen = Min(a->nwords, b->nwords);
733 for (i = 0; i < shortlen; i++)
734 a->words[i] &= b->words[i];
735 for (; i < a->nwords; i++)
741 * bms_del_members - like bms_difference, but left input is recycled
744 bms_del_members(Bitmapset *a, const Bitmapset *b)
749 /* Handle cases where either input is NULL */
754 /* Remove b's bits from a; we need never copy */
755 shortlen = Min(a->nwords, b->nwords);
756 for (i = 0; i < shortlen; i++)
757 a->words[i] &= ~b->words[i];
762 * bms_join - like bms_union, but *both* inputs are recycled
765 bms_join(Bitmapset *a, Bitmapset *b)
772 /* Handle cases where either input is NULL */
777 /* Identify shorter and longer input; use longer one as result */
778 if (a->nwords < b->nwords)
788 /* And union the shorter input into the result */
789 otherlen = other->nwords;
790 for (i = 0; i < otherlen; i++)
791 result->words[i] |= other->words[i];
792 if (other != result) /* pure paranoia */
798 * bms_first_member - find and remove first member of a set
800 * Returns -1 if set is empty. NB: set is destructively modified!
802 * This is intended as support for iterating through the members of a set.
803 * The typical pattern is
805 * tmpset = bms_copy(inputset);
806 * while ((x = bms_first_member(tmpset)) >= 0)
812 bms_first_member(Bitmapset *a)
820 for (wordnum = 0; wordnum < nwords; wordnum++)
822 bitmapword w = a->words[wordnum];
828 w = RIGHTMOST_ONE(w);
829 a->words[wordnum] &= ~w;
831 result = wordnum * BITS_PER_BITMAPWORD;
832 while ((w & 255) == 0)
837 result += rightmost_one_pos[w & 255];
845 * bms_hash_value - compute a hash key for a Bitmapset
847 * Note: we must ensure that any two bitmapsets that are bms_equal() will
848 * hash to the same value; in practice this means that trailing all-zero
849 * words must not affect the result. Hence we strip those before applying
853 bms_hash_value(const Bitmapset *a)
858 return 0; /* All empty sets hash to 0 */
859 for (lastword = a->nwords; --lastword >= 0;)
861 if (a->words[lastword] != 0)
865 return 0; /* All empty sets hash to 0 */
866 return DatumGetUInt32(hash_any((const unsigned char *) a->words,
867 (lastword + 1) * sizeof(bitmapword)));