Differences From:
File
src/bag.c
part of check-in
[e15fe43153]
- A new decendent finding algorithm is (hopefully) faster. Changes to
the timeline are in process and might not yet work.
by
drh on
2007-08-31 20:14:33.
Also file
src/bag.c
part of check-in
[bbcb6326c9]
- Pulled in the navbar and timeline changes.
by
aku on
2007-09-17 00:58:51.
[view]
To:
File
src/bag.c
part of check-in
[7a2c37063a]
- merge trunk into creole branch
by
bob on
2009-09-22 07:49:39.
Also file
src/bag.c
part of check-in
[1409fbe38c]
- Performance improvements to the "bag" object.
by
drh on
2009-08-27 14:04:39.
[view]
@@ -33,8 +33,23 @@
#if INTERFACE
/*
** An integer can appear in the bag at most once.
** Integers must be positive.
+**
+** On a hash collision, search continues to the next slot in the array,
+** looping back to the beginning of the array when we reach the end.
+** The search stops when a match is found or upon encountering a 0 entry.
+**
+** When an entry is deleted, its value is changed to -1.
+**
+** Bag.cnt is the number of live entries in the table. Bag.used is
+** the number of live entries plus the number of deleted entries. So
+** Bag.used>=Bag.cnt. We want to keep Bag.used-Bag.cnt as small as
+** possible.
+**
+** The length of a search increases as the hash table fills up. So
+** the table is enlarged whenever Bag.used reaches half of Bag.sz. That
+** way, the expected collision length never exceeds 2.
*/
struct Bag {
int cnt; /* Number of integers in the bag */
int sz; /* Number of slots in a[] */
@@ -65,12 +80,18 @@
/*
** Change the size of the hash table on a bag so that
** it contains N slots
+**
+** Completely reconstruct the hash table from scratch. Deleted
+** entries (indicated by a -1) are removed. When finished, it
+** should be the case that p->cnt==p->used.
*/
static void bag_resize(Bag *p, int newSize){
int i;
Bag old;
+ int nDel = 0; /* Number of deleted entries */
+ int nLive = 0; /* Number of live entries */
old = *p;
assert( newSize>old.cnt );
p->a = malloc( sizeof(p->a[0])*newSize );
@@ -84,10 +105,15 @@
h++;
if( h==newSize ) h = 0;
}
p->a[h] = e;
+ nLive++;
+ }else if( e<0 ){
+ nDel++;
}
}
+ assert( p->cnt == nLive );
+ assert( p->used == nLive+nDel );
p->used = p->cnt;
bag_clear(&old);
}
@@ -100,18 +126,19 @@
unsigned h;
int rc = 0;
assert( e>0 );
if( p->used+1 >= p->sz/2 ){
- bag_resize(p, p->cnt*2 + 20 );
+ int n = p->sz*2;
+ bag_resize(p, n + 20 );
}
h = bag_hash(e)%p->sz;
- while( p->a[h] && p->a[h]!=e ){
+ while( p->a[h]>0 && p->a[h]!=e ){
h++;
if( h>=p->sz ) h = 0;
}
- if( p->a[h]==0 ){
- p->a[h] = e;
- p->used++;
+ if( p->a[h]<=0 ){
+ if( p->a[h]==0 ) p->used++;
+ p->a[h] = e;
p->cnt++;
rc = 1;
}
return rc;
@@ -147,11 +174,21 @@
h++;
if( h>=p->sz ) h = 0;
}
if( p->a[h] ){
- p->a[h] = -1;
+ int nx = h+1;
+ if( nx>=p->sz ) nx = 0;
+ if( p->a[nx]==0 ){
+ p->a[h] = 0;
+ p->used--;
+ }else{
+ p->a[h] = -1;
+ }
p->cnt--;
- if( p->sz>20 && p->cnt<p->sz/8 ){
+ if( p->cnt==0 ){
+ memset(p->a, 0, p->sz*sizeof(p->a[0]));
+ p->used = 0;
+ }else if( p->sz>40 && p->cnt<p->sz/8 ){
bag_resize(p, p->sz/2);
}
}
}