Diff
Not logged in

Differences From:

File src/bag.c part of check-in [e15fe43153] - A new decendent finding algorithm is (hopefully) faster. Changes to the timeline are in process and might not yet work. by drh on 2007-08-31 20:14:33. Also file src/bag.c part of check-in [bbcb6326c9] - Pulled in the navbar and timeline changes. by aku on 2007-09-17 00:58:51. [view]

To:

File src/bag.c part of check-in [7a2c37063a] - merge trunk into creole branch by bob on 2009-09-22 07:49:39. Also file src/bag.c part of check-in [1409fbe38c] - Performance improvements to the "bag" object. by drh on 2009-08-27 14:04:39. [view]

@@ -33,8 +33,23 @@
 #if INTERFACE
 /*
 ** An integer can appear in the bag at most once.
 ** Integers must be positive.
+**
+** On a hash collision, search continues to the next slot in the array,
+** looping back to the beginning of the array when we reach the end.
+** The search stops when a match is found or upon encountering a 0 entry.
+**
+** When an entry is deleted, its value is changed to -1.
+**
+** Bag.cnt is the number of live entries in the table.  Bag.used is
+** the number of live entries plus the number of deleted entries.  So
+** Bag.used>=Bag.cnt.  We want to keep Bag.used-Bag.cnt as small as
+** possible.
+**
+** The length of a search increases as the hash table fills up.  So
+** the table is enlarged whenever Bag.used reaches half of Bag.sz.  That
+** way, the expected collision length never exceeds 2.
 */
 struct Bag {
   int cnt;   /* Number of integers in the bag */
   int sz;    /* Number of slots in a[] */
@@ -65,12 +80,18 @@
 
 /*
 ** Change the size of the hash table on a bag so that
 ** it contains N slots
+**
+** Completely reconstruct the hash table from scratch.  Deleted
+** entries (indicated by a -1) are removed.  When finished, it
+** should be the case that p->cnt==p->used.
 */
 static void bag_resize(Bag *p, int newSize){
   int i;
   Bag old;
+  int nDel = 0;   /* Number of deleted entries */
+  int nLive = 0;  /* Number of live entries */
 
   old = *p;
   assert( newSize>old.cnt );
   p->a = malloc( sizeof(p->a[0])*newSize );
@@ -84,10 +105,15 @@
         h++;
         if( h==newSize ) h = 0;
       }
       p->a[h] = e;
+      nLive++;
+    }else if( e<0 ){
+      nDel++;
     }
   }
+  assert( p->cnt == nLive );
+  assert( p->used == nLive+nDel );
   p->used = p->cnt;
   bag_clear(&old);
 }
 
@@ -100,18 +126,19 @@
   unsigned h;
   int rc = 0;
   assert( e>0 );
   if( p->used+1 >= p->sz/2 ){
-    bag_resize(p,  p->cnt*2 + 20 );
+    int n = p->sz*2;
+    bag_resize(p,  n + 20 );
   }
   h = bag_hash(e)%p->sz;
-  while( p->a[h] && p->a[h]!=e ){
+  while( p->a[h]>0 && p->a[h]!=e ){
     h++;
     if( h>=p->sz ) h = 0;
   }
-  if( p->a[h]==0 ){
-    p->a[h] = e;
-    p->used++;
+  if( p->a[h]<=0 ){
+    if( p->a[h]==0 ) p->used++;
+    p->a[h] = e;
     p->cnt++;
     rc = 1;
   }
   return rc;
@@ -147,11 +174,21 @@
     h++;
     if( h>=p->sz ) h = 0;
   }
   if( p->a[h] ){
-    p->a[h] = -1;
+    int nx = h+1;
+    if( nx>=p->sz ) nx = 0;
+    if( p->a[nx]==0 ){
+      p->a[h] = 0;
+      p->used--;
+    }else{
+      p->a[h] = -1;
+    }
     p->cnt--;
-    if( p->sz>20 && p->cnt<p->sz/8 ){
+    if( p->cnt==0 ){
+      memset(p->a, 0, p->sz*sizeof(p->a[0]));
+      p->used = 0;
+    }else if( p->sz>40 && p->cnt<p->sz/8 ){
       bag_resize(p, p->sz/2);
     }
   }
 }