Overview
SHA1 Hash: | 1409fbe38c764e93ca606a195b572dfd70aec1b4 |
---|---|
Date: | 2009-08-27 14:04:39 |
User: | drh |
Comment: | Performance improvements to the "bag" object. |
Timelines: | ancestors | descendants | both | trunk |
Other Links: | files | ZIP archive | manifest |
Tags And Properties
- branch=trunk inherited from [a28c83647d]
- sym-trunk inherited from [a28c83647d]
Changes
[hide diffs]Modified src/bag.c from [c1c41296e0] to [91c2be456c].
@@ -32,10 +32,25 @@ #if INTERFACE /* ** An integer can appear in the bag at most once. ** Integers must be positive. +** +** On a hash collision, search continues to the next slot in the array, +** looping back to the beginning of the array when we reach the end. +** The search stops when a match is found or upon encountering a 0 entry. +** +** When an entry is deleted, its value is changed to -1. +** +** Bag.cnt is the number of live entries in the table. Bag.used is +** the number of live entries plus the number of deleted entries. So +** Bag.used>=Bag.cnt. We want to keep Bag.used-Bag.cnt as small as +** possible. +** +** The length of a search increases as the hash table fills up. So +** the table is enlarged whenever Bag.used reaches half of Bag.sz. That +** way, the expected collision length never exceeds 2. */ struct Bag { int cnt; /* Number of integers in the bag */ int sz; /* Number of slots in a[] */ int used; /* Number of used slots in a[] */ @@ -64,14 +79,20 @@ #define bag_hash(i) (i*101) /* ** Change the size of the hash table on a bag so that ** it contains N slots +** +** Completely reconstruct the hash table from scratch. Deleted +** entries (indicated by a -1) are removed. When finished, it +** should be the case that p->cnt==p->used. */ static void bag_resize(Bag *p, int newSize){ int i; Bag old; + int nDel = 0; /* Number of deleted entries */ + int nLive = 0; /* Number of live entries */ old = *p; assert( newSize>old.cnt ); p->a = malloc( sizeof(p->a[0])*newSize ); p->sz = newSize; @@ -83,12 +104,17 @@ while( p->a[h] ){ h++; if( h==newSize ) h = 0; } p->a[h] = e; + nLive++; + }else if( e<0 ){ + nDel++; } } + assert( p->cnt == nLive ); + assert( p->used == nLive+nDel ); p->used = p->cnt; bag_clear(&old); } /* @@ -99,20 +125,21 @@ int bag_insert(Bag *p, int e){ unsigned h; int rc = 0; assert( e>0 ); if( p->used+1 >= p->sz/2 ){ - bag_resize(p, p->cnt*2 + 20 ); + int n = p->sz*2; + bag_resize(p, n + 20 ); } h = bag_hash(e)%p->sz; - while( p->a[h] && p->a[h]!=e ){ + while( p->a[h]>0 && p->a[h]!=e ){ h++; if( h>=p->sz ) h = 0; } - if( p->a[h]==0 ){ - p->a[h] = e; - p->used++; + if( p->a[h]<=0 ){ + if( p->a[h]==0 ) p->used++; + p->a[h] = e; p->cnt++; rc = 1; } return rc; } @@ -146,13 +173,23 @@ while( p->a[h] && p->a[h]!=e ){ h++; if( h>=p->sz ) h = 0; } if( p->a[h] ){ - p->a[h] = -1; + int nx = h+1; + if( nx>=p->sz ) nx = 0; + if( p->a[nx]==0 ){ + p->a[h] = 0; + p->used--; + }else{ + p->a[h] = -1; + } p->cnt--; - if( p->sz>20 && p->cnt<p->sz/8 ){ + if( p->cnt==0 ){ + memset(p->a, 0, p->sz*sizeof(p->a[0])); + p->used = 0; + }else if( p->sz>40 && p->cnt<p->sz/8 ){ bag_resize(p, p->sz/2); } } }