Overview
SHA1 Hash: | 61ddd63b72f1436c9ad18b6bba9a754276ac50fc |
---|---|
Date: | 2008-03-06 22:58:48 |
User: | drh |
Comment: | Work toward making fossil work better on large repositories. This version implements a cache in the content manager. It is not clear yet if this is necessarily a good idea - this check-in might end up on an abandoned branch at some point. |
Timelines: | ancestors | descendants | both | trunk |
Other Links: | files | ZIP archive | manifest |
Tags And Properties
- branch=trunk inherited from [a28c83647d]
- sym-trunk inherited from [a28c83647d]
Changes
[hide diffs]Modified src/blob.c from [2269ea6655] to [abc48788ed].
@@ -206,11 +206,10 @@ /* ** Copy a blob */ void blob_copy(Blob *pTo, Blob *pFrom){ blob_is_init(pFrom); - blob_is_init(pTo); blob_zero(pTo); blob_append(pTo, blob_buffer(pFrom), blob_size(pFrom)); } /*
Modified src/content.c from [1ea5bdca20] to [6b8e09987a].
@@ -26,16 +26,123 @@ #include "config.h" #include "content.h" #include <assert.h> /* +** Macros for debugging +*/ +#if 0 +# define CONTENT_TRACE(X) printf X; +#else +# define CONTENT_TRACE(X) +#endif + +/* +** The artifact retrival cache +*/ +#define MX_CACHE_CNT 50 /* Maximum number of positive cache entries */ +#define EXPELL_INTERVAL 5 /* How often to expell from a full cache */ +static struct { + int n; /* Current number of positive cache entries */ + int nextAge; /* Age counter for implementing LRU */ + int skipCnt; /* Used to limit entries expelled from cache */ + struct { /* One instance of this for each cache entry */ + int rid; /* Artifact id */ + int age; /* Age. Newer is larger */ + Blob content; /* Content of the artifact */ + } a[MX_CACHE_CNT]; /* The positive cache */ + + /* + ** The missing artifact cache. + ** + ** Artifacts whose record ID are in missingCache cannot be retrieved + ** either because they are phantoms or because they are a delta that + ** depends on a phantom. Artifacts whose content we are certain is + ** available are in availableCache. If an artifact is in neither cache + ** then its current availablity is unknown. + */ + Bag missing; /* Cache of artifacts that are incomplete */ + Bag available; /* Cache of artifacts that are complete */ +} contentCache; + + +/* +** Clear the content cache. +*/ +void content_clear_cache(void){ + int i; + for(i=0; i<contentCache.n; i++){ + blob_reset(&contentCache.a[i].content); + } + bag_clear(&contentCache.missing); + bag_clear(&contentCache.available); + contentCache.n = 0; +} + +/* ** Return the srcid associated with rid. Or return 0 if rid is ** original content and not a delta. */ static int findSrcid(int rid){ int srcid = db_int(0, "SELECT srcid FROM delta WHERE rid=%d", rid); return srcid; +} + +/* +** Check to see if content is available for artifact "rid". Return +** true if it is. Return false if rid is a phantom or depends on +** a phantom. +*/ +int content_is_available(int rid){ + int srcid; + if( bag_find(&contentCache.missing, rid) ){ + return 0; + } + if( bag_find(&contentCache.available, rid) ){ + return 1; + } + if( db_int(-1, "SELECT size FROM blob WHERE rid=%d", rid)<0 ){ + bag_insert(&contentCache.missing, rid); + return 0; + } + srcid = findSrcid(rid); + if( srcid==0 ){ + bag_insert(&contentCache.available, rid); + return 1; + } + if( content_is_available(srcid) ){ + bag_insert(&contentCache.available, rid); + return 1; + }else{ + bag_insert(&contentCache.missing, rid); + return 0; + } +} + +/* +** Mark artifact rid as being available now. Update the cache to +** show that everything that was formerly unavailable because rid +** was missing is now available. +*/ +static void content_mark_available(int rid){ + Bag pending; + Stmt q; + if( bag_find(&contentCache.available, rid) ) return; + bag_init(&pending); + bag_insert(&pending, rid); + while( (rid = bag_first(&pending))!=0 ){ + bag_remove(&pending, rid); + bag_remove(&contentCache.missing, rid); + bag_insert(&contentCache.available, rid); + db_prepare(&q, "SELECT rid FROM delta WHERE srcid=%d", rid); + while( db_step(&q)==SQLITE_ROW ){ + int nx = db_column_int(&q, 0); + bag_insert(&pending, nx); + } + db_finalize(&q); + } + bag_clear(&pending); } /* ** Extract the content for ID rid and put it into the ** uninitialized blob. Return 1 on success. If the record @@ -44,16 +151,46 @@ int content_get(int rid, Blob *pBlob){ Stmt q; Blob src; int srcid; int rc = 0; + int i; static Bag inProcess; assert( g.repositoryOpen ); - srcid = findSrcid(rid); blob_zero(pBlob); + + /* Early out if we know the content is not available */ + if( bag_find(&contentCache.missing, rid) ){ + CONTENT_TRACE(("%*smiss from cache: %d\n", + bag_count(&inProcess), "", rid)) + return 0; + } + + /* Look for the artifact in the cache first */ + for(i=0; i<contentCache.n; i++){ + if( contentCache.a[i].rid==rid ){ + *pBlob = contentCache.a[i].content; + blob_zero(&contentCache.a[i].content); + contentCache.n--; + if( i<contentCache.n ){ + contentCache.a[i] = contentCache.a[contentCache.n]; + } + CONTENT_TRACE(("%*shit cache: %d\n", + bag_count(&inProcess), "", rid)) + return 1; + } + } + + /* See if we need to apply a delta to find this artifact */ + srcid = findSrcid(rid); + CONTENT_TRACE(("%*ssearching for %d. Need %d.\n", + bag_count(&inProcess), "", rid, srcid)) + + if( srcid ){ + /* Yes, a delta is required */ if( bag_find(&inProcess, srcid) ){ db_multi_exec( "UPDATE blob SET content=NULL, size=-1 WHERE rid=%d;" "DELETE FROM delta WHERE rid=%d;" "INSERT OR IGNORE INTO phantom VALUES(%d);", @@ -61,10 +198,11 @@ ); blob_zero(pBlob); return 0; } bag_insert(&inProcess, srcid); + if( content_get(srcid, &src) ){ db_prepare(&q, "SELECT content FROM blob WHERE rid=%d AND size>=0", rid); if( db_step(&q)==SQLITE_ROW ){ Blob delta; db_ephemeral_blob(&q, 0, &delta); @@ -73,29 +211,67 @@ blob_delta_apply(&src, &delta, pBlob); blob_reset(&delta); rc = 1; } db_finalize(&q); - blob_reset(&src); + + /* Save the srcid artifact in the cache */ + if( contentCache.n<MX_CACHE_CNT ){ + i = contentCache.n++; + }else if( ((contentCache.skipCnt++)%EXPELL_INTERVAL)!=0 ){ + i = -1; + }else{ + int j, best; + best = contentCache.nextAge+1; + i = -1; + for(j=0; j<contentCache.n; j++){ + if( contentCache.a[j].age<best ){ + i = j; + best = contentCache.a[j].age; + } + } + CONTENT_TRACE(("%*sexpell %d from cache\n", + bag_count(&inProcess), "", contentCache.a[i].rid)) + blob_reset(&contentCache.a[i].content); + } + if( i>=0 ){ + contentCache.a[i].content = src; + contentCache.a[i].age = contentCache.nextAge++; + contentCache.a[i].rid = srcid; + CONTENT_TRACE(("%*sadd %d to cache\n", + bag_count(&inProcess), "", srcid)) + }else{ + blob_reset(&src); + } } bag_remove(&inProcess, srcid); }else{ + /* No delta required. Read content directly from the database */ db_prepare(&q, "SELECT content FROM blob WHERE rid=%d AND size>=0", rid); if( db_step(&q)==SQLITE_ROW ){ db_ephemeral_blob(&q, 0, pBlob); blob_uncompress(pBlob, pBlob); rc = 1; } db_finalize(&q); } + if( rc==0 ){ + bag_insert(&contentCache.missing, rid); + }else{ + bag_insert(&contentCache.available, rid); + } return rc; } /* -** Get the contents of a file within a given revision. +** Get the contents of a file within a given baseline. */ -int content_get_historical_file(const char *revision, const char *file, Blob *content){ +int content_get_historical_file( + const char *revision, /* Name of the baseline containing the file */ + const char *file, /* Name of the file */ + Blob *content /* Write file content here */ +){ Blob mfile; Manifest m; int i, rid=0; rid = name_to_rid(revision); @@ -194,10 +370,11 @@ int rid; Stmt s1; Blob cmpr; Blob hash; int markAsUnclustered = 0; + int isDephantomize = 0; assert( g.repositoryOpen ); if( pBlob && srcId==0 ){ sha1sum_blob(pBlob, &hash); }else{ @@ -249,12 +426,13 @@ ); blob_compress(pBlob, &cmpr); db_bind_blob(&s1, ":data", &cmpr); db_exec(&s1); db_multi_exec("DELETE FROM phantom WHERE rid=%d", rid); - if( srcId==0 || db_int(0, "SELECT size FROM blob WHERE rid=%d", srcId)>0 ){ - after_dephantomize(rid, 0); + if( srcId==0 || content_is_available(srcId) ){ + isDephantomize = 1; + content_mark_available(rid); } }else{ /* We are creating a new entry */ db_prepare(&s1, "INSERT INTO blob(rcvid,size,uuid,content)" @@ -275,10 +453,17 @@ /* If the srcId is specified, then the data we just added is ** really a delta. Record this fact in the delta table. */ if( srcId ){ db_multi_exec("REPLACE INTO delta(rid,srcid) VALUES(%d,%d)", rid, srcId); + } + if( !isDephantomize && bag_find(&contentCache.missing, rid) && + (srcId==0 || content_is_available(srcId)) ){ + content_mark_available(rid); + } + if( isDephantomize ){ + after_dephantomize(rid, 0); } /* Add the element to the unclustered table if has never been ** previously seen. */
Modified src/delta.c from [c73001b47f] to [abb4c0444e].
@@ -196,24 +196,31 @@ } /* ** Compute a 32-bit checksum on the N-byte buffer. Return the result. */ -static unsigned int checksum(const char *zIn, int N){ - const unsigned char *z = (const unsigned char*)zIn; - unsigned int sum = 0; - while( N>=4 ){ +static unsigned int checksum(const char *zIn, size_t N){ + const unsigned char *z = (const unsigned char *)zIn; + unsigned sum = 0; + while(N >= 16){ + sum += ((unsigned)z[0] + z[4] + z[8] + z[12]) << 24; + sum += ((unsigned)z[1] + z[5] + z[9] + z[13]) << 16; + sum += ((unsigned)z[2] + z[6] + z[10]+ z[14]) << 8; + sum += ((unsigned)z[3] + z[7] + z[11]+ z[15]); + z += 16; + N -= 16; + } + while(N >= 4){ sum += (z[0]<<24) | (z[1]<<16) | (z[2]<<8) | z[3]; z += 4; N -= 4; } - if( N>0 ){ - unsigned char zBuf[4]; - memset(zBuf, 0, sizeof(zBuf)); - memcpy(zBuf, z, N); - z = zBuf; - sum += (z[0]<<24) | (z[1]<<16) | (z[2]<<8) | z[3]; + switch(N){ + case 3: sum += (z[2] << 8); + case 2: sum += (z[1] << 16); + case 1: sum += (z[0] << 24); + default: ; } return sum; } /*
Modified src/rebuild.c from [5e55de3bcd] to [b5c158aea9].
@@ -53,10 +53,87 @@ @ sqlcode text -- An SQL SELECT statement for this report @ ); ; /* +** Variables used for progress information +*/ +static int totalSize; /* Total number of artifacts to process */ +static int processCnt; /* Number processed so far */ +static int ttyOutput; /* Do progress output */ + +/* +** Called after each artifact is processed +*/ +static void rebuild_step_done(void){ + if( ttyOutput ){ + processCnt++; + printf("%d (%d%%)...\r", processCnt, (processCnt*100/totalSize)); + fflush(stdout); + } +} + +/* +** Rebuild cross-referencing information for the artifact +** rid with content pBase and all of its descendents. This +** routine clears the content buffer before returning. +*/ +static void rebuild_step(int rid, Blob *pBase){ + Stmt q1; + Bag children; + Blob copy; + Blob *pUse; + int nChild, i, cid; + + /* Find all children of artifact rid */ + db_prepare(&q1, "SELECT rid FROM delta WHERE srcid=%d", rid); + bag_init(&children); + while( db_step(&q1)==SQLITE_ROW ){ + bag_insert(&children, db_column_int(&q1, 0)); + } + nChild = bag_count(&children); + db_finalize(&q1); + + /* Crosslink the artifact */ + if( nChild==0 ){ + pUse = pBase; + }else{ + blob_copy(©, pBase); + pUse = © + } + manifest_crosslink(rid, pUse); + blob_reset(pUse); + + /* Call all children recursively */ + for(cid=bag_first(&children), i=1; cid; cid=bag_next(&children, cid), i++){ + Stmt q2; + int sz; + if( nChild==i ){ + pUse = pBase; + }else{ + blob_copy(©, pBase); + pUse = © + } + db_prepare(&q2, "SELECT content, size FROM blob WHERE rid=%d", cid); + if( db_step(&q2)==SQLITE_ROW && (sz = db_column_int(&q2,1))>=0 ){ + Blob delta; + db_ephemeral_blob(&q2, 0, &delta); + blob_uncompress(&delta, &delta); + blob_delta_apply(pUse, &delta, pUse); + blob_reset(&delta); + db_finalize(&q2); + rebuild_step(cid, pUse); + }else{ + db_finalize(&q2); + blob_reset(pUse); + } + } + bag_clear(&children); + rebuild_step_done(); +} + +/* ** Core function to rebuild the infomration in the derived tables of a ** fossil repository from the blobs. This function is shared between ** 'rebuild_database' ('rebuild') and 'reconstruct_cmd' ** ('reconstruct'), both of which have to regenerate this information ** from scratch. @@ -64,16 +141,17 @@ ** If the randomize parameter is true, then the BLOBs are deliberately ** extracted in a random order. This feature is used to test the ** ability of fossil to accept records in any order and still ** construct a sane repository. */ -int rebuild_db(int randomize, int ttyOutput){ +int rebuild_db(int randomize, int doOut){ Stmt s; int errCnt = 0; char *zTable; - int cnt = 0; + ttyOutput = doOut; + processCnt = 0; db_multi_exec(zSchemaUpdates); for(;;){ zTable = db_text(0, "SELECT name FROM sqlite_master" " WHERE type='table'" @@ -91,30 +169,26 @@ " WHERE rid IN (SELECT rid FROM shun JOIN blob USING(uuid))" ); db_multi_exec( "DELETE FROM config WHERE name IN ('remote-code', 'remote-maxid')" ); + totalSize = db_int(0, "SELECT count(*) FROM blob"); db_prepare(&s, - "SELECT rid, size FROM blob %s" - " WHERE NOT EXISTS(SELECT 1 FROM shun WHERE uuid=blob.uuid)", - randomize ? "ORDER BY random()" : "" + "SELECT rid, size FROM blob" + " WHERE NOT EXISTS(SELECT 1 FROM shun WHERE uuid=blob.uuid)" + " AND NOT EXISTS(SELECT 1 FROM delta WHERE rid=blob.rid)" ); while( db_step(&s)==SQLITE_ROW ){ int rid = db_column_int(&s, 0); int size = db_column_int(&s, 1); if( size>=0 ){ Blob content; - if( ttyOutput ){ - cnt++; - printf("%d...\r", cnt); - fflush(stdout); - } content_get(rid, &content); - manifest_crosslink(rid, &content); - blob_reset(&content); + rebuild_step(rid, &content); }else{ db_multi_exec("INSERT OR IGNORE INTO phantom VALUES(%d)", rid); + rebuild_step_done(); } } db_finalize(&s); if( ttyOutput ){ printf("\n"); @@ -141,14 +215,15 @@ if( g.argc!=3 ){ usage("REPOSITORY-FILENAME"); } db_open_repository(g.argv[2]); db_begin_transaction(); + ttyOutput = 1; errCnt = rebuild_db(randomizeFlag, 1); if( errCnt && !forceFlag ){ printf("%d errors. Rolling back changes. Use --force to force a commit.\n", errCnt); db_end_transaction(1); }else{ db_end_transaction(0); } }
Modified src/verify.c from [8d633e10bb] to [f05153c954].
@@ -73,10 +73,11 @@ /* ** This routine is called just prior to each commit operation. */ static int verify_at_commit(void){ int rid; + content_clear_cache(); inFinalVerify = 1; rid = bag_first(&toVerify); while( rid>0 ){ verify_rid(rid); rid = bag_next(&toVerify, rid);