dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** Copyright (c) 2006 D. Richard Hipp dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** This program is free software; you can redistribute it and/or dbda8d6ce9 2007-07-21 drh: ** modify it under the terms of the GNU General Public dbda8d6ce9 2007-07-21 drh: ** License version 2 as published by the Free Software Foundation. dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** This program is distributed in the hope that it will be useful, dbda8d6ce9 2007-07-21 drh: ** but WITHOUT ANY WARRANTY; without even the implied warranty of dbda8d6ce9 2007-07-21 drh: ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU dbda8d6ce9 2007-07-21 drh: ** General Public License for more details. dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** You should have received a copy of the GNU General Public dbda8d6ce9 2007-07-21 drh: ** License along with this library; if not, write to the dbda8d6ce9 2007-07-21 drh: ** Free Software Foundation, Inc., 59 Temple Place - Suite 330, dbda8d6ce9 2007-07-21 drh: ** Boston, MA 02111-1307, USA. dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** Author contact information: dbda8d6ce9 2007-07-21 drh: ** drh@hwaci.com dbda8d6ce9 2007-07-21 drh: ** http://www.hwaci.com/drh/ dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ******************************************************************************* dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** Procedures store and retrieve records from the repository dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: #include "config.h" dbda8d6ce9 2007-07-21 drh: #include "content.h" dbda8d6ce9 2007-07-21 drh: #include <assert.h> dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* 61ddd63b72 2008-03-06 drh: ** Macros for debugging 61ddd63b72 2008-03-06 drh: */ 61ddd63b72 2008-03-06 drh: #if 0 61ddd63b72 2008-03-06 drh: # define CONTENT_TRACE(X) printf X; 61ddd63b72 2008-03-06 drh: #else 61ddd63b72 2008-03-06 drh: # define CONTENT_TRACE(X) 61ddd63b72 2008-03-06 drh: #endif 61ddd63b72 2008-03-06 drh: 61ddd63b72 2008-03-06 drh: /* 61ddd63b72 2008-03-06 drh: ** The artifact retrival cache 61ddd63b72 2008-03-06 drh: */ 61ddd63b72 2008-03-06 drh: #define MX_CACHE_CNT 50 /* Maximum number of positive cache entries */ 61ddd63b72 2008-03-06 drh: #define EXPELL_INTERVAL 5 /* How often to expell from a full cache */ 61ddd63b72 2008-03-06 drh: static struct { 61ddd63b72 2008-03-06 drh: int n; /* Current number of positive cache entries */ 61ddd63b72 2008-03-06 drh: int nextAge; /* Age counter for implementing LRU */ 61ddd63b72 2008-03-06 drh: int skipCnt; /* Used to limit entries expelled from cache */ 61ddd63b72 2008-03-06 drh: struct { /* One instance of this for each cache entry */ 61ddd63b72 2008-03-06 drh: int rid; /* Artifact id */ 61ddd63b72 2008-03-06 drh: int age; /* Age. Newer is larger */ 61ddd63b72 2008-03-06 drh: Blob content; /* Content of the artifact */ 61ddd63b72 2008-03-06 drh: } a[MX_CACHE_CNT]; /* The positive cache */ 61ddd63b72 2008-03-06 drh: 61ddd63b72 2008-03-06 drh: /* 61ddd63b72 2008-03-06 drh: ** The missing artifact cache. 61ddd63b72 2008-03-06 drh: ** 61ddd63b72 2008-03-06 drh: ** Artifacts whose record ID are in missingCache cannot be retrieved 61ddd63b72 2008-03-06 drh: ** either because they are phantoms or because they are a delta that 61ddd63b72 2008-03-06 drh: ** depends on a phantom. Artifacts whose content we are certain is 61ddd63b72 2008-03-06 drh: ** available are in availableCache. If an artifact is in neither cache 61ddd63b72 2008-03-06 drh: ** then its current availablity is unknown. 61ddd63b72 2008-03-06 drh: */ 61ddd63b72 2008-03-06 drh: Bag missing; /* Cache of artifacts that are incomplete */ 61ddd63b72 2008-03-06 drh: Bag available; /* Cache of artifacts that are complete */ 61ddd63b72 2008-03-06 drh: } contentCache; 61ddd63b72 2008-03-06 drh: 61ddd63b72 2008-03-06 drh: 61ddd63b72 2008-03-06 drh: /* 61ddd63b72 2008-03-06 drh: ** Clear the content cache. 61ddd63b72 2008-03-06 drh: */ 61ddd63b72 2008-03-06 drh: void content_clear_cache(void){ 61ddd63b72 2008-03-06 drh: int i; 61ddd63b72 2008-03-06 drh: for(i=0; i<contentCache.n; i++){ 61ddd63b72 2008-03-06 drh: blob_reset(&contentCache.a[i].content); 61ddd63b72 2008-03-06 drh: } 61ddd63b72 2008-03-06 drh: bag_clear(&contentCache.missing); 61ddd63b72 2008-03-06 drh: bag_clear(&contentCache.available); 61ddd63b72 2008-03-06 drh: contentCache.n = 0; 61ddd63b72 2008-03-06 drh: } 61ddd63b72 2008-03-06 drh: 61ddd63b72 2008-03-06 drh: /* dbda8d6ce9 2007-07-21 drh: ** Return the srcid associated with rid. Or return 0 if rid is dbda8d6ce9 2007-07-21 drh: ** original content and not a delta. dbda8d6ce9 2007-07-21 drh: */ 573a464cb7 2007-08-10 drh: static int findSrcid(int rid){ 573a464cb7 2007-08-10 drh: int srcid = db_int(0, "SELECT srcid FROM delta WHERE rid=%d", rid); dbda8d6ce9 2007-07-21 drh: return srcid; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* 61ddd63b72 2008-03-06 drh: ** Check to see if content is available for artifact "rid". Return 61ddd63b72 2008-03-06 drh: ** true if it is. Return false if rid is a phantom or depends on 61ddd63b72 2008-03-06 drh: ** a phantom. 61ddd63b72 2008-03-06 drh: */ 61ddd63b72 2008-03-06 drh: int content_is_available(int rid){ 61ddd63b72 2008-03-06 drh: int srcid; 61ddd63b72 2008-03-06 drh: if( bag_find(&contentCache.missing, rid) ){ 61ddd63b72 2008-03-06 drh: return 0; 61ddd63b72 2008-03-06 drh: } 61ddd63b72 2008-03-06 drh: if( bag_find(&contentCache.available, rid) ){ 61ddd63b72 2008-03-06 drh: return 1; 61ddd63b72 2008-03-06 drh: } 61ddd63b72 2008-03-06 drh: if( db_int(-1, "SELECT size FROM blob WHERE rid=%d", rid)<0 ){ 61ddd63b72 2008-03-06 drh: bag_insert(&contentCache.missing, rid); 61ddd63b72 2008-03-06 drh: return 0; 61ddd63b72 2008-03-06 drh: } 61ddd63b72 2008-03-06 drh: srcid = findSrcid(rid); 61ddd63b72 2008-03-06 drh: if( srcid==0 ){ 61ddd63b72 2008-03-06 drh: bag_insert(&contentCache.available, rid); 61ddd63b72 2008-03-06 drh: return 1; 61ddd63b72 2008-03-06 drh: } 61ddd63b72 2008-03-06 drh: if( content_is_available(srcid) ){ 61ddd63b72 2008-03-06 drh: bag_insert(&contentCache.available, rid); 61ddd63b72 2008-03-06 drh: return 1; 61ddd63b72 2008-03-06 drh: }else{ 61ddd63b72 2008-03-06 drh: bag_insert(&contentCache.missing, rid); 61ddd63b72 2008-03-06 drh: return 0; 61ddd63b72 2008-03-06 drh: } 61ddd63b72 2008-03-06 drh: } 61ddd63b72 2008-03-06 drh: 61ddd63b72 2008-03-06 drh: /* 61ddd63b72 2008-03-06 drh: ** Mark artifact rid as being available now. Update the cache to 61ddd63b72 2008-03-06 drh: ** show that everything that was formerly unavailable because rid 61ddd63b72 2008-03-06 drh: ** was missing is now available. 61ddd63b72 2008-03-06 drh: */ 61ddd63b72 2008-03-06 drh: static void content_mark_available(int rid){ 61ddd63b72 2008-03-06 drh: Bag pending; 61ddd63b72 2008-03-06 drh: Stmt q; 61ddd63b72 2008-03-06 drh: if( bag_find(&contentCache.available, rid) ) return; 61ddd63b72 2008-03-06 drh: bag_init(&pending); 61ddd63b72 2008-03-06 drh: bag_insert(&pending, rid); 61ddd63b72 2008-03-06 drh: while( (rid = bag_first(&pending))!=0 ){ 61ddd63b72 2008-03-06 drh: bag_remove(&pending, rid); 61ddd63b72 2008-03-06 drh: bag_remove(&contentCache.missing, rid); 61ddd63b72 2008-03-06 drh: bag_insert(&contentCache.available, rid); 61ddd63b72 2008-03-06 drh: db_prepare(&q, "SELECT rid FROM delta WHERE srcid=%d", rid); 61ddd63b72 2008-03-06 drh: while( db_step(&q)==SQLITE_ROW ){ 61ddd63b72 2008-03-06 drh: int nx = db_column_int(&q, 0); 61ddd63b72 2008-03-06 drh: bag_insert(&pending, nx); 61ddd63b72 2008-03-06 drh: } 61ddd63b72 2008-03-06 drh: db_finalize(&q); 61ddd63b72 2008-03-06 drh: } 61ddd63b72 2008-03-06 drh: bag_clear(&pending); 573a464cb7 2007-08-10 drh: } 573a464cb7 2007-08-10 drh: 573a464cb7 2007-08-10 drh: /* dbda8d6ce9 2007-07-21 drh: ** Extract the content for ID rid and put it into the 573a464cb7 2007-08-10 drh: ** uninitialized blob. Return 1 on success. If the record 573a464cb7 2007-08-10 drh: ** is a phantom, zero pBlob and return 0. dbda8d6ce9 2007-07-21 drh: */ 573a464cb7 2007-08-10 drh: int content_get(int rid, Blob *pBlob){ dbda8d6ce9 2007-07-21 drh: Stmt q; 573a464cb7 2007-08-10 drh: Blob src; dbda8d6ce9 2007-07-21 drh: int srcid; 573a464cb7 2007-08-10 drh: int rc = 0; 61ddd63b72 2008-03-06 drh: int i; fb90abe5bd 2007-12-03 drh: static Bag inProcess; 573a464cb7 2007-08-10 drh: dbda8d6ce9 2007-07-21 drh: assert( g.repositoryOpen ); 573a464cb7 2007-08-10 drh: blob_zero(pBlob); 61ddd63b72 2008-03-06 drh: 61ddd63b72 2008-03-06 drh: /* Early out if we know the content is not available */ 61ddd63b72 2008-03-06 drh: if( bag_find(&contentCache.missing, rid) ){ 61ddd63b72 2008-03-06 drh: CONTENT_TRACE(("%*smiss from cache: %d\n", 61ddd63b72 2008-03-06 drh: bag_count(&inProcess), "", rid)) 61ddd63b72 2008-03-06 drh: return 0; 61ddd63b72 2008-03-06 drh: } 61ddd63b72 2008-03-06 drh: 61ddd63b72 2008-03-06 drh: /* Look for the artifact in the cache first */ 61ddd63b72 2008-03-06 drh: for(i=0; i<contentCache.n; i++){ 61ddd63b72 2008-03-06 drh: if( contentCache.a[i].rid==rid ){ 61ddd63b72 2008-03-06 drh: *pBlob = contentCache.a[i].content; 61ddd63b72 2008-03-06 drh: blob_zero(&contentCache.a[i].content); 61ddd63b72 2008-03-06 drh: contentCache.n--; 61ddd63b72 2008-03-06 drh: if( i<contentCache.n ){ 61ddd63b72 2008-03-06 drh: contentCache.a[i] = contentCache.a[contentCache.n]; 61ddd63b72 2008-03-06 drh: } 61ddd63b72 2008-03-06 drh: CONTENT_TRACE(("%*shit cache: %d\n", 61ddd63b72 2008-03-06 drh: bag_count(&inProcess), "", rid)) 61ddd63b72 2008-03-06 drh: return 1; 61ddd63b72 2008-03-06 drh: } 61ddd63b72 2008-03-06 drh: } 61ddd63b72 2008-03-06 drh: 61ddd63b72 2008-03-06 drh: /* See if we need to apply a delta to find this artifact */ 61ddd63b72 2008-03-06 drh: srcid = findSrcid(rid); 61ddd63b72 2008-03-06 drh: CONTENT_TRACE(("%*ssearching for %d. Need %d.\n", 61ddd63b72 2008-03-06 drh: bag_count(&inProcess), "", rid, srcid)) 61ddd63b72 2008-03-06 drh: 61ddd63b72 2008-03-06 drh: dbda8d6ce9 2007-07-21 drh: if( srcid ){ 61ddd63b72 2008-03-06 drh: /* Yes, a delta is required */ fb90abe5bd 2007-12-03 drh: if( bag_find(&inProcess, srcid) ){ fb90abe5bd 2007-12-03 drh: db_multi_exec( fb90abe5bd 2007-12-03 drh: "UPDATE blob SET content=NULL, size=-1 WHERE rid=%d;" fb90abe5bd 2007-12-03 drh: "DELETE FROM delta WHERE rid=%d;" fb90abe5bd 2007-12-03 drh: "INSERT OR IGNORE INTO phantom VALUES(%d);", fb90abe5bd 2007-12-03 drh: srcid, srcid, srcid fb90abe5bd 2007-12-03 drh: ); fb90abe5bd 2007-12-03 drh: blob_zero(pBlob); fb90abe5bd 2007-12-03 drh: return 0; fb90abe5bd 2007-12-03 drh: } fb90abe5bd 2007-12-03 drh: bag_insert(&inProcess, srcid); 61ddd63b72 2008-03-06 drh: 573a464cb7 2007-08-10 drh: if( content_get(srcid, &src) ){ 573a464cb7 2007-08-10 drh: db_prepare(&q, "SELECT content FROM blob WHERE rid=%d AND size>=0", rid); 573a464cb7 2007-08-10 drh: if( db_step(&q)==SQLITE_ROW ){ 573a464cb7 2007-08-10 drh: Blob delta; 573a464cb7 2007-08-10 drh: db_ephemeral_blob(&q, 0, &delta); 573a464cb7 2007-08-10 drh: blob_uncompress(&delta, &delta); 573a464cb7 2007-08-10 drh: blob_init(pBlob,0,0); 573a464cb7 2007-08-10 drh: blob_delta_apply(&src, &delta, pBlob); 573a464cb7 2007-08-10 drh: blob_reset(&delta); 573a464cb7 2007-08-10 drh: rc = 1; 573a464cb7 2007-08-10 drh: } 573a464cb7 2007-08-10 drh: db_finalize(&q); 61ddd63b72 2008-03-06 drh: 61ddd63b72 2008-03-06 drh: /* Save the srcid artifact in the cache */ 61ddd63b72 2008-03-06 drh: if( contentCache.n<MX_CACHE_CNT ){ 61ddd63b72 2008-03-06 drh: i = contentCache.n++; 61ddd63b72 2008-03-06 drh: }else if( ((contentCache.skipCnt++)%EXPELL_INTERVAL)!=0 ){ 61ddd63b72 2008-03-06 drh: i = -1; 61ddd63b72 2008-03-06 drh: }else{ 61ddd63b72 2008-03-06 drh: int j, best; 61ddd63b72 2008-03-06 drh: best = contentCache.nextAge+1; 61ddd63b72 2008-03-06 drh: i = -1; 61ddd63b72 2008-03-06 drh: for(j=0; j<contentCache.n; j++){ 61ddd63b72 2008-03-06 drh: if( contentCache.a[j].age<best ){ 61ddd63b72 2008-03-06 drh: i = j; 61ddd63b72 2008-03-06 drh: best = contentCache.a[j].age; 61ddd63b72 2008-03-06 drh: } 61ddd63b72 2008-03-06 drh: } 61ddd63b72 2008-03-06 drh: CONTENT_TRACE(("%*sexpell %d from cache\n", 61ddd63b72 2008-03-06 drh: bag_count(&inProcess), "", contentCache.a[i].rid)) 61ddd63b72 2008-03-06 drh: blob_reset(&contentCache.a[i].content); 61ddd63b72 2008-03-06 drh: } 61ddd63b72 2008-03-06 drh: if( i>=0 ){ 61ddd63b72 2008-03-06 drh: contentCache.a[i].content = src; 61ddd63b72 2008-03-06 drh: contentCache.a[i].age = contentCache.nextAge++; 61ddd63b72 2008-03-06 drh: contentCache.a[i].rid = srcid; 61ddd63b72 2008-03-06 drh: CONTENT_TRACE(("%*sadd %d to cache\n", 61ddd63b72 2008-03-06 drh: bag_count(&inProcess), "", srcid)) 61ddd63b72 2008-03-06 drh: }else{ 61ddd63b72 2008-03-06 drh: blob_reset(&src); 61ddd63b72 2008-03-06 drh: } dbda8d6ce9 2007-07-21 drh: } fb90abe5bd 2007-12-03 drh: bag_remove(&inProcess, srcid); dbda8d6ce9 2007-07-21 drh: }else{ 61ddd63b72 2008-03-06 drh: /* No delta required. Read content directly from the database */ 573a464cb7 2007-08-10 drh: db_prepare(&q, "SELECT content FROM blob WHERE rid=%d AND size>=0", rid); dbda8d6ce9 2007-07-21 drh: if( db_step(&q)==SQLITE_ROW ){ dbda8d6ce9 2007-07-21 drh: db_ephemeral_blob(&q, 0, pBlob); dbda8d6ce9 2007-07-21 drh: blob_uncompress(pBlob, pBlob); 573a464cb7 2007-08-10 drh: rc = 1; dbda8d6ce9 2007-07-21 drh: } 573a464cb7 2007-08-10 drh: db_finalize(&q); 573a464cb7 2007-08-10 drh: } 61ddd63b72 2008-03-06 drh: if( rc==0 ){ 61ddd63b72 2008-03-06 drh: bag_insert(&contentCache.missing, rid); 61ddd63b72 2008-03-06 drh: }else{ 61ddd63b72 2008-03-06 drh: bag_insert(&contentCache.available, rid); 61ddd63b72 2008-03-06 drh: } 573a464cb7 2007-08-10 drh: return rc; dbda8d6ce9 2007-07-21 drh: } 574763bab9 2007-09-26 jnc: 574763bab9 2007-09-26 jnc: /* 61ddd63b72 2008-03-06 drh: ** Get the contents of a file within a given baseline. 574763bab9 2007-09-26 jnc: */ 61ddd63b72 2008-03-06 drh: int content_get_historical_file( 61ddd63b72 2008-03-06 drh: const char *revision, /* Name of the baseline containing the file */ 61ddd63b72 2008-03-06 drh: const char *file, /* Name of the file */ 61ddd63b72 2008-03-06 drh: Blob *content /* Write file content here */ 61ddd63b72 2008-03-06 drh: ){ 574763bab9 2007-09-26 jnc: Blob mfile; 574763bab9 2007-09-26 jnc: Manifest m; 574763bab9 2007-09-26 jnc: int i, rid=0; 574763bab9 2007-09-26 jnc: 574763bab9 2007-09-26 jnc: rid = name_to_rid(revision); 574763bab9 2007-09-26 jnc: content_get(rid, &mfile); 574763bab9 2007-09-26 jnc: 574763bab9 2007-09-26 jnc: if( manifest_parse(&m, &mfile) ){ 574763bab9 2007-09-26 jnc: for(i=0; i<m.nFile; i++){ 574763bab9 2007-09-26 jnc: if( strcmp(m.aFile[i].zName, file)==0 ){ 574763bab9 2007-09-26 jnc: rid = uuid_to_rid(m.aFile[i].zUuid, 0); 574763bab9 2007-09-26 jnc: return content_get(rid, content); 574763bab9 2007-09-26 jnc: } 574763bab9 2007-09-26 jnc: } 574763bab9 2007-09-26 jnc: fossil_panic("file: %s does not exist in revision: %s", file, revision); 574763bab9 2007-09-26 jnc: }else{ 574763bab9 2007-09-26 jnc: fossil_panic("could not parse manifest for revision: %s", revision); 574763bab9 2007-09-26 jnc: } 574763bab9 2007-09-26 jnc: 574763bab9 2007-09-26 jnc: return 0; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** COMMAND: test-content-get dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** Extract a blob from the database and write it into a file. dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: void test_content_get_cmd(void){ dbda8d6ce9 2007-07-21 drh: int rid; dbda8d6ce9 2007-07-21 drh: Blob content; dbda8d6ce9 2007-07-21 drh: const char *zFile; dbda8d6ce9 2007-07-21 drh: if( g.argc!=4 && g.argc!=3 ) usage("RECORDID ?FILENAME?"); dbda8d6ce9 2007-07-21 drh: zFile = g.argc==4 ? g.argv[3] : "-"; dbda8d6ce9 2007-07-21 drh: db_must_be_within_tree(); dbda8d6ce9 2007-07-21 drh: rid = name_to_rid(g.argv[2]); dbda8d6ce9 2007-07-21 drh: content_get(rid, &content); dbda8d6ce9 2007-07-21 drh: blob_write_to_file(&content, zFile); dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** COMMAND: test-content-rawget dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** Extract a blob from the database and write it into a file. This dbda8d6ce9 2007-07-21 drh: ** version does not expand the delta. dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: void test_content_rawget_cmd(void){ dbda8d6ce9 2007-07-21 drh: int rid; dbda8d6ce9 2007-07-21 drh: Blob content; dbda8d6ce9 2007-07-21 drh: const char *zFile; dbda8d6ce9 2007-07-21 drh: if( g.argc!=4 && g.argc!=3 ) usage("RECORDID ?FILENAME?"); dbda8d6ce9 2007-07-21 drh: zFile = g.argc==4 ? g.argv[3] : "-"; dbda8d6ce9 2007-07-21 drh: db_must_be_within_tree(); dbda8d6ce9 2007-07-21 drh: rid = name_to_rid(g.argv[2]); dbda8d6ce9 2007-07-21 drh: blob_zero(&content); dbda8d6ce9 2007-07-21 drh: db_blob(&content, "SELECT content FROM blob WHERE rid=%d", rid); dbda8d6ce9 2007-07-21 drh: blob_uncompress(&content, &content); dbda8d6ce9 2007-07-21 drh: blob_write_to_file(&content, zFile); dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* 573a464cb7 2007-08-10 drh: ** When a record is converted from a phantom to a real record, 573a464cb7 2007-08-10 drh: ** if that record has other records that are derived by delta, 573a464cb7 2007-08-10 drh: ** then call manifest_crosslink() on those other records. 573a464cb7 2007-08-10 drh: */ 573a464cb7 2007-08-10 drh: void after_dephantomize(int rid, int linkFlag){ 573a464cb7 2007-08-10 drh: Stmt q; 573a464cb7 2007-08-10 drh: db_prepare(&q, "SELECT rid FROM delta WHERE srcid=%d", rid); 573a464cb7 2007-08-10 drh: while( db_step(&q)==SQLITE_ROW ){ 573a464cb7 2007-08-10 drh: int tid = db_column_int(&q, 0); 573a464cb7 2007-08-10 drh: after_dephantomize(tid, 1); 573a464cb7 2007-08-10 drh: } 573a464cb7 2007-08-10 drh: db_finalize(&q); 573a464cb7 2007-08-10 drh: if( linkFlag ){ 573a464cb7 2007-08-10 drh: Blob content; 573a464cb7 2007-08-10 drh: content_get(rid, &content); 573a464cb7 2007-08-10 drh: manifest_crosslink(rid, &content); 573a464cb7 2007-08-10 drh: blob_reset(&content); 573a464cb7 2007-08-10 drh: } 573a464cb7 2007-08-10 drh: } 573a464cb7 2007-08-10 drh: 573a464cb7 2007-08-10 drh: /* dbda8d6ce9 2007-07-21 drh: ** Write content into the database. Return the record ID. If the dbda8d6ce9 2007-07-21 drh: ** content is already in the database, just return the record ID. dbda8d6ce9 2007-07-21 drh: ** 573a464cb7 2007-08-10 drh: ** If srcId is specified, then pBlob is delta content from 573a464cb7 2007-08-10 drh: ** the srcId record. srcId might be a phantom. 573a464cb7 2007-08-10 drh: ** 573a464cb7 2007-08-10 drh: ** A phantom is written if pBlob==0. If pBlob==0 or if srcId is 573a464cb7 2007-08-10 drh: ** specified then the UUID is set to zUuid. Otherwise zUuid is 573a464cb7 2007-08-10 drh: ** ignored. In the future this might change such that the content 573a464cb7 2007-08-10 drh: ** hash is checked against zUuid to make sure it is correct. dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** If the record already exists but is a phantom, the pBlob content dbda8d6ce9 2007-07-21 drh: ** is inserted and the phatom becomes a real record. dbda8d6ce9 2007-07-21 drh: */ 573a464cb7 2007-08-10 drh: int content_put(Blob *pBlob, const char *zUuid, int srcId){ dbda8d6ce9 2007-07-21 drh: int size; dbda8d6ce9 2007-07-21 drh: int rid; dbda8d6ce9 2007-07-21 drh: Stmt s1; dbda8d6ce9 2007-07-21 drh: Blob cmpr; dbda8d6ce9 2007-07-21 drh: Blob hash; 48c4e69d2b 2007-09-09 drh: int markAsUnclustered = 0; 61ddd63b72 2008-03-06 drh: int isDephantomize = 0; 48c4e69d2b 2007-09-09 drh: dbda8d6ce9 2007-07-21 drh: assert( g.repositoryOpen ); 573a464cb7 2007-08-10 drh: if( pBlob && srcId==0 ){ 573a464cb7 2007-08-10 drh: sha1sum_blob(pBlob, &hash); 573a464cb7 2007-08-10 drh: }else{ dbda8d6ce9 2007-07-21 drh: blob_init(&hash, zUuid, -1); 573a464cb7 2007-08-10 drh: } 573a464cb7 2007-08-10 drh: if( pBlob==0 ){ dbda8d6ce9 2007-07-21 drh: size = -1; dbda8d6ce9 2007-07-21 drh: }else{ dbda8d6ce9 2007-07-21 drh: size = blob_size(pBlob); dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: db_begin_transaction(); dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* Check to see if the entry already exists and if it does whether dbda8d6ce9 2007-07-21 drh: ** or not the entry is a phantom dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: db_prepare(&s1, "SELECT rid, size FROM blob WHERE uuid=%B", &hash); dbda8d6ce9 2007-07-21 drh: if( db_step(&s1)==SQLITE_ROW ){ dbda8d6ce9 2007-07-21 drh: rid = db_column_int(&s1, 0); dbda8d6ce9 2007-07-21 drh: if( db_column_int(&s1, 1)>=0 || pBlob==0 ){ dbda8d6ce9 2007-07-21 drh: /* Either the entry is not a phantom or it is a phantom but we 8857e1eabb 2007-08-28 aku: ** have no data with which to dephantomize it. In either case, 8857e1eabb 2007-08-28 aku: ** there is nothing for us to do other than return the RID. */ dbda8d6ce9 2007-07-21 drh: db_finalize(&s1); dbda8d6ce9 2007-07-21 drh: db_end_transaction(0); dbda8d6ce9 2007-07-21 drh: return rid; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: }else{ dbda8d6ce9 2007-07-21 drh: rid = 0; /* No entry with the same UUID currently exists */ 48c4e69d2b 2007-09-09 drh: markAsUnclustered = 1; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: db_finalize(&s1); dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* Construct a received-from ID if we do not already have one */ dbda8d6ce9 2007-07-21 drh: if( g.rcvid==0 && pBlob!=0 ){ dbda8d6ce9 2007-07-21 drh: db_multi_exec( dbda8d6ce9 2007-07-21 drh: "INSERT INTO rcvfrom(uid, mtime, nonce, ipaddr)" dbda8d6ce9 2007-07-21 drh: "VALUES(%d, julianday('now'), %Q, %Q)", dbda8d6ce9 2007-07-21 drh: g.userUid, g.zNonce, g.zIpAddr dbda8d6ce9 2007-07-21 drh: ); dbda8d6ce9 2007-07-21 drh: g.rcvid = db_last_insert_rowid(); dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: if( rid>0 ){ dbda8d6ce9 2007-07-21 drh: /* We are just adding data to a phantom */ dbda8d6ce9 2007-07-21 drh: assert( pBlob!=0 ); dbda8d6ce9 2007-07-21 drh: db_prepare(&s1, dbda8d6ce9 2007-07-21 drh: "UPDATE blob SET rcvid=%d, size=%d, content=:data WHERE rid=%d", 75c476ccd1 2007-07-23 drh: g.rcvid, size, rid dbda8d6ce9 2007-07-21 drh: ); dbda8d6ce9 2007-07-21 drh: blob_compress(pBlob, &cmpr); dbda8d6ce9 2007-07-21 drh: db_bind_blob(&s1, ":data", &cmpr); dbda8d6ce9 2007-07-21 drh: db_exec(&s1); 73bddaebb9 2007-08-09 drh: db_multi_exec("DELETE FROM phantom WHERE rid=%d", rid); 61ddd63b72 2008-03-06 drh: if( srcId==0 || content_is_available(srcId) ){ 61ddd63b72 2008-03-06 drh: isDephantomize = 1; 61ddd63b72 2008-03-06 drh: content_mark_available(rid); 573a464cb7 2007-08-10 drh: } dbda8d6ce9 2007-07-21 drh: }else{ dbda8d6ce9 2007-07-21 drh: /* We are creating a new entry */ dbda8d6ce9 2007-07-21 drh: db_prepare(&s1, dbda8d6ce9 2007-07-21 drh: "INSERT INTO blob(rcvid,size,uuid,content)" 573a464cb7 2007-08-10 drh: "VALUES(%d,%d,'%b',:data)", 573a464cb7 2007-08-10 drh: g.rcvid, size, &hash dbda8d6ce9 2007-07-21 drh: ); dbda8d6ce9 2007-07-21 drh: if( pBlob ){ dbda8d6ce9 2007-07-21 drh: blob_compress(pBlob, &cmpr); dbda8d6ce9 2007-07-21 drh: db_bind_blob(&s1, ":data", &cmpr); dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: db_exec(&s1); dbda8d6ce9 2007-07-21 drh: rid = db_last_insert_rowid(); 73bddaebb9 2007-08-09 drh: if( !pBlob ){ 73bddaebb9 2007-08-09 drh: db_multi_exec("INSERT OR IGNORE INTO phantom VALUES(%d)", rid); 73bddaebb9 2007-08-09 drh: } 73bddaebb9 2007-08-09 drh: } 73bddaebb9 2007-08-09 drh: 573a464cb7 2007-08-10 drh: /* If the srcId is specified, then the data we just added is 573a464cb7 2007-08-10 drh: ** really a delta. Record this fact in the delta table. 573a464cb7 2007-08-10 drh: */ 573a464cb7 2007-08-10 drh: if( srcId ){ 573a464cb7 2007-08-10 drh: db_multi_exec("REPLACE INTO delta(rid,srcid) VALUES(%d,%d)", rid, srcId); e1c1877c99 2007-09-08 drh: } 61ddd63b72 2008-03-06 drh: if( !isDephantomize && bag_find(&contentCache.missing, rid) && 61ddd63b72 2008-03-06 drh: (srcId==0 || content_is_available(srcId)) ){ 61ddd63b72 2008-03-06 drh: content_mark_available(rid); 61ddd63b72 2008-03-06 drh: } 61ddd63b72 2008-03-06 drh: if( isDephantomize ){ 61ddd63b72 2008-03-06 drh: after_dephantomize(rid, 0); dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: 48c4e69d2b 2007-09-09 drh: /* Add the element to the unclustered table if has never been 48c4e69d2b 2007-09-09 drh: ** previously seen. e1c1877c99 2007-09-08 drh: */ 48c4e69d2b 2007-09-09 drh: if( markAsUnclustered ){ e1c1877c99 2007-09-08 drh: db_multi_exec("INSERT OR IGNORE INTO unclustered VALUES(%d)", rid); 573a464cb7 2007-08-10 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* Finish the transaction and cleanup */ dbda8d6ce9 2007-07-21 drh: db_finalize(&s1); dbda8d6ce9 2007-07-21 drh: db_end_transaction(0); dbda8d6ce9 2007-07-21 drh: blob_reset(&hash); dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* Make arrangements to verify that the data can be recovered dbda8d6ce9 2007-07-21 drh: ** before we commit */ dbda8d6ce9 2007-07-21 drh: if( pBlob ){ dbda8d6ce9 2007-07-21 drh: blob_reset(&cmpr); dbda8d6ce9 2007-07-21 drh: verify_before_commit(rid); dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: return rid; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** COMMAND: test-content-put dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** Extract a blob from the database and write it into a file. dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: void test_content_put_cmd(void){ dbda8d6ce9 2007-07-21 drh: int rid; dbda8d6ce9 2007-07-21 drh: Blob content; dbda8d6ce9 2007-07-21 drh: if( g.argc!=3 ) usage("FILENAME"); dbda8d6ce9 2007-07-21 drh: db_must_be_within_tree(); dbda8d6ce9 2007-07-21 drh: user_select(); dbda8d6ce9 2007-07-21 drh: blob_read_from_file(&content, g.argv[2]); 573a464cb7 2007-08-10 drh: rid = content_put(&content, 0, 0); dbda8d6ce9 2007-07-21 drh: printf("inserted as record %d\n", rid); dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** Make sure the content at rid is the original content and is not a dbda8d6ce9 2007-07-21 drh: ** delta. dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: void content_undelta(int rid){ 573a464cb7 2007-08-10 drh: if( findSrcid(rid)>0 ){ dbda8d6ce9 2007-07-21 drh: Blob x; 573a464cb7 2007-08-10 drh: if( content_get(rid, &x) ){ 573a464cb7 2007-08-10 drh: Stmt s; 573a464cb7 2007-08-10 drh: db_prepare(&s, "UPDATE blob SET content=:c, size=%d WHERE rid=%d", 573a464cb7 2007-08-10 drh: blob_size(&x), rid); 573a464cb7 2007-08-10 drh: blob_compress(&x, &x); 573a464cb7 2007-08-10 drh: db_bind_blob(&s, ":c", &x); 573a464cb7 2007-08-10 drh: db_exec(&s); 573a464cb7 2007-08-10 drh: db_finalize(&s); 573a464cb7 2007-08-10 drh: blob_reset(&x); 573a464cb7 2007-08-10 drh: db_multi_exec("DELETE FROM delta WHERE rid=%d", rid); 573a464cb7 2007-08-10 drh: } dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** COMMAND: test-content-undelta dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** Make sure the content at RECORDID is not a delta dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: void test_content_undelta_cmd(void){ dbda8d6ce9 2007-07-21 drh: int rid; dbda8d6ce9 2007-07-21 drh: if( g.argc!=2 ) usage("RECORDID"); dbda8d6ce9 2007-07-21 drh: db_must_be_within_tree(); dbda8d6ce9 2007-07-21 drh: rid = atoi(g.argv[2]); dbda8d6ce9 2007-07-21 drh: content_undelta(rid); dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** Change the storage of rid so that it is a delta of srcid. dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** If rid is already a delta from some other place then no dbda8d6ce9 2007-07-21 drh: ** conversion occurs and this is a no-op unless force==1. dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** If srcid is a delta that depends on rid, then srcid is dbda8d6ce9 2007-07-21 drh: ** converted to undeltaed text. 8857e1eabb 2007-08-28 aku: ** 8857e1eabb 2007-08-28 aku: ** If either rid or srcid contain less than 50 bytes, or if the 8857e1eabb 2007-08-28 aku: ** resulting delta does not achieve a compression of at least 25% on 8857e1eabb 2007-08-28 aku: ** its own the rid is left untouched. 8857e1eabb 2007-08-28 aku: ** 8857e1eabb 2007-08-28 aku: ** NOTE: IMHO the creation of the delta should be defered until after 8857e1eabb 2007-08-28 aku: ** the blob sizes have been checked. Doing it before the check as is 8857e1eabb 2007-08-28 aku: ** done now the code will generate a delta just to immediately throw 8857e1eabb 2007-08-28 aku: ** it away, wasting space and time. dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: void content_deltify(int rid, int srcid, int force){ dbda8d6ce9 2007-07-21 drh: int s; dbda8d6ce9 2007-07-21 drh: Blob data, src, delta; e3c529c2f0 2007-07-30 anonymous: Stmt s1, s2; dbda8d6ce9 2007-07-21 drh: if( srcid==rid ) return; 573a464cb7 2007-08-10 drh: if( !force && findSrcid(rid)>0 ) return; dbda8d6ce9 2007-07-21 drh: s = srcid; 573a464cb7 2007-08-10 drh: while( (s = findSrcid(s))>0 ){ dbda8d6ce9 2007-07-21 drh: if( s==rid ){ dbda8d6ce9 2007-07-21 drh: content_undelta(srcid); dbda8d6ce9 2007-07-21 drh: break; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: content_get(srcid, &src); dbda8d6ce9 2007-07-21 drh: content_get(rid, &data); dbda8d6ce9 2007-07-21 drh: blob_delta_create(&src, &data, &delta); dbda8d6ce9 2007-07-21 drh: if( blob_size(&src)>=50 && blob_size(&data)>=50 && dbda8d6ce9 2007-07-21 drh: blob_size(&delta) < blob_size(&data)*0.75 ){ dbda8d6ce9 2007-07-21 drh: blob_compress(&delta, &delta); e3c529c2f0 2007-07-30 anonymous: db_prepare(&s1, "UPDATE blob SET content=:data WHERE rid=%d", rid); 3a25b68390 2007-07-30 drh: db_prepare(&s2, "REPLACE INTO delta(rid,srcid)VALUES(%d,%d)", rid, srcid); 3a25b68390 2007-07-30 drh: db_bind_blob(&s1, ":data", &delta); dbda8d6ce9 2007-07-21 drh: db_begin_transaction(); dbda8d6ce9 2007-07-21 drh: db_exec(&s1); dbda8d6ce9 2007-07-21 drh: db_exec(&s2); dbda8d6ce9 2007-07-21 drh: db_end_transaction(0); e3c529c2f0 2007-07-30 anonymous: db_finalize(&s1); e3c529c2f0 2007-07-30 anonymous: db_finalize(&s2); 71ed18cc4b 2007-08-01 drh: verify_before_commit(rid); dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: blob_reset(&src); dbda8d6ce9 2007-07-21 drh: blob_reset(&data); dbda8d6ce9 2007-07-21 drh: blob_reset(&delta); dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** COMMAND: test-content-deltify dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** Convert the content at RID into a delta from SRCID. dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: void test_content_deltify_cmd(void){ dbda8d6ce9 2007-07-21 drh: if( g.argc!=5 ) usage("RID SRCID FORCE"); dbda8d6ce9 2007-07-21 drh: db_must_be_within_tree(); dbda8d6ce9 2007-07-21 drh: content_deltify(atoi(g.argv[2]), atoi(g.argv[3]), atoi(g.argv[4])); dbda8d6ce9 2007-07-21 drh: }