Overview
SHA1 Hash: | ba486fec5a2b429fbf5fcff8feaef90c08ec8619 |
---|---|
Date: | 2007-09-03 01:28:57 |
User: | drh |
Comment: | Add the unclustered table and the M lines to manifests. Process clusters in manifest_parse and manifest_crosslink. The xfer process does not yet use clusters, though, so it is still compatible. But that is about to change. |
Timelines: | ancestors | descendants | both | trunk |
Other Links: | files | ZIP archive | manifest |
Tags And Properties
- branch=trunk inherited from [a28c83647d]
- sym-trunk inherited from [a28c83647d]
Changes
[hide diffs]Modified ideas.txt from [a6d94120b0] to [084c43a653].
@@ -1,5 +1,105 @@ +Possible ticket file format: + + "Ticket" + title: TEXT + ticketid: TEXT + exists-in: BASELINE -- 0 or more + fixed-in: BASELINE -- 0 or more + tag: TAG -- 0 or more + created: DATETIME + attachment: FILENAME DESCRIPTION + parent: UUID* + derived-from: TICKET-FILENAME + description: MULTILINE-TEXT + remarks: MULTILINE-TEXT + + * Things handled with tags: + created-by + assigned-to + priority + severity + target-release + status + resolution + type + subsystem + +Wiki header format: + "WikiPage" + parent: UUID* + title: TEXT + pagename: TEXT + mode: (readonly|appendonly|readwrite) + attachment: UUID name description + + * Header ends with a blank line. Wiki content follows. + +Cluster format: + + "Cluster" + (UUID\n)+ + + * Cluster generated in server mode only. + * Embargo clusters that reference phantoms or other embargoed clusters. + * Never send or ihave an embargoed cluster + +New sync algorithm based on clusters: + + * Keep a table of unclustered artifacts. Strive to keep this table + less than 100 entries. + * Client sends content of unclustered table as ihaves to server + * Server builds a new cluster if size of unclustered table >100. + * Server sends unclustered table to client + * Server sends gimme for all unknown ihave received from client + * Client sends gimme for all unknown ihave received from server + * Previous two steps repeat until no more gimmes + +Details of new push algorithm: + + * Table "unsent" contains all files never pushed + * TEMP table "wanted" contains files the server does not have + Loop: + * Client sends login and "push" record + * Client sends file message for all files in unsent and removes + those files from the table. + * Client sends file message for all files in wanted. 
+ * Client sends ihave messages for each entry in unclustered + ------ + * Server receives file message + * Server creates phantoms for unknown ihaves + * Server sends gimme messages for all phantoms + ------ + * For each gimme message add an entry to wanted + * Halt if the wanted table is empty + +Details on new pull algorithm: + + Loop: + * Client sends login and "pull" record + * Client sends "prior" message with repository id and max record number + * Client sends "gimme" for each phantom + -------- + * Server creates new clusters to get unclustered size below 100 + * If there is "prior" message with repository id that matches this + server, then send file messages for all record ids greater than + prior + * Server sends ihave messages for each entry in unclustered + * Server sends maxrid message + -------- + * Client receives file records + * Client creates phantoms for unknown ihaves + * If no phantoms exist, record maxrid for the server and halt + +Need a dephantomize algorithm + + +Auxiliary tables needed for new sync algorithm: + + * unsent: files that have never been sent to another repository + * unclustered: non-phantom files not mentioned by a cluster + Random thoughts: * Changes to manifest to support: + Trees of wiki pages and tickets + The ability to cap or close a branch
Modified src/manifest.c from [2a7dfdcb16] to [b1ae7fa07b].
@@ -27,11 +27,11 @@ #include "manifest.h" #include <assert.h> #if INTERFACE /* -** A parsed manifest +** A parsed manifest or cluster. */ struct Manifest { Blob content; /* The original content blob */ char *zComment; /* Decoded comment */ double rDate; /* Time in the "D" line */ @@ -44,10 +44,13 @@ char *zUuid; /* UUID of the file */ } *aFile; int nParent; /* Number of parents */ int nParentAlloc; /* Slots allocated in azParent[] */ char **azParent; /* UUIDs of parents */ + int nCChild; /* Number of cluster children */ + int nCChildAlloc; /* Number of slots allocated in azCChild[] */ + char **azCChild; /* UUIDs of referenced objects in a cluster */ }; #endif /* @@ -55,10 +58,11 @@ */ void manifest_clear(Manifest *p){ blob_reset(&p->content); free(p->aFile); free(p->azParent); + free(p->azCChild); memset(p, 0, sizeof(*p)); } /* ** Parse a manifest blob into a Manifest object. The Manifest @@ -140,10 +144,28 @@ if( p->rDate!=0.0 ) goto manifest_syntax_error; if( blob_token(&line, &a1)==0 ) goto manifest_syntax_error; if( blob_token(&line, &a2)!=0 ) goto manifest_syntax_error; zDate = blob_terminate(&a1); p->rDate = db_double(0.0, "SELECT julianday(%Q)", zDate); + }else if( z[0]=='M' ){ + char *zUuid; + md5sum_step_text(blob_buffer(&line), blob_size(&line)); + if( blob_token(&line, &a1)==0 ) goto manifest_syntax_error; + zUuid = blob_terminate(&a1); + if( blob_size(&a1)!=UUID_SIZE ) goto manifest_syntax_error; + if( !validate16(zUuid, UUID_SIZE) ) goto manifest_syntax_error; + if( p->nCChild>=p->nCChildAlloc ){ + p->nCChildAlloc = p->nCChildAlloc*2 + 10; + p->azCChild = + realloc(p->azCChild, p->nCChildAlloc*sizeof(p->azCChild[0]) ); + if( p->azCChild==0 ) fossil_panic("out of memory"); + } + i = p->nCChild++; + p->azCChild[i] = zUuid; + if( i>0 && strcmp(p->azCChild[i-1], zUuid)>=0 ){ + goto manifest_syntax_error; + } }else if( z[0]=='U' ){ md5sum_step_text(blob_buffer(&line), blob_size(&line)); if( p->zUser!=0 ) goto manifest_syntax_error; if( blob_token(&line, 
&a1)==0 ) goto manifest_syntax_error; if( blob_token(&line, &a2)!=0 ) goto manifest_syntax_error; @@ -291,10 +313,14 @@ /* ** Scan record rid/pContent to see if it is a manifest. If ** it is a manifest, then populate the mlink, plink, ** filename, and event tables with cross-reference information. +** +** (Later:) Also check to see if pContent is a cluster. If it +** is a cluster then remove all referenced elements from the +** unclustered table and create phantoms for any unknown elements. */ int manifest_crosslink(int rid, Blob *pContent){ int i; Manifest m; Stmt q; @@ -321,10 +347,20 @@ db_multi_exec( "INSERT INTO event(type,mtime,objid,user,comment)" "VALUES('ci',%.17g,%d,%Q,%Q)", m.rDate, rid, m.zUser, m.zComment ); + } + for(i=0; i<m.nCChild; i++){ + static Stmt dc; + db_static_prepare(&dc, + "DELETE FROM unclustered WHERE rid =" + " (SELECT rid FROM blob WHERE uuid=:u)" + ); + db_bind_text(&dc, ":u", m.azCChild[i]); + db_step(&dc); + db_reset(&dc); } db_end_transaction(0); manifest_clear(&m); return 1; }
Modified src/rebuild.c from [104f7b4bf8] to [fab565baa9].
@@ -52,10 +52,14 @@ db_multi_exec("DROP TABLE %Q", zTable); free(zTable); } db_multi_exec(zRepositorySchema2); + db_multi_exec("INSERT INTO unclustered SELECT rid FROM blob"); + db_multi_exec( + "DELETE FROM config WHERE name IN ('remote-code', 'remote-maxid')" + ); db_prepare(&s, "SELECT rid, size FROM blob"); while( db_step(&s)==SQLITE_ROW ){ int rid = db_column_int(&s, 0); int size = db_column_int(&s, 1); if( size>=0 ){
Modified src/schema.c from [c780fb4f32] to [7efe24b157].
@@ -165,10 +165,26 @@ @ CREATE INDEX event_i2 ON event(objid); @ @ -- A record of phantoms @ -- @ CREATE TABLE phantom( +@ rid INTEGER PRIMARY KEY -- Record ID of the phantom +@ ); +@ +@ -- Unclustered records. An unclustered record is a record (including +@ -- cluster records themselves) that is not mentioned by some other +@ -- cluster. +@ -- +@ CREATE TABLE unclustered( +@ rid INTEGER PRIMARY KEY -- Record ID of the unclustered file +@ ); +@ +@ -- Records which have never been pushed to another server. This is +@ -- used to reduce push operations to a single HTTP request in the +@ -- common case when one repository only talks to a single server. +@ -- +@ CREATE TABLE unsent( @ rid INTEGER PRIMARY KEY -- Record ID of the phantom @ ); @ @ -- Aggregated ticket information @ --