Check-in [ba486fec5a]
Not logged in
Overview

SHA1 Hash:ba486fec5a2b429fbf5fcff8feaef90c08ec8619
Date: 2007-09-03 01:28:57
User: drh
Comment:Add the unclustered table and the M lines to manifests. Process clusters in manifest_parse and manifest_crossref. The xfer process still does not yet use clusters though so it is still compatible. But that is about to change.
Timelines: ancestors | descendants | both | trunk
Other Links: files | ZIP archive | manifest

Tags And Properties
Changes
[hide diffs]

Modified ideas.txt from [a6d94120b0] to [084c43a653].

@@ -1,5 +1,105 @@
+Possible ticket file format:
+
+   "Ticket"
+   title: TEXT
+   ticketid: TEXT
+   exists-in: BASELINE   -- 0 or more
+   fixed-in: BASELINE    -- 0 or more
+   tag: TAG              -- 0 or more
+   created: DATETIME
+   attachment: FILENAME DESCRIPTION
+   parent: UUID*
+   derived-from: TICKET-FILENAME
+   description: MULTILINE-TEXT
+   remarks: MULTILINE-TEXT
+
+   * Things handled with tags:
+     created-by
+     assigned-to
+     priority
+     severity
+     target-release
+     status
+     resolution
+     type
+     subsystem
+
+Wiki header format:
+   "WikiPage"
+   parent: UUID*
+   title: TEXT
+   pagename: TEXT
+   mode: (readonly|appendonly|readwrite)
+   attachment: UUID name description
+
+   * Header ends with a blank line.  wiki content follows.
+
+Cluster format:
+
+   "Cluster"
+   (UUID\n)+
+
+   * Cluster generated in server mode only.
+   * Embargo clusters that reference phantoms or other embargoed clusters.
+   * Never send or ihave an embargoed cluster
+
+New sync algorithm based on clusters:
+
+   * Keep a table of unclustered artifacts.  Strive to keep this table
+     less than 100 entries.
+   * Client sends content of unclustered table as ihaves to server
+   * Server builds a new cluster if size of unclustered table >100.
+   * Server sends unclustered table to client
+   * Server sends gimme for all unknown ihave received from client
+   * Client sends gimme for all unknown ihave received from server
+   * Previous two steps repeat until no more gimmes
+
+Details of new push algorithm:
+
+   * Table "unsent" contains all files never pushed
+   * TEMP table "wanted" contains files the server does not have
+   Loop:
+     * Client sends login and "push" record
+     * Client sends file message for all files in unsent and removes
+       those files from the table.
+     * Client sends file message for all files in wanted.
+     * Client sends ihave messages for each entry in unclustered
+     ------
+     * Server receives file message
+     * Server creates phantoms for unknown ihaves
+     * Server sends gimme messages for all phantoms
+     ------
+     * For each gimme message add an entry to wanted
+     * Halt if the wanted table is empty
+
+Details on new pull algorithm:
+
+   Loop:
+     * Client sends login and "pull" record
+     * Client sends "prior" message with repository id and max record number
+     * Client sends "gimme" for each phantom
+     --------
+     * Server creates new clusters to get unclustered size below 100
+     * If there is "prior" message with repository id that matches this
+       server, then send file messages for all record ids greater than
+       prior
+     * Server sends ihave messages for each entry in unclustered
+     * Server sends maxrid message
+     --------
+     * Client receives file records
+     * Client creates phantoms for unknown ihaves
+     * If no phantoms exist, record maxrid for the server and halt
+
+Need a dephantomize algorithm
+
+
+Auxiliary tables needed for new sync algorithm:
+
+   * unsent:  files that have never been sent to another repository
+   * unclustered: non-phantom files not mentioned by a cluster
+
 Random thoughts:
 
   *  Changes to manifest to support:
      +  Trees of wiki pages and tickets
      +  The ability to cap or close a branch

Modified src/manifest.c from [2a7dfdcb16] to [b1ae7fa07b].

@@ -27,11 +27,11 @@
 #include "manifest.h"
 #include <assert.h>
 
 #if INTERFACE
 /*
-** A parsed manifest
+** A parsed manifest or cluster.
 */
 struct Manifest {
   Blob content;         /* The original content blob */
   char *zComment;       /* Decoded comment */
   double rDate;         /* Time in the "D" line */
@@ -44,10 +44,13 @@
     char *zUuid;           /* UUID of the file */
   } *aFile;
   int nParent;          /* Number of parents */
   int nParentAlloc;     /* Slots allocated in azParent[] */
   char **azParent;      /* UUIDs of parents */
+  int nCChild;          /* Number of cluster children */
+  int nCChildAlloc;     /* Number of slots allocated in azCChild[] */
+  char **azCChild;      /* UUIDs of referenced objects in a cluster */
 };
 #endif
 
 
 /*
@@ -55,10 +58,11 @@
 */
 void manifest_clear(Manifest *p){
   blob_reset(&p->content);
   free(p->aFile);
   free(p->azParent);
+  free(p->azCChild);
   memset(p, 0, sizeof(*p));
 }
 
 /*
 ** Parse a manifest blob into a Manifest object.  The Manifest
@@ -140,10 +144,28 @@
       if( p->rDate!=0.0 ) goto manifest_syntax_error;
       if( blob_token(&line, &a1)==0 ) goto manifest_syntax_error;
       if( blob_token(&line, &a2)!=0 ) goto manifest_syntax_error;
       zDate = blob_terminate(&a1);
       p->rDate = db_double(0.0, "SELECT julianday(%Q)", zDate);
+    }else if( z[0]=='M' ){
+      char *zUuid;
+      md5sum_step_text(blob_buffer(&line), blob_size(&line));
+      if( blob_token(&line, &a1)==0 ) goto manifest_syntax_error;
+      zUuid = blob_terminate(&a1);
+      if( blob_size(&a1)!=UUID_SIZE ) goto manifest_syntax_error;
+      if( !validate16(zUuid, UUID_SIZE) ) goto manifest_syntax_error;
+      if( p->nCChild>=p->nCChildAlloc ){
+        p->nCChildAlloc = p->nCChildAlloc*2 + 10;
+        p->azCChild =
+           realloc(p->azCChild, p->nCChildAlloc*sizeof(p->azCChild[0]) );
+        if( p->azCChild==0 ) fossil_panic("out of memory");
+      }
+      i = p->nCChild++;
+      p->azCChild[i] = zUuid;
+      if( i>0 && strcmp(p->azCChild[i-1], zUuid)>=0 ){
+        goto manifest_syntax_error;
+      }
     }else if( z[0]=='U' ){
       md5sum_step_text(blob_buffer(&line), blob_size(&line));
       if( p->zUser!=0 ) goto manifest_syntax_error;
       if( blob_token(&line, &a1)==0 ) goto manifest_syntax_error;
       if( blob_token(&line, &a2)!=0 ) goto manifest_syntax_error;
@@ -291,10 +313,14 @@
 
 /*
 ** Scan record rid/pContent to see if it is a manifest.  If
 ** it is a manifest, then populate the mlink, plink,
 ** filename, and event tables with cross-reference information.
+**
+** (Later:) Also check to see if pContent is a cluster.  If it
+** is a cluster then remove all referenced elements from the
+** unclustered table and create phantoms for any unknown elements.
 */
 int manifest_crosslink(int rid, Blob *pContent){
   int i;
   Manifest m;
   Stmt q;
@@ -321,10 +347,20 @@
     db_multi_exec(
       "INSERT INTO event(type,mtime,objid,user,comment)"
       "VALUES('ci',%.17g,%d,%Q,%Q)",
       m.rDate, rid, m.zUser, m.zComment
     );
+  }
+  for(i=0; i<m.nCChild; i++){
+    static Stmt dc;
+    db_static_prepare(&dc,
+      "DELETE FROM unclustered WHERE rid ="
+      " (SELECT rid FROM blob WHERE uuid=:u)"
+    );
+    db_bind_text(&dc, ":u", m.azCChild[i]);
+    db_step(&dc);
+    db_reset(&dc);
   }
   db_end_transaction(0);
   manifest_clear(&m);
   return 1;
 }

Modified src/rebuild.c from [104f7b4bf8] to [fab565baa9].

@@ -52,10 +52,14 @@
     db_multi_exec("DROP TABLE %Q", zTable);
     free(zTable);
   }
   db_multi_exec(zRepositorySchema2);
 
+  db_multi_exec("INSERT INTO unclustered SELECT rid FROM blob");
+  db_multi_exec(
+    "DELETE FROM config WHERE name IN ('remote-code', 'remote-maxid')"
+  );
   db_prepare(&s, "SELECT rid, size FROM blob");
   while( db_step(&s)==SQLITE_ROW ){
     int rid = db_column_int(&s, 0);
     int size = db_column_int(&s, 1);
     if( size>=0 ){

Modified src/schema.c from [c780fb4f32] to [7efe24b157].

@@ -165,10 +165,26 @@
 @ CREATE INDEX event_i2 ON event(objid);
 @
 @ -- A record of phantoms
 @ --
 @ CREATE TABLE phantom(
+@   rid INTEGER PRIMARY KEY         -- Record ID of the phantom
+@ );
+@
+@ -- Unclustered records.  An unclustered record is a record (including
+@ -- cluster records themselves) that is not mentioned by some other
+@ -- cluster.
+@ --
+@ CREATE TABLE unclustered(
+@   rid INTEGER PRIMARY KEY         -- Record ID of the unclustered file
+@ );
+@
+@ -- Records which have never been pushed to another server.  This is
+@ -- used to reduce push operations to a single HTTP request in the
+@ -- common case when one repository only talks to a single server.
+@ --
+@ CREATE TABLE unsent(
 @   rid INTEGER PRIMARY KEY         -- Record ID of the unsent file
 @ );
 @
 @ -- Aggregated ticket information
 @ --