Check-in [243e02bfbd]
Not logged in
Overview

SHA1 Hash:243e02bfbdf5629131d8c6b0709e538ca8a33897
Date: 2008-05-18 15:51:05
User: drh
Comment:Improvements to cloning performance.
Timelines: ancestors | descendants | both | trunk
Other Links: files | ZIP archive | manifest

Tags And Properties
Changes
[hide diffs]

Modified src/clone.c from [0c2a5c8339] to [ebfd80e735].

@@ -74,10 +74,18 @@
                     zTab, zTab);
     }
     db_finalize(&q);
   }else{
     url_enable_proxy(0);
+    g.xlinkClusterOnly = 1;
     client_sync(0,0,1);
+    g.xlinkClusterOnly = 0;
   }
   verify_cancel();
+  db_end_transaction(0);
+  db_close();
+  db_open_repository(g.argv[3]);
+  db_begin_transaction();
+  printf("Rebuilding repository meta-data...\n");
+  rebuild_db(0, 1);
   db_end_transaction(0);
 }

Modified src/content.c from [837378bf40] to [c3fcdbc5dd].

@@ -365,13 +365,13 @@
 ** content is already in the database, just return the record ID.
 **
 ** If srcId is specified, then pBlob is delta content from
 ** the srcId record.  srcId might be a phantom.
 **
-** If srcId is specified then the UUID is set to zUuid.  Otherwise zUuid is
-** ignored.  In the future this might change such that the content
-** hash is checked against zUuid to make sure it is correct.
+** zUuid is the UUID of the artifact, if it is specified.  When srcId is
+** specified then zUuid must always be specified.  If srcId is zero,
+** and zUuid is zero then the correct zUuid is computed from pBlob.
 **
 ** If the record already exists but is a phantom, the pBlob content
 ** is inserted and the phantom becomes a real record.
 */
 int content_put(Blob *pBlob, const char *zUuid, int srcId){
@@ -383,11 +383,13 @@
   int markAsUnclustered = 0;
   int isDephantomize = 0;
 
   assert( g.repositoryOpen );
   assert( pBlob!=0 );
-  if( srcId==0 ){
+  assert( srcId==0 || zUuid!=0 );
+  if( zUuid==0 ){
+    assert( pBlob!=0 );
     sha1sum_blob(pBlob, &hash);
   }else{
     blob_init(&hash, zUuid, -1);
   }
   size = blob_size(pBlob);

Modified src/main.c from [7b16a4ea5c] to [c15347d3f4].

@@ -74,10 +74,11 @@
   Blob cgiIn;             /* Input to an xfer www method */
   int cgiPanic;           /* Write error messages to CGI */
   Th_Interp *interp;      /* The TH1 interpreter */
   FILE *httpIn;           /* Accept HTTP input from here */
   FILE *httpOut;          /* Send HTTP output here */
+  int xlinkClusterOnly;   /* Set when cloning.  Only process clusters */
 
   int *aCommitFile;       /* Array of files to be committed */
 
   int urlIsFile;          /* True if a "file:" url */
   char *urlName;          /* Hostname for http: or filename for file: */

Modified src/manifest.c from [8e8e54e6bf] to [b08b58d9b8].

@@ -754,25 +754,42 @@
   }
   manifest_clear(&other);
 }
 
 /*
-** Scan record rid/pContent to see if it is a manifest.  If
-** it is a manifest, then populate the mlink, plink,
-** filename, and event tables with cross-reference information.
+** Scan artifact rid/pContent to see if it is a control artifact of
+** any kind:
+**
+**      *  Manifest
+**      *  Control
+**      *  Wiki Page
+**      *  Ticket Change
+**      *  Cluster
+**
+** If the input is a control artifact, then make appropriate entries
+** in the auxiliary tables of the database in order to crosslink the
+** artifact.
+**
+** If global variable g.xlinkClusterOnly is true, then ignore all
+** control artifacts other than clusters.
 **
-** (Later:) Also check to see if pContent is a cluster.  If it
-** is a cluster then remove all referenced elements from the
-** unclustered table and create phantoms for any unknown elements.
+** Historical note:  This routine originally processed manifests only.
+** Processing for other control artifacts was added later.  The name
+** of the routine, "manifest_crosslink", and the name of this source
+** file, are a legacy of its original use.
 */
 int manifest_crosslink(int rid, Blob *pContent){
   int i;
   Manifest m;
   Stmt q;
   int parentid = 0;
 
   if( manifest_parse(&m, pContent)==0 ){
+    return 0;
+  }
+  if( g.xlinkClusterOnly && m.type!=CFTYPE_CLUSTER ){
+    manifest_clear(&m);
     return 0;
   }
   db_begin_transaction();
   if( m.type==CFTYPE_MANIFEST ){
     if( !db_exists("SELECT 1 FROM mlink WHERE mid=%d", rid) ){

Modified src/schema.c from [f7d41345c4] to [c2d5e134b5].

@@ -73,12 +73,13 @@
 @ --
 @ CREATE TABLE blob(
 @   rid INTEGER PRIMARY KEY,        -- Record ID
 @   rcvid INTEGER,                  -- Origin of this record
 @   size INTEGER,                   -- Size of content. -1 for a phantom.
-@   uuid TEXT UNIQUE,               -- SHA1 hash of the content
-@   content BLOB                    -- Compressed content of this record
+@   uuid TEXT UNIQUE NOT NULL,      -- SHA1 hash of the content
+@   content BLOB,                   -- Compressed content of this record
+@   CHECK( length(uuid)==40 AND rid>0 )
 @ );
 @ CREATE TABLE delta(
 @   rid INTEGER PRIMARY KEY,                 -- Record ID
 @   srcid INTEGER NOT NULL REFERENCES blob   -- Record holding source document
 @ );

Modified src/xfer.c from [be750bc3bc] to [fdac8fe810].

@@ -65,12 +65,12 @@
   Blob *pOut;         /* Compose our reply here */
   Blob line;          /* The current line of input */
   Blob aToken[5];     /* Tokenized version of line */
   Blob err;           /* Error message text */
   int nToken;         /* Number of tokens in line */
-  int nIGotSent;      /* Number of "igot" messages sent */
-  int nGimmeSent;     /* Number of gimme messages sent */
+  int nIGotSent;      /* Number of "igot" cards sent */
+  int nGimmeSent;     /* Number of gimme cards sent */
   int nFileSent;      /* Number of files sent */
   int nDeltaSent;     /* Number of deltas sent */
   int nFileRcvd;      /* Number of files received */
   int nDeltaRcvd;     /* Number of deltas received */
   int nDanglingFile;  /* Number of dangling deltas received */
@@ -170,15 +170,16 @@
   }
   sha1sum_blob(&content, &hash);
   if( !blob_eq_str(&pXfer->aToken[1], blob_str(&hash), -1) ){
     blob_appendf(&pXfer->err, "content does not match sha1 hash");
   }
-  blob_reset(&hash);
-  rid = content_put(&content, 0, 0);
+  rid = content_put(&content, blob_str(&hash), 0);
+  blob_reset(&hash);
   if( rid==0 ){
     blob_appendf(&pXfer->err, "%s", g.zErrMsg);
   }else{
+    /* db_multi_exec("DELETE FROM phantom WHERE rid=%d", rid); */
     manifest_crosslink(rid, &content);
   }
   remote_has(rid);
 }
 
@@ -460,11 +461,11 @@
   blob_reset(&cluster);
 }
 
 /*
 ** Send an igot message for every entry in unclustered table.
-** Return the number of messages sent.
+** Return the number of cards sent.
 */
 static int send_unclustered(Xfer *pXfer){
   Stmt q;
   int cnt = 0;
   db_prepare(&q,
@@ -475,10 +476,22 @@
     blob_appendf(pXfer->pOut, "igot %s\n", db_column_text(&q, 0));
     cnt++;
   }
   db_finalize(&q);
   return cnt;
+}
+
+/*
+** Send an igot message for every artifact.
+*/
+static void send_all(Xfer *pXfer){
+  Stmt q;
+  db_prepare(&q, "SELECT uuid FROM blob");
+  while( db_step(&q)==SQLITE_ROW ){
+    blob_appendf(pXfer->pOut, "igot %s\n", db_column_text(&q, 0));
+  }
+  db_finalize(&q);
 }
 
 /*
 ** If this variable is set, disable login checks.  Used for debugging
 ** only.
@@ -496,10 +509,12 @@
   int isPull = 0;
   int isPush = 0;
   int nErr = 0;
   Xfer xfer;
   int deltaFlag = 0;
+  int isClone = 0;
+  int nGimme = 0;
 
   memset(&xfer, 0, sizeof(xfer));
   blobarray_zero(xfer.aToken, count(xfer.aToken));
   cgi_set_content_type(g.zContentType);
   blob_zero(&xfer.err);
@@ -541,10 +556,11 @@
     */
     if( blob_eq(&xfer.aToken[0], "gimme")
      && xfer.nToken==2
      && blob_is_uuid(&xfer.aToken[1])
     ){
+      nGimme++;
       if( isPull ){
         int rid = rid_from_uuid(&xfer.aToken[1], 0);
         if( rid ){
           send_file(&xfer, rid, &xfer.aToken[1], deltaFlag);
         }
@@ -634,10 +650,11 @@
         cgi_reset_content();
         @ error not\sauthorized\sto\sclone
         nErr++;
         break;
       }
+      isClone = 1;
       isPull = 1;
       deltaFlag = 1;
       @ push %s(db_get("server-code", "x")) %s(db_get("project-code", "x"))
     }else
 
@@ -698,11 +715,20 @@
     blobarray_reset(xfer.aToken, xfer.nToken);
   }
   if( isPush ){
     request_phantoms(&xfer, 500);
   }
-  if( isPull ){
+  if( isClone && nGimme==0 ){
+    /* The initial "clone" message from client to server contains no
+    ** "gimme" cards. On that initial message, send the client an "igot"
+    ** card for every artifact currently in the repository.  This will
+    ** cause the client to create phantoms for all artifacts, which will
+    ** in turn make sure that the entire repository is sent efficiently
+    ** and expeditiously.
+    */
+    send_all(&xfer);
+  }else if( isPull ){
     create_cluster();
     send_unclustered(&xfer);
   }
   db_end_transaction(0);
 }
@@ -738,10 +764,16 @@
   disableLogin = 1;
   page_xfer();
   printf("%s\n", cgi_extract_content(&notUsed));
 }
 
+/*
+** Format strings for progress reporting.
+*/
+static const char zLabel[] = "%-10s %10s %10s %10s %10s %10s\n";
+static const char zValue[] = "\r%-10s %10d %10d %10d %10d %10d\n";
+
 
 /*
 ** Sync to the host identified in g.urlName and g.urlPath.  This
 ** routine is called by the client.
 **
@@ -751,11 +783,11 @@
 */
 void client_sync(int pushFlag, int pullFlag, int cloneFlag){
   int go = 1;        /* Loop until zero */
   const char *zSCode = db_get("server-code", "x");
   const char *zPCode = db_get("project-code", 0);
-  int nMsg = 0;          /* Number of messages sent or received */
+  int nCard = 0;         /* Number of cards sent or received */
   int nCycle = 0;        /* Number of round trips to the server */
   int nFileSend = 0;
   int nFileRecv;          /* Number of files received */
   int mxPhantomReq = 200; /* Max number of phantoms to request per comm */
   const char *zCookie;    /* Server cookie */
@@ -786,21 +818,21 @@
   */
   if( cloneFlag ){
     blob_appendf(&send, "clone\n");
     pushFlag = 0;
     pullFlag = 0;
-    nMsg++;
+    nCard++;
     /* TBD: Request all transferable configuration values */
   }else if( pullFlag ){
     blob_appendf(&send, "pull %s %s\n", zSCode, zPCode);
-    nMsg++;
+    nCard++;
   }
   if( pushFlag ){
     blob_appendf(&send, "push %s %s\n", zSCode, zPCode);
-    nMsg++;
+    nCard++;
   }
-
+  printf(zLabel, "", "Bytes", "Cards", "Artifacts", "Deltas", "Dangling");
 
   while( go ){
     int newPhantom = 0;
 
     /* Send the most recently received cookie.  Let the server
@@ -809,54 +841,59 @@
     zCookie = db_get("cookie", 0);
     if( zCookie ){
       blob_appendf(&send, "cookie %s\n", zCookie);
     }
 
-    /* Generate gimme messages for phantoms and leaf messages
+    /* Generate gimme cards for phantoms and leaf cards
     ** for all leaves.
     */
     if( pullFlag || cloneFlag ){
       request_phantoms(&xfer, mxPhantomReq);
     }
     if( pushFlag ){
       send_unsent(&xfer);
-      nMsg += send_unclustered(&xfer);
+      nCard += send_unclustered(&xfer);
     }
 
     /* Exchange messages with the server */
     nFileSend = xfer.nFileSent + xfer.nDeltaSent;
-    printf("Sent:      %10d bytes, %5d messages, %5d files (%d+%d)\n",
-            blob_size(&send), nMsg+xfer.nGimmeSent+xfer.nIGotSent,
+    printf(zValue, "Send:",
+            blob_size(&send), nCard+xfer.nGimmeSent+xfer.nIGotSent,
+            xfer.nFileSent, xfer.nDeltaSent, 0);
+#if 0
+    printf("Sent:      %10d bytes, %5d cards, %5d files (%d+%d)\n",
+            blob_size(&send), nCard+xfer.nGimmeSent+xfer.nIGotSent,
             nFileSend, xfer.nFileSent, xfer.nDeltaSent);
-    nMsg = 0;
+#endif
+    nCard = 0;
     xfer.nFileSent = 0;
     xfer.nDeltaSent = 0;
     xfer.nGimmeSent = 0;
     fflush(stdout);
     http_exchange(&send, &recv);
     blob_reset(&send);
 
     /* Begin constructing the next message (which might never be
-    ** sent) by beginning with the pull or push messages
+    ** sent) by beginning with the pull or push cards
     */
     if( pullFlag ){
       blob_appendf(&send, "pull %s %s\n", zSCode, zPCode);
-      nMsg++;
+      nCard++;
     }
     if( pushFlag ){
       blob_appendf(&send, "push %s %s\n", zSCode, zPCode);
-      nMsg++;
+      nCard++;
     }
 
     /* Process the reply that came back from the server */
     while( blob_line(&recv, &xfer.line) ){
       if( blob_buffer(&xfer.line)[0]=='#' ){
         continue;
       }
       xfer.nToken = blob_tokenize(&xfer.line, xfer.aToken, count(xfer.aToken));
-      nMsg++;
-      printf("\r%d", nMsg);
+      nCard++;
+      printf("\r%d", nCard);
       fflush(stdout);
 
       /*   file UUID SIZE \n CONTENT
       **   file UUID DELTASRC SIZE \n CONTENT
       **
@@ -920,11 +957,11 @@
         if( zPCode==0 ){
           zPCode = mprintf("%b", &xfer.aToken[2]);
           db_set("project-code", zPCode, 0);
         }
         blob_appendf(&send, "clone\n");
-        nMsg++;
+        nCard++;
       }else
 
       /*   config NAME SIZE \n CONTENT
       **
       ** Receive a configuration value from the server.
@@ -978,15 +1015,19 @@
         fossil_fatal("%b", &xfer.err);
       }
       blobarray_reset(xfer.aToken, xfer.nToken);
       blob_reset(&xfer.line);
     }
-    printf("\rReceived:  %10d bytes, %5d messages, %5d files (%d+%d+%d)\n",
-            blob_size(&recv), nMsg,
+    printf(zValue, "Received:",
+            blob_size(&recv), nCard,
+            xfer.nFileRcvd, xfer.nDeltaRcvd, xfer.nDanglingFile);
+#if 0
+    printf("\rReceived:  %10d bytes, %5d cards, %5d files (%d+%d+%d)\n",
+            blob_size(&recv), nCard,
             xfer.nFileRcvd + xfer.nDeltaRcvd + xfer.nDanglingFile,
             xfer.nFileRcvd, xfer.nDeltaRcvd, xfer.nDanglingFile);
-
+#endif
     blob_reset(&recv);
     nCycle++;
     go = 0;
 
     /* If we received one or more files on the previous exchange but
@@ -996,11 +1037,11 @@
     if( (nFileRecv>0 || newPhantom) && db_exists("SELECT 1 FROM phantom") ){
       go = 1;
       mxPhantomReq = nFileRecv*2;
       if( mxPhantomReq<200 ) mxPhantomReq = 200;
     }
-    nMsg = 0;
+    nCard = 0;
     xfer.nFileRcvd = 0;
     xfer.nDeltaRcvd = 0;
     xfer.nDanglingFile = 0;
 
     /* If we have one or more files queued to send, then go