Check-in [522104c2cd]
Not logged in
Overview

SHA1 Hash:522104c2cd244485f6a952641c05777864f888f8
Date: 2009-03-24 22:03:56
User: drh
Comment:Improvements to the delta generator algorthm so that it runs much faster on large files with very few similarities. There is no change to the delta format generated, so this is fully backwards and forwards compatible.
Timelines: ancestors | descendants | both | trunk
Other Links: files | ZIP archive | manifest

Tags And Properties
Changes
[hide diffs]

Modified src/delta.c from [abb4c0444e] to [1af641b4b6].

@@ -222,15 +222,10 @@
   }
   return sum;
 }
 
 /*
-** Maximum number of landmarks to set in the source file.
-*/
-#define MX_LANDMARK (1024*128)
-
-/*
 ** Create a new delta.
 **
 ** The delta is written into a preallocated buffer, zDelta, which
 ** should be at least 60 bytes longer than the target file, zOut.
 ** The delta string will be NUL-terminated, but it might also contain
@@ -297,13 +292,14 @@
   char *zDelta           /* Write the delta into this buffer */
 ){
   int i, base;
   char *zOrigDelta = zDelta;
   hash h;
-  int *collide;
-  int lastRead = -1;         /* Last byte of zSrc read by a COPY command */
-  int landmark[MX_LANDMARK];
+  int nHash;                 /* Number of hash table entries */
+  int *landmark;             /* Primary hash table */
+  int *collide;              /* Collision chain */
+  int lastRead = -1;         /* Last byte of zSrc read by a COPY command */
 
   /* Add the target file size to the beginning of the delta
   */
   putInt(lenOut, &zDelta);
   *(zDelta++) = '\n';
@@ -323,18 +319,20 @@
   }
 
   /* Compute the hash table used to locate matching sections in the
   ** source file.
   */
-  collide = malloc( lenSrc*sizeof(int)/NHASH );
+  nHash = lenSrc/NHASH;
+  collide = malloc( nHash*2*sizeof(int) );
   if( collide==0 ) return -1;
-  memset(landmark, -1, sizeof(landmark));
-  memset(collide, -1, lenSrc*sizeof(int)/NHASH );
+  landmark = &collide[nHash];
+  memset(landmark, -1, nHash*sizeof(int));
+  memset(collide, -1, nHash*sizeof(int));
   for(i=0; i<lenSrc-NHASH; i+=NHASH){
     int hv;
     hash_init(&h, &zSrc[i]);
-    hv = hash_32bit(&h) & (MX_LANDMARK-1);
+    hv = hash_32bit(&h) % nHash;
     collide[i/NHASH] = landmark[hv];
     landmark[hv] = i/NHASH;
   }
 
   /* Begin scanning the target file and generating copy commands and
@@ -349,11 +347,11 @@
     bestCnt = 0;
     while( 1 ){
       int hv;
       int limit = 250;
 
-      hv = hash_32bit(&h) & (MX_LANDMARK-1);
+      hv = hash_32bit(&h) % nHash;
       DEBUG2( printf("LOOKING: %4d [%s]\n", base+i, print16(&zOut[base+i])); )
       iBlock = landmark[hv];
       while( iBlock>=0 && (limit--)>0 ){
         /*
         ** The hash window has identified a potential match against