Check-in [ac6bb3ce06]
Not logged in
Overview

SHA1 Hash:ac6bb3ce069523ecc7b664ff41a8b5cc3fc7f657
Date: 2007-11-07 22:22:02
User: drh
Comment:Improvements to the merge algorithm so that it works better for common changes. Still more work needed.
Timelines: ancestors | descendants | both | trunk
Other Links: files | ZIP archive | manifest

Tags And Properties
Changes
[hide diffs]

Modified src/merge3.c from [5d18e08162] to [73c60b9f66].

@@ -273,10 +273,11 @@
   const char *zOut,      /* The target file */
   int lenOut,            /* Length of the target file */
   Mapping *pMap          /* Write the map of similaries here */
 ){
   int i, j, base, prefix;
+  int changes;
   hash h;
   int *collide;
   int origLenOut = lenOut;
   struct Mapping_entry *aMap;
   int landmark[MX_LANDMARK];
@@ -420,83 +421,90 @@
       hash_next(&h, zOut[base+i+NHASH]);
       i++;
     }
   }
   free(collide);
+#if 0
   DEBUG1(
    printf("after creation:\n");
    MappingPrint(pMap);
   )
+#endif
 
   /* In this step, we will remove overlapping entries from the mapping.
   **
   ** We use a greedy algorithm.  Select the largest mapping first and
   ** remove all overlapping mappings.  Then take the next largest
   ** mapping and remove others that overlap with it.  Keep going until
   ** all mappings have been processed.
   */
   MappingSortSize(pMap);
-  for(i=0; i<pMap->nUsed; i++){
-    int sortNeeded = 0;
-    int purgeNeeded = 0;
-    struct Mapping_entry *pA;
-    pA = &pMap->aMap[i];
-    for(j=i+1; j<pMap->nUsed; j++){
-      int diff;
-      struct Mapping_entry *pB;
-      pB = &pMap->aMap[j];
-      if( pB->fromLast<pA->fromFirst || pB->fromFirst>pA->fromLast ){
-        /* No overlap.  Do nothing */
-      }else if( pB->fromFirst>=pA->fromFirst && pB->fromLast<=pA->fromLast ){
-        /* B is contained entirely within A.  Drop B */
-        pB->fromFirst = -1;
-        purgeNeeded = 1;
-        continue;
-      }else if( pB->fromFirst<pA->fromFirst ){
-        /* The tail B overlaps the head of A */
-        assert( pB->fromLast>=pA->fromFirst && pB->fromLast<=pA->fromLast );
-        diff = pB->fromLast + 1 - pA->fromFirst;
-        pB->fromLast -= diff;
-        pB->toLast -= diff;
-        sortNeeded = 1;
-      }else{
-        /* The head of B overlaps the tail of A */
-        assert( pB->fromFirst<=pA->fromLast && pB->fromLast>pA->fromLast );
-        diff = pA->fromLast + 1 - pB->fromFirst;
-        pB->fromFirst += diff;
-        pB->toFirst += diff;
-        sortNeeded = 1;
+  do{
+    changes = 0;
+    for(i=0; i<pMap->nUsed; i++){
+      int sortNeeded = 0;
+      int purgeNeeded = 0;
+      struct Mapping_entry *pA;
+      pA = &pMap->aMap[i];
+      for(j=i+1; j<pMap->nUsed; j++){
+        int diff;
+        struct Mapping_entry *pB;
+        pB = &pMap->aMap[j];
+        if( pB->fromLast<pA->fromFirst || pB->fromFirst>pA->fromLast ){
+          /* No overlap.  Do nothing */
+        }else if( pB->fromFirst>=pA->fromFirst && pB->fromLast<=pA->fromLast ){
+          /* B is contained entirely within A.  Drop B */
+          pB->fromFirst = -1;
+          purgeNeeded = 1;
+          continue;
+        }else if( pB->fromFirst<pA->fromFirst ){
+          /* The tail B overlaps the head of A */
+          assert( pB->fromLast>=pA->fromFirst && pB->fromLast<=pA->fromLast );
+          diff = pB->fromLast + 1 - pA->fromFirst;
+          pB->fromLast -= diff;
+          pB->toLast -= diff;
+          sortNeeded = 1;
+        }else{
+          /* The head of B overlaps the tail of A */
+          assert( pB->fromFirst<=pA->fromLast && pB->fromLast>pA->fromLast );
+          diff = pA->fromLast + 1 - pB->fromFirst;
+          pB->fromFirst += diff;
+          pB->toFirst += diff;
+          sortNeeded = 1;
+        }
+        if( pB->toLast<pA->toFirst || pB->toFirst>pA->toLast ){
+          /* No overlap.  Do nothing */
+        }else if( pB->toFirst>=pA->toFirst && pB->toLast<=pA->toLast ){
+          /* B is contained entirely within A.  Drop B */
+          pB->fromFirst = -1;
+          purgeNeeded = 1;
+        }else if( pB->toFirst<pA->toFirst ){
+          /* The tail of B overlaps the head of A */
+          assert( pB->toLast>=pA->toFirst && pB->toLast<=pA->toLast );
+          diff = pB->toLast + 1 - pA->toFirst;
+          pB->fromLast -= diff;
+          pB->toLast -= diff;
+          sortNeeded = 1;
+        }else{
+          /* The head of B overlaps the tail of A */
+          assert( pB->toFirst<=pA->toLast && pB->toLast>pA->toLast );
+          diff = pA->toLast + 1 - pB->toFirst;
+          pB->fromFirst += diff;
+          pB->toFirst += diff;
+          sortNeeded = 1;
+        }
+      }
+      if( purgeNeeded ){
+        MappingPurgeDeletedEntries(pMap);
+        /* changes++; */
       }
-      if( pB->toLast<pA->toFirst || pB->toFirst>pA->toLast ){
-        /* No overlap.  Do nothing */
-      }else if( pB->toFirst>=pA->toFirst && pB->toLast<=pA->toLast ){
-        /* B is contained entirely within A.  Drop B */
-        pB->fromFirst = -1;
-        purgeNeeded = 1;
-      }else if( pB->toFirst<pA->toFirst ){
-        /* The tail of B overlaps the head of A */
-        assert( pB->toLast>=pA->toFirst && pB->toLast<=pA->toLast );
-        diff = pB->toLast + 1 - pA->toFirst;
-        pB->fromLast -= diff;
-        pB->toLast -= diff;
-        sortNeeded = 1;
-      }else{
-        /* The head of B overlaps the tail of A */
-        assert( pB->toFirst<=pA->toLast && pB->toLast>pA->toLast );
-        diff = pA->toLast + 1 - pB->toFirst;
-        pB->fromFirst += diff;
-        pB->toFirst += diff;
-        sortNeeded = 1;
+      if( sortNeeded && i<pMap->nUsed-2 ){
+        MappingSortSize(pMap);
+        /* changes++; */
       }
     }
-    if( purgeNeeded ){
-      MappingPurgeDeletedEntries(pMap);
-    }
-    if( sortNeeded && i<pMap->nUsed-2 ){
-      MappingSortSize(pMap);
-    }
-  }
+  }while( changes );
 
   /* Final step:  Arrange the mapping entires so that they are in the
   ** order of the output file.  Then fill in the nExtra values.
   */
 add_nextra:
@@ -547,36 +555,41 @@
 
 /*
 ** Do a three-way merge.  Initialize pOut to contain the result.
 */
 void blob_merge(Blob *pPivot, Blob *pV1, Blob *pV2, Blob *pOut){
-  Mapping map1, map2;
-  int i;
+  Mapping map1, map2, map3;
+  int i, j;
   const char *zV1, *zV2;
   blob_zero(pOut);
-  DEBUG1( printf("map1:\n"); )
   MappingInit(
     blob_buffer(pPivot), blob_size(pPivot),
     blob_buffer(pV1), blob_size(pV1),
     &map1);
   MappingSortFrom(&map1);
   DEBUG1(
     printf("map1-final:\n");
     MappingPrint(&map1);
-    printf("map2:\n");
   )
   MappingInit(
     blob_buffer(pPivot), blob_size(pPivot),
     blob_buffer(pV2), blob_size(pV2),
     &map2);
   DEBUG1(
     printf("map2-final:\n");
     MappingPrint(&map2);
   )
+  MappingInit(
+    blob_buffer(pV1), blob_size(pV1),
+    blob_buffer(pV2), blob_size(pV2),
+    &map3);
+  DEBUG1(
+    printf("map3-final:\n");
+    MappingPrint(&map3);
+  )
   zV1 = blob_buffer(pV1);
   zV2 = blob_buffer(pV2);
-  if( map2.nUsed==0 ) return;
   if( map1.aMap[0].toFirst>0 ){
     blob_append(pOut, zV1, map1.aMap[0].toFirst);
     DEBUG1( printf("INSERT %d bytes from V1[0..%d]\n",
             map1.aMap[0].toFirst, map1.aMap[0].toFirst-1); )
   }
@@ -583,22 +596,39 @@
   if( map2.aMap[0].toFirst>0 ){
     blob_append(pOut, zV2, map2.aMap[0].toFirst);
     DEBUG1( printf("INSERT %d bytes from V2[0..%d]\n",
             map2.aMap[0].toFirst, map2.aMap[0].toFirst-1); )
   }
-  for(i=0; i<map2.nUsed; i++){
-    int iFirst, iLast, nInsert;
+  for(i=j=0; i<map2.nUsed; i++){
+    int iFirst, iLast, nInsert, iTail;
     struct Mapping_entry *p = &map2.aMap[i];
+    while( j<map3.nUsed-1 && map3.aMap[j+1].toFirst>p->toFirst ){ j++; }
+    DEBUG1(
+      printf("map2: %6d..%-6d %6d..%-6d  %d\n",
+        p->fromFirst, p->fromLast, p->toFirst, p->toLast, p->nExtra);
+      printf("map3:       j=%-6d %6d..%-6d\n", j, map3.aMap[j].toFirst,
+        map3.aMap[j].toLast);
+    );
+    iTail = p->toLast + p->nExtra;
+    if( i<map2.nUsed-1 &&
+         map3.aMap[j].toFirst<=p->toFirst && map3.aMap[j].toLast>=iTail ){
+      blob_append(pOut, &zV2[p->toFirst], iTail - p->toFirst + 1);
+      DEBUG1(
+        printf("COPY %d bytes from V2[%d..%d]\n", iTail - p->toFirst+1,
+                p->toFirst, iTail);
+      )
+      continue;
+    }
     iFirst = MappingTranslate(&map1, p->fromFirst, 0);
     iLast = MappingTranslate(&map1, p->fromLast, &nInsert);
     blob_append(pOut, &zV1[iFirst], iLast - iFirst + 1);
     DEBUG1(
       printf("COPY %d bytes from V1[%d..%d]\n", iLast-iFirst+1, iFirst, iLast);
     )
     if( p->nExtra>0 ){
-      if( p->nExtra==nInsert
-          && memcmp(&zV2[p->toLast+1], &zV1[iLast-nInsert+1], nInsert)==0 ){
+      if( p->nExtra==nInsert &&
+          memcmp(&zV2[p->toLast+1], &zV1[iLast-nInsert+1], nInsert)==0 ){
         /* Omit a duplicate insert */
         DEBUG1( printf("OMIT duplicate insert\n"); )
       }else{
         blob_append(pOut, &zV2[p->toLast+1], p->nExtra);
         DEBUG1(
@@ -608,10 +638,11 @@
       }
     }
   }
   MappingClear(&map1);
   MappingClear(&map2);
+  MappingClear(&map3);
 }
 
 /*
 ** COMMAND:  test-3-way-merge
 **
@@ -651,11 +682,11 @@
 /*
 ** COMMAND:  test-mapping
 */
 void mapping_test(void){
   int i;
-  const char *z;
+  const char *za, *zb;
   Blob a, b;
   Mapping map;
   if( g.argc!=4 ){
     usage("FILE1 FILE2");
   }
@@ -663,15 +694,24 @@
   blob_read_from_file(&b, g.argv[3]);
   memset(&map, 0, sizeof(map));
   MappingInit(blob_buffer(&a), blob_size(&a),
               blob_buffer(&b), blob_size(&b),
               &map);
-  z = blob_buffer(&a);
+  DEBUG1(
+    printf("map-final:\n");
+    MappingPrint(&map);
+  )
+  za = blob_buffer(&a);
+  zb = blob_buffer(&b);
   for(i=0; i<map.nUsed; i++){
     printf("======= %6d..%-6d %6d..%-6d %d\n",
          map.aMap[i].fromFirst, map.aMap[i].fromLast,
          map.aMap[i].toFirst, map.aMap[i].toLast,
          map.aMap[i].nExtra);
     printf("%.*s\n", map.aMap[i].fromLast - map.aMap[i].fromFirst + 1,
-                     &z[map.aMap[i].fromFirst]);
+                     &za[map.aMap[i].fromFirst]);
+    if( map.aMap[i].nExtra ){
+      printf("======= EXTRA:\n");
+      printf("%.*s\n", map.aMap[i].nExtra, &zb[map.aMap[i].toLast+1]);
+    }
   }
 }