Check-in [e8cf0061cc]
Not logged in
Overview

SHA1 Hash:e8cf0061cc47c87521f9401c42c52fd40e664f25
Date: 2008-02-04 13:53:55
User: drh
Comment:Tweaks to the diff algorithm give a 4x performance increase. Now comparable to command-line diff.
Timelines: ancestors | descendants | both | trunk
Other Links: files | ZIP archive | manifest

Tags And Properties
Changes
[hide diffs]

Modified src/diff.c from [62a5d65904] to [50ab9d9846].

@@ -305,19 +305,24 @@
   int iS1, int iE1,
   int iS2, int iE2,
   int *piSX, int *piEX,
   int *piSY, int *piEY
 ){
-  int bestScore = -1000000000;
-  int i, j;
-  int iSX, iSY, iEX, iEY;
-  int score, skew, dist, mid;
-
-  *piSX = iS1;
-  *piEX = iS1;
-  *piSY = iS2;
-  *piEY = iS2;
+  double bestScore = -1e30;  /* Best score seen so far */
+  int i, j;                  /* Loop counters */
+  int iSX, iSY, iEX, iEY;    /* Current match */
+  double score;              /* Current score */
+  int skew;                  /* How lopsided is the match */
+  int dist;                  /* Distance of match from center */
+  int mid;                   /* Center of the span */
+  int iSXb, iSYb, iEXb, iEYb;   /* Best match so far */
+  int iSXp, iSYp, iEXp, iEYp;   /* Previous match */
+
+  iSXb = iSXp = iS1;
+  iEXb = iEXp = iS1;
+  iSYb = iSYp = iS2;
+  iEYb = iEYp = iS2;
   mid = (iE1 + iS1)/2;
   for(i=iS1; i<iE1; i++){
     int limit = 0;
     j = p->aTo[p->aFrom[i].h % p->nTo].iHash;
     while( j>0
@@ -328,10 +333,13 @@
         break;
       }
       j = p->aTo[j-1].iNext;
     }
     if( j==0 ) continue;
+    assert( i>=iSXb && i>=iSXp );
+    if( i<iEXb && j>=iSYb && j<iEYb ) continue;
+    if( i<iEXp && j>=iSYp && j<iEYp ) continue;
     iSX = i;
     iSY = j-1;
     while( iSX>iS1 && iSY>iS2 && same_dline(&p->aFrom[iSX-1],&p->aTo[iSY-1]) ){
       iSX--;
       iSY--;
@@ -347,16 +355,25 @@
     dist = (iSX+iEX)/2 - mid;
     if( dist<0 ) dist = -dist;
     score = (iEX - iSX) - 0.05*skew - 0.05*dist;
     if( score>bestScore ){
       bestScore = score;
-      *piSX = iSX;
-      *piSY = iSY;
-      *piEX = iEX;
-      *piEY = iEY;
-    }
-  }
+      iSXb = iSX;
+      iSYb = iSY;
+      iEXb = iEX;
+      iEYb = iEY;
+    }else{
+      iSXp = iSX;
+      iSYp = iSY;
+      iEXp = iEX;
+      iEYp = iEY;
+    }
+  }
+  *piSX = iSXb;
+  *piSY = iSYb;
+  *piEX = iEXb;
+  *piEY = iEYb;
   /* printf("LCS(%d..%d/%d..%d) = %d..%d/%d..%d\n",
      iS1, iE1, iS2, iE2, *piSX, *piEX, *piSY, *piEY);  */
 }
 
 /*
@@ -393,10 +410,45 @@
     appendTriple(p, 0, iE1-iS1, iE2-iS2);
   }
 }
 
 /*
+** Compute the differences between two files already loaded into
+** the DContext structure.
+*/
+static void diff_all(DContext *p){
+  int mnE, iS, iE1, iE2;
+
+  /* Carve off the common header and footer */
+  iE1 = p->nFrom;
+  iE2 = p->nTo;
+  while( iE1>0 && iE2>0 && same_dline(&p->aFrom[iE1-1], &p->aTo[iE2-1]) ){
+    iE1--;
+    iE2--;
+  }
+  mnE = iE1<iE2 ? iE1 : iE2;
+  for(iS=0; iS<mnE && same_dline(&p->aFrom[iS],&p->aTo[iS]); iS++){}
+
+  /* do the difference */
+  if( iS>0 ){
+    appendTriple(p, iS, 0, 0);
+  }
+  diff_step(p, iS, iE1, iS, iE2);
+  if( iE1<p->nFrom ){
+    appendTriple(p, p->nFrom - iE1, 0, 0);
+  }
+
+  /* Terminate the COPY/DELETE/INSERT triples with three zeros */
+  expandEdit(p, p->nEdit+3);
+  if( p->aEdit ){
+    p->aEdit[p->nEdit++] = 0;
+    p->aEdit[p->nEdit++] = 0;
+    p->aEdit[p->nEdit++] = 0;
+  }
+}
+
+/*
 ** Generate a report of the differences between files pA and pB.
 ** If pOut is not NULL then a unified diff is appended there.  It
 ** is assumed that pOut has already been initialized.  If pOut is
 ** NULL, then a pointer to an array of integers is returned.
 ** The integers come in triples.  For each triple,
@@ -413,12 +465,12 @@
   Blob *pB_Blob,   /* TO file */
   Blob *pOut,      /* Write unified diff here if not NULL */
   int nContext     /* Amount of context to unified diff */
 ){
   DContext c;
-  int mnE, iS, iE1, iE2;
-
+
+  /* Prepare the input files */
   memset(&c, 0, sizeof(c));
   c.aFrom = break_into_lines(blob_str(pA_Blob), &c.nFrom);
   c.aTo = break_into_lines(blob_str(pB_Blob), &c.nTo);
   if( c.aFrom==0 || c.aTo==0 ){
     free(c.aFrom);
@@ -427,35 +479,12 @@
       blob_appendf(pOut, "cannot compute difference between binary files\n");
     }
     return 0;
   }
 
-  /* Carve off the common header and footer */
-  iE1 = c.nFrom;
-  iE2 = c.nTo;
-  while( iE1>0 && iE2>0 && same_dline(&c.aFrom[iE1-1], &c.aTo[iE2-1]) ){
-    iE1--;
-    iE2--;
-  }
-  mnE = iE1<iE2 ? iE1 : iE2;
-  for(iS=0; iS<mnE && same_dline(&c.aFrom[iS],&c.aTo[iS]); iS++){}
-
-  /* do the difference */
-  if( iS>0 ){
-    appendTriple(&c, iS, 0, 0);
-  }
-  diff_step(&c, iS, iE1, iS, iE2);
-  if( iE1<c.nFrom ){
-    appendTriple(&c, c.nFrom - iE1, 0, 0);
-  }
-
-  expandEdit(&c, c.nEdit+3);
-  if( c.aEdit ){
-    c.aEdit[c.nEdit++] = 0;
-    c.aEdit[c.nEdit++] = 0;
-    c.aEdit[c.nEdit++] = 0;
-  }
+  /* Compute the difference */
+  diff_all(&c);
 
   if( pOut ){
     /* Compute a context diff if requested */
     contextDiff(&c, pOut, nContext);
     free(c.aFrom);
@@ -462,12 +491,11 @@
     free(c.aTo);
     free(c.aEdit);
     return 0;
   }else{
     /* If a context diff is not requested, then return the
-    ** array of COPY/DELETE/INSERT triples after terminating the
-    ** array with a triple of all zeros.
+    ** array of COPY/DELETE/INSERT triples.
     */
     free(c.aFrom);
     free(c.aTo);
     return c.aEdit;
   }