Differences From:
File
src/diff.c
part of check-in
[57b2735ebd]
- Enhanced text diff subroutine uses Myers enhancements to Wagners
minimum edit distance algorithm. White space at the end of lines
is ignored.
by
drh on
2007-11-15 21:49:14.
[view]
To:
File
src/diff.c
part of check-in
[f1b55da0ac]
- Bug fixes in the Myers diff algorithm.
by
drh on
2007-11-16 03:17:35.
[view]
@@ -52,9 +52,9 @@
/* Count the number of lines. Allocate space to hold
** the returned array.
*/
for(i=0, nLine=1; z[i]; i++){
- if( z[i]=='\n' ) nLine++;
+ if( z[i]=='\n' && z[i+1]!=0 ) nLine++;
}
a = malloc( nLine*sizeof(a[0]) );
if( a==0 ) fossil_panic("out of memory");
@@ -184,8 +184,9 @@
int K, D; /* The diagonal and d for the final solution */
int *R; /* Result vector */
int r; /* Loop variables */
int go = 1; /* Outer loop control */
+ int MAX; /* Largest of X and Y */
/* Break the two files being diffed into individual lines.
** Compute hashes on each line for fast comparison.
*/
@@ -192,9 +193,10 @@
A = break_into_lines(blob_str(pA_Blob), &X);
B = break_into_lines(blob_str(pB_Blob), &Y);
szM = 0;
- for(d=0; go; d++){
+ MAX = X>Y ? X : Y;
+ for(d=0; go && d<=MAX; d++){
if( szM<d+1 ){
szM += szM + 10;
M = realloc(M, sizeof(M[0])*szM);
if( M==0 ){
@@ -217,80 +219,103 @@
q = M[d-1][i-1];
}
x = (k + q + 1)/2;
y = x - k;
- while( x<X && y<Y && same_dline(&A[x],&B[y]) ){ x++; y++; }
+ if( x<0 || x>X || y<0 || y>Y ){
+ x = y = 0;
+ }else{
+ while( x<X && y<Y && same_dline(&A[x],&B[y]) ){ x++; y++; }
+ }
M[d][i] = x + y;
+ /* printf("M[%d][%i] = %d k=%d (%d,%d)\n", d, i, x+y, k, x, y); */
if( x==X && y==Y ){
go = 0;
break;
}
}
}
-
- /* Reuse M[] as follows:
- **
- ** M[d][1] = 1 if a line is inserted or 1 if a line is deleted.
- ** M[d][0] = number of lines copied at this step.
- **
- */
- D = d - 1;
- K = X - Y;
- for(d=D, i=(K+D)/2; d>0; d--){
- if( i==d || M[d-1][i-1] > M[d-1][i] ){
- M[d][0] = M[d][i] - M[d-1][i-1] - 1;
- M[d][1] = 0;
- i--;
- }else{
- M[d][0] = M[d][i] - M[d-1][i] - 1;
- M[d][1] = 1;
+ if( d>MAX ){
+ R = malloc( sizeof(R[0])*6 );
+ R[0] = 0;
+ R[1] = X;
+ R[2] = Y;
+ R[3] = 0;
+ R[4] = 0;
+ R[5] = 0;
+ }else{
+ /* Reuse M[] as follows:
+ **
+ ** M[d][1] = 1 if a line is inserted or 0 if a line is deleted.
+ ** M[d][0] = number of lines copied after the ins or del above.
+ **
+ */
+ D = d - 1;
+ K = X - Y;
+ for(d=D, i=(K+D)/2; d>0; d--){
+ /* printf("d=%d i=%d\n", d, i); */
+ if( i==d || (i>0 && M[d-1][i-1] > M[d-1][i]) ){
+ M[d][0] = M[d][i] - M[d-1][i-1] - 1;
+ M[d][1] = 0;
+ i--;
+ }else{
+ M[d][0] = M[d][i] - M[d-1][i] - 1;
+ M[d][1] = 1;
+ }
+ }
+
+
+ /*
+ printf("---------------\nM[0][0] = %5d\n", M[0][0]);
+ for(d=1; d<=D; d++){
+ printf("M[%d][0] = %5d M[%d][1] = %d\n",d,M[d][0],d,M[d][1]);
}
- }
-
- /* Allocate the output vector
- */
- R = malloc( sizeof(R[0])*(D+2)*3 );
- if( R==0 ){
- fossil_fatal("out of memory");
- }
-
- /* Populate the output vector
- */
- d = r = 0;
- while( d<=D ){
- int n;
- R[r++] = M[d++][0]/2; /* COPY */
- if( d>D ){
- R[r++] = 0;
- R[r++] = 0;
- break;
+ */
+
+ /* Allocate the output vector
+ */
+ R = malloc( sizeof(R[0])*(D+2)*3 );
+ if( R==0 ){
+ fossil_fatal("out of memory");
}
- if( M[d][1]==0 ){
+
+ /* Populate the output vector
+ */
+ d = r = 0;
+ while( d<=D ){
+ int n;
+ R[r++] = M[d++][0]/2; /* COPY */
+ if( d>D ){
+ R[r++] = 0;
+ R[r++] = 0;
+ break;
+ }
+ if( M[d][1]==0 ){
+ n = 1;
+ while( M[d][0]==0 && d<D && M[d+1][1]==0 ){
+ d++;
+ n++;
+ }
+ R[r++] = n; /* DELETE */
+ if( d==D || M[d][0]>0 ){
+ R[r++] = 0; /* INSERT */
+ continue;
+ }
+ d++;
+ }else{
+ R[r++] = 0; /* DELETE */
+ }
+ assert( M[d][1]==1 );
n = 1;
- while( M[d][0]==0 && d<D && M[d+1][1]==0 ){
+ while( M[d][0]==0 && d<D && M[d+1][1]==1 ){
d++;
n++;
}
- R[r++] = n; /* DELETE */
- if( d==D || M[d][0]>0 ){
- R[r++] = 0; /* INSERT */
- continue;
- }
- d++;
- }else{
- R[r++] = 0; /* DELETE */
+ R[r++] = n; /* INSERT */
}
- assert( M[d][1]==1 );
- n = 1;
- while( M[d][0]==0 && d<D && M[d+1][1]==1 ){
- d++;
- n++;
- }
- R[r++] = n; /* INSERT */
- }
- R[r++] = 0;
- R[r++] = 0;
- R[r++] = 0;
+ R[r++] = 0;
+ R[r++] = 0;
+ R[r++] = 0;
+ }
/* Free the Myers matrix */
for(d=0; d<=D; d++){
free(M[d]);
@@ -308,11 +333,12 @@
int i, j; /* Loop counters */
int m; /* Number of lines to output */
int skip; /* Number of lines to skip */
- for(r=0; R[r+3]; r += 3*nr){
+ for(r=0; R[r+1] || R[r+2] || R[r+3]; r += 3*nr){
/* Figure out how many triples to show in a single block */
for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){}
+ /* printf("r=%d nr=%d\n", r, nr); */
/* For the current block comprising nr triples, figure out
** how many lines of A and B are to be displayed
*/
@@ -326,14 +352,14 @@
for(i=0; i<nr; i++){
na += R[r+i*3+1];
nb += R[r+i*3+2];
}
- if( R[r+i*3]>nContext ){
+ if( R[r+nr*3]>nContext ){
na += nContext;
nb += nContext;
}else{
- na += R[r+i*3];
- nb += R[r+i*3];
+ na += R[r+nr*3];
+ nb += R[r+nr*3];
}
for(i=1; i<nr; i++){
na += R[r+i*3];
nb += R[r+i*3];