Overview
SHA1 Hash: | 36b96b861614cd44073a9e33c9d6f91608d03158 |
---|---|
Date: | 2007-11-16 20:42:31 |
User: | drh |
Comment: | Rework the merge algorithm. It now only works for text files. But, it no longer gets confused by line endings (\r\n versus \n) and it reports conflicts. |
Timelines: | ancestors | descendants | both | trunk |
Other Links: | files | ZIP archive | manifest |
Tags And Properties
- branch=trunk inherited from [a28c83647d]
- sym-trunk inherited from [a28c83647d]
Changes
[hide diffs]Modified src/blob.c from [d66858e82e] to [1da99958db].
@@ -402,10 +402,25 @@ blob_extract(pFrom, i-pFrom->iCursor, pTo); return pTo->nUsed; } /* +** Trim whitespace off of the end of a blob. Return the number +** of characters remaining. +** +** All this does is reduce the length counter. This routine does +** not insert a new zero terminator. +*/ +int blob_trim(Blob *p){ + char *z = p->aData; + int n = p->nUsed; + while( n>0 && isspace(z[n-1]) ){ n--; } + p->nUsed = n; + return n; +} + +/* ** Extract a single token from pFrom and use it to initialize pTo. ** Return the number of bytes in the token. If no token is found, ** return 0. ** ** A token consists of one or more non-space characters. Leading @@ -438,10 +453,40 @@ int blob_tail(Blob *pFrom, Blob *pTo){ int iCursor = pFrom->iCursor; blob_extract(pFrom, pFrom->nUsed-pFrom->iCursor, pTo); pFrom->iCursor = iCursor; return pTo->nUsed; +} + +/* +** Copy N lines of text from pFrom into pTo. The copy begins at the +** current cursor position of pIn. The pIn cursor is left pointing +** at the first character past the last \n copied. +** +** If pTo==NULL then this routine simply skips over N lines. +*/ +void blob_copy_lines(Blob *pTo, Blob *pFrom, int N){ + char *z = pFrom->aData; + int i = pFrom->iCursor; + int n = pFrom->nUsed; + int cnt = 0; + + if( N==0 ) return; + while( i<n ){ + if( z[i]=='\n' ){ + cnt++; + if( cnt==N ){ + i++; + break; + } + } + i++; + } + if( pTo ){ + blob_append(pTo, &pFrom->aData[pFrom->iCursor], i - pFrom->iCursor); + } + pFrom->iCursor = i; } /* ** Return true if the blob contains a valid UUID_SIZE-digit base16 identifier. */
Modified src/diff.c from [09050dfe39] to [017b424914].
@@ -26,50 +26,78 @@ */ #include "config.h" #include "diff2.h" #include <assert.h> + +#if 0 +#define DEBUG(X) X +#else +#define DEBUG(X) +#endif + /* ** Information about each line of a file being diffed. +** +** The lower 20 bits of the hash are the length of the +** line. If any line is longer than 1048575 characters, +** the file is considered binary. */ typedef struct DLine DLine; struct DLine { const char *z; /* The text of the line */ unsigned int h; /* Hash of the line */ }; -/* -** Break a blob into lines by converting inserting \000 characters. +#define LENGTH_MASK 0x000fffff + +/* ** Return an array of DLine objects containing a pointer to the -** start of each line and a hash of that line. +** start of each line and a hash of that line. The lower +** bits of the hash store the length of each line. ** ** Trailing whitespace is removed from each line. +** +** Return 0 if the file is binary or contains a line that is +** longer than 1048575 bytes. */ static DLine *break_into_lines(char *z, int *pnLine){ int nLine, i, j, k, x; unsigned int h; DLine *a; /* Count the number of lines. Allocate space to hold ** the returned array. */ - for(i=0, nLine=1; z[i]; i++){ - if( z[i]=='\n' && z[i+1]!=0 ) nLine++; + for(i=j=0, nLine=1; z[i]; i++, j++){ + int c = z[i]; + if( c==0 ){ + return 0; + } + if( c=='\n' && z[i+1]!=0 ){ + nLine++; + if( j>1048575 ){ + return 0; + } + j = 0; + } + } + if( j>1048575 ){ + return 0; } a = malloc( nLine*sizeof(a[0]) ); if( a==0 ) fossil_panic("out of memory"); /* Fill in the array */ for(i=0; i<nLine; i++){ a[i].z = z; for(j=0; z[j] && z[j]!='\n'; j++){} for(k=j; k>0 && isspace(z[k-1]); k--){} - z[k] = 0; for(h=0, x=0; x<k; x++){ h = h ^ (h<<2) ^ z[x]; } - a[i].h = h; + a[i].h = (h<<20) | k;; z += j+1; } /* Return results */ *pnLine = nLine; @@ -78,26 +106,28 @@ /* ** Return true if two DLine elements are identical. */ static int same_dline(DLine *pA, DLine *pB){ - return pA->h==pB->h && strcmp(pA->z,pB->z)==0; + return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0; } /* ** Generate a report of the differences between files pA and pB. -** The line ending (\r\n versus \n) is ignored - the two line -** endings are considered to be equivalent. -** -** The return is a pointer to an array of integers that describes -** the difference. Integers come in triples. For each triple, +** If pOut is not NULL then a unified diff is appended there. It +** is assumed that pOut has already been initialized. If pOut is +** NULL, then a pointer to an array of integers is returned. +** The integers come in triples. For each triple, ** the elements are the number of lines copied, the number of -** lines delete, and the number of lines inserted. The vector +** lines deleted, and the number of lines inserted. The vector ** is terminated by a triple of all zeros. ** -** The two blobs is destroyed ('\000' values are inserted) -** by the diffing process. +** This diff utility does not work on binary files. If a binary +** file is encountered, 0 is returned and pOut is written with +** text "cannot compute difference between binary files". +** +**************************************************************************** ** ** The core algorithm is a variation on the classic Wagner ** minimum edit distance with enhancements to reduce the runtime ** to be almost linear in the common case where the two files ** have a lot in common. For additional information see @@ -171,29 +201,43 @@ ** compared to X and Y so little memory is required. The ** original Wagner algorithm requires X*Y memory, which for ** larger files (100K lines) is more memory than we have at ** hand. */ -int *text_diff(Blob *pA_Blob, Blob *pB_Blob, Blob *pOut, int nContext){ +int *text_diff( + Blob *pA_Blob, /* FROM file */ + Blob *pB_Blob, /* TO file */ + Blob *pOut, /* Write unified diff here if not NULL */ + int nContext /* Amount of context to unified diff */ +){ DLine *A, *B; /* Files being compared */ int X, Y; /* Number of elements in A and B */ int x, y; /* Indices: A[x] and B[y] */ int szM = 0; /* Number of rows and columns in M */ int **M = 0; /* Myers matrix */ int i, d; /* Indices on M. M[d][i] */ int k, q; /* Diagonal number and distinct from (0,0) */ int K, D; /* The diagonal and d for the final solution */ - int *R; /* Result vector */ + int *R = 0; /* Result vector */ int r; /* Loop variables */ int go = 1; /* Outer loop control */ int MAX; /* Largest of X and Y */ /* Break the two files being diffed into individual lines. ** Compute hashes on each line for fast comparison. */ A = break_into_lines(blob_str(pA_Blob), &X); B = break_into_lines(blob_str(pB_Blob), &Y); + + if( A==0 || B==0 ){ + free(A); + free(B); + if( pOut ){ + blob_appendf(pOut, "cannot compute difference between binary files\n"); + } + return 0; + } szM = 0; MAX = X>Y ? X : Y; for(d=0; go && d<=MAX; d++){ if( szM<d+1 ){ @@ -211,11 +255,11 @@ k = 2*i - d; if( d==0 ){ q = 0; }else if( i==0 ){ q = M[d-1][0]; - }else if( M[d-1][i-1] < M[d-1][i] ){ + }else if( i<d-1 && M[d-1][i-1] < M[d-1][i] ){ q = M[d-1][i]; }else{ q = M[d-1][i-1]; } x = (k + q + 1)/2; @@ -224,11 +268,11 @@ x = y = 0; }else{ while( x<X && y<Y && same_dline(&A[x],&B[y]) ){ x++; y++; } } M[d][i] = x + y; - /* printf("M[%d][%i] = %d k=%d (%d,%d)\n", d, i, x+y, k, x, y); */ + DEBUG( printf("M[%d][%i] = %d k=%d (%d,%d)\n", d, i, x+y, k, x, y); ) if( x==X && y==Y ){ go = 0; break; } } @@ -249,11 +293,11 @@ ** */ D = d - 1; K = X - Y; for(d=D, i=(K+D)/2; d>0; d--){ - /* printf("d=%d i=%d\n", d, i); */ + DEBUG( printf("d=%d i=%d\n", d, i); ) if( i==d || (i>0 && M[d-1][i-1] > M[d-1][i]) ){ M[d][0] = M[d][i] - M[d-1][i-1] - 1; M[d][1] = 0; i--; }else{ @@ -260,17 +304,16 @@ M[d][0] = M[d][i] - M[d-1][i] - 1; M[d][1] = 1; } } - - /* - printf("---------------\nM[0][0] = %5d\n", M[0][0]); - for(d=1; d<=D; d++){ - printf("M[%d][0] = %5d M[%d][1] = %d\n",d,M[d][0],d,M[d][1]); - } - */ + DEBUG( + printf("---------------\nM[0][0] = %5d\n", M[0][0]); + for(d=1; d<=D; d++){ + printf("M[%d][0] = %5d M[%d][1] = %d\n",d,M[d][0],d,M[d][1]); + } + ) /* Allocate the output vector */ R = malloc( sizeof(R[0])*(D+2)*3 ); if( R==0 ){ @@ -335,11 +378,11 @@ int skip; /* Number of lines to skip */ for(r=0; R[r+1] || R[r+2] || R[r+3]; r += 3*nr){ /* Figure out how many triples to show in a single block */ for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){} - /* printf("r=%d nr=%d\n", r, nr); */ + DEBUG( printf("r=%d nr=%d\n", r, nr); ) /* For the current block comprising nr triples, figure out ** how many lines of A and B are to be displayed */ if( R[r]>nContext ){ @@ -369,31 +412,31 @@ /* Show the initial common area */ a += skip; b += skip; m = R[r] - skip; for(j=0; j<m; j++){ - blob_appendf(pOut," %s\n", A[a+j].z); + blob_appendf(pOut," %.*s\n", A[a+j].h & LENGTH_MASK, A[a+j].z); } a += m; b += m; /* Show the differences */ for(i=0; i<nr; i++){ m = R[r+i*3+1]; for(j=0; j<m; j++){ - blob_appendf(pOut,"-%s\n", A[a+j].z); + blob_appendf(pOut,"-%.*s\n", A[a+j].h & LENGTH_MASK, A[a+j].z); } a += m; m = R[r+i*3+2]; for(j=0; j<m; j++){ - blob_appendf(pOut,"+%s\n", B[b+j].z); + blob_appendf(pOut,"+%.*s\n", B[b+j].h & LENGTH_MASK, B[b+j].z); } b += m; if( i<nr-1 ){ m = R[r+i*3+3]; for(j=0; j<m; j++){ - blob_appendf(pOut," %s\n", B[b+j].z); + blob_appendf(pOut," %.*s\n", B[b+j].h & LENGTH_MASK, B[b+j].z); } b += m; a += m; } } @@ -401,11 +444,11 @@ /* Show the final common area */ assert( nr==i ); m = R[r+nr*3]; if( m>nContext ) m = nContext; for(j=0; j<m; j++){ - blob_appendf(pOut," %s\n", B[b+j].z); + blob_appendf(pOut," %.*s\n", B[b+j].h & LENGTH_MASK, B[b+j].z); } } free(R); R = 0; } @@ -422,19 +465,24 @@ ** COMMAND: test-rawdiff */ void test_rawdiff_cmd(void){ Blob a, b; int r; + int i; int *R; - if( g.argc!=4 ) usage("FILE1 FILE2"); + if( g.argc<4 ) usage("FILE1 FILE2 ..."); blob_read_from_file(&a, g.argv[2]); - blob_read_from_file(&b, g.argv[3]); - R = text_diff(&a, &b, 0, 0); - for(r=0; R[r] || R[r+1] || R[r+2]; r += 3){ - printf(" copy %4d delete %4d insert %4d\n", R[r], R[r+1], R[r+2]); - } - free(R); + for(i=3; i<g.argc; i++){ + if( i>3 ) printf("-------------------------------\n"); + blob_read_from_file(&b, g.argv[i]); + R = text_diff(&a, &b, 0, 0); + for(r=0; R[r] || R[r+1] || R[r+2]; r += 3){ + printf(" copy %4d delete %4d insert %4d\n", R[r], R[r+1], R[r+2]); + } + /* free(R); */ + blob_reset(&b); + } } /* ** COMMAND: test-udiff */
Modified src/merge.c from [4db3ee048d] to [5979683d08].
@@ -226,10 +226,11 @@ while( db_step(&q)==SQLITE_ROW ){ int ridm = db_column_int(&q, 0); int idv = db_column_int(&q, 1); int ridp = db_column_int(&q, 2); int ridv = db_column_int(&q, 3); + int rc; char *zName = db_text(0, "SELECT pathname FROM vfile WHERE id=%d", idv); char *zFullPath; Blob m, p, v, r; /* Do a 3-way merge of idp->idm into idp->idv. The results go into idv. */ if( detailFlag ){ @@ -237,17 +238,24 @@ }else{ printf("MERGE %s\n", zName); } undo_save(zName); zFullPath = mprintf("%s/%s", g.zLocalRoot, zName); - free(zName); content_get(ridp, &p); content_get(ridm, &m); blob_zero(&v); blob_read_from_file(&v, zFullPath); - blob_merge(&p, &m, &v, &r); - blob_write_to_file(&r, zFullPath); + rc = blob_merge(&p, &m, &v, &r); + if( rc>=0 ){ + blob_write_to_file(&r, zFullPath); + if( rc>0 ){ + printf("***** %d merge conflicts in %s\n", rc, zName); + } + }else{ + printf("***** Cannot merge binary file %s\n", zName); + } + free(zName); blob_reset(&p); blob_reset(&m); blob_reset(&v); blob_reset(&r); db_multi_exec("INSERT OR IGNORE INTO vmerge(id,merge) VALUES(%d,%d)",
Modified src/merge3.c from [2a2187581a] to [b966dfba2b].
@@ -24,625 +24,188 @@ ** This module implements a 3-way merge */ #include "config.h" #include "merge3.h" -#if 1 -# define DEBUG1(X) X +#if 0 +#define DEBUG(X) X #else -# define DEBUG1(X) -#endif -#if 0 -#define DEBUG2(X) X -/* -** For debugging: -** Print 16 characters of text from zBuf -*/ -static const char *print16(const char *z){ - int i; - static char zBuf[20]; - for(i=0; i<16; i++){ - if( z[i]>=0x20 && z[i]<=0x7e ){ - zBuf[i] = z[i]; - }else{ - zBuf[i] = '.'; - } - } - zBuf[i] = 0; - return zBuf; -} -#else -# define DEBUG2(X) +#define DEBUG(X) #endif /* -** Must be a 32-bit integer +** Opcodes */ -typedef unsigned int u32; - -/* -** Must be a 16-bit value -*/ -typedef unsigned short int u16; +#define CPY 0 +#define DEL 1 +#define INS 2 +#define END 3 +#define UNK 4 /* -** The width of a hash window in bytes. The algorithm only works if this -** is a power of 2. -*/ -#define NHASH 16 - -/* -** The current state of the rolling hash. +** Compare a single line of text from pV1 and pV2. If the lines +** are the same, return true. Return false if they are different. ** -** z[] holds the values that have been hashed. z[] is a circular buffer. -** z[i] is the first entry and z[(i+NHASH-1)%NHASH] is the last entry of -** the window. -** -** Hash.a is the sum of all elements of hash.z[]. Hash.b is a weighted -** sum. Hash.b is z[i]*NHASH + z[i+1]*(NHASH-1) + ... + z[i+NHASH-1]*1. -** (Each index for z[] should be module NHASH, of course. The %NHASH operator -** is omitted in the prior expression for brevity.) +** The cursor on both pV1 and pV2 is unchanged. */ -typedef struct hash hash; -struct hash { - u16 a, b; /* Hash values */ - u16 i; /* Start of the hash window */ - char z[NHASH]; /* The values that have been hashed */ -}; - -/* -** Initialize the rolling hash using the first NHASH characters of z[] -*/ -static void hash_init(hash *pHash, const char *z){ - u16 a, b, i; - a = b = 0; - for(i=0; i<NHASH; i++){ - a += z[i]; - b += (NHASH-i)*z[i]; - pHash->z[i] = z[i]; - } - pHash->a = a & 0xffff; - pHash->b = b & 0xffff; - pHash->i = 0; +static int sameLine(Blob *pV1, Blob *pV2){ + char *z1, *z2; + int i; + + z1 = blob_buffer(pV1); + z2 = blob_buffer(pV2); + for(i=0; z1[i]!='\n' && z1[i]==z2[i]; i++){} + return z2[i]=='\n' || (z2[i]=='\r' && z2[i+1]=='\n') + || (z1[i]=='\r' && z2[i]=='\n' && z1[i+1]=='\n'); } /* -** Advance the rolling hash by a single character "c" -*/ -static void hash_next(hash *pHash, int c){ - u16 old = pHash->z[pHash->i]; - pHash->z[pHash->i] = c; - pHash->i = (pHash->i+1)&(NHASH-1); - pHash->a = pHash->a - old + c; - pHash->b = pHash->b - NHASH*old + pHash->a; -} - -/* -** Return a 32-bit hash value +** Do a three-way merge. Initialize pOut to contain the result. +** +** The merge is an edit against pV2. Both pV1 and pV2 have a +** common origin at pPivot. Apply the changes of pPivot ==> pV1 +** to pV2. +** +** The return is 0 upon complete success. If any input file is binary, +** -1 is returned and pOut is unmodified. If there are merge +** conflicts, the merge proceeds as best as it can and the number +** of conflicts is returns */ -static u32 hash_32bit(hash *pHash){ - return (pHash->a & 0xffff) | (((u32)(pHash->b & 0xffff))<<16); -} +int blob_merge(Blob *pPivot, Blob *pV1, Blob *pV2, Blob *pOut){ + int *aC1; /* Changes from pPivot to pV1 */ + int *aC2; /* Changes from pPivot to pV2 */ + int i1, i2; + int op1, op2; + int limit1, limit2; + int inConflict = 0; + int nConflict = 0; + static const char zBegin[] = ">>>>>>>> BEGIN MERGE CONFLICT <<<<<<<<\n"; + static const char zEnd[] = ">>>>>>>>> END MERGE CONFLICT <<<<<<<<<\n"; + + aC1 = text_diff(pPivot, pV1, 0, 0); + aC2 = text_diff(pPivot, pV2, 0, 0); + + if( aC2==0 || aC2==0 ){ + free(aC1); + free(aC2); + return -1; + } + blob_zero(pOut); + blob_rewind(pV1); + blob_rewind(pV2); + blob_rewind(pPivot); + + for(i1=0; aC1[i1] || aC1[i1+1] || aC1[i1+2]; i1+=3){} + limit1 = i1; + for(i2=0; aC2[i2] || aC2[i2+1] || aC2[i2+2]; i2+=3){} + limit2 = i2; -/* -** Maximum number of landmarks to set in the source file. -*/ -#define MX_LANDMARK (1024*128) + DEBUG( + for(i1=0; i1<limit1; i1+=3){ + printf("c1: %4d %4d %4d\n", aC1[i1], aC1[i1+1], aC1[i1+2]); + } + for(i2=0; i2<limit2; i2+=3){ + printf("c2: %4d %4d %4d\n", aC2[i2], aC2[i2+1], aC2[i2+2]); + } + ) -/* -** A mapping structure is used to record which parts of two -** files contain the same text. There are zero or more mapping -** entries in a mapping. Each entry maps a segment of text in -** the source file into a segment of the output file. -** -** fromFirst...fromLast -> toFirst...toLast -** -** Extra text might be inserted in the output file after a -** mapping. The nExtra parameter records the number of bytes -** of extra text to insert. -*/ -typedef struct Mapping Mapping; -struct Mapping { - int nMap; - int nUsed; - struct Mapping_entry { - int fromFirst, fromLast; - int toFirst, toLast; - int nExtra; - } *aMap; -}; + op1 = op2 = UNK; + i1 = i2 = 0; + while( i1<limit1 && aC1[i1]==0 ){ i1++; } + while( i2<limit2 && aC2[i2]==0 ){ i2++; } -/* -** Free malloced memory associated with a mapping. -*/ -static void MappingClear(Mapping *p){ - free(p->aMap); - memset(p, 0, sizeof(*p)); -} - -/* -** Add an entry to a mapping structure. The mapping is: -** -** a...b -> c...d -** -** The nExtra parameter is initially zero. It will be changed -** later if necessary. -*/ -static void MappingInsert(Mapping *p, int a, int b, int c, int d){ - struct Mapping_entry *pEntry; - int i; - for(i=0, pEntry=p->aMap; i<p->nUsed; i++, pEntry++){ - if( pEntry->fromFirst==a && pEntry->fromLast==b && pEntry->toFirst==c ){ - DEBUG2( printf("DUPLICATE: %6d..%-6d %6d..%d\n", a, b, c, d); ) - return; + while(1){ + if( op1==UNK ){ + if( i1>=limit1 ){ + op1 = END; + DEBUG( printf("get op1=END\n"); ) + }else{ + op1 = i1 % 3; + aC1[i1]--; + DEBUG( printf("get op1=%d from %d (%d->%d)\n", + op1,i1,aC1[i1]+1,aC1[i1]); ) + while( i1<limit1 && aC1[i1]==0 ){ i1++; } + } + } + if( op2==UNK ){ + if( i2>=limit2 ){ + op2 = END; + DEBUG( printf("get op2=END\n"); ) + }else{ + op2 = i2 % 3; + aC2[i2]--; + DEBUG( printf("get op2=%d from %d (%d->%d)\n", + op2,i2,aC2[i2]+1,aC2[i2]); ) + while( i2<limit2 && aC2[i2]==0 ){ i2++; } + } + } + DEBUG( printf("op1=%d op2=%d\n", op1, op2); ) + if( op1==END ){ + if( op2==INS ){ + blob_copy_lines(pOut, pV2, 1); + op2 = UNK; + }else{ + break; + } + }else if( op2==END ){ + if( op1==INS ){ + blob_copy_lines(pOut, pV1, 1); + op1 = UNK; + }else{ + break; + } + }else if( op1==INS && op2==INS ){ + if( !inConflict && sameLine(pV1, pV2) ){ + blob_copy_lines(pOut, pV2, 1); + blob_copy_lines(0, pV1, 0); + op1 = UNK; + op2 = UNK; + }else{ + if( !inConflict ){ + inConflict = 1; + nConflict++; + blob_appendf(pOut, zBegin); + } + blob_copy_lines(pOut, pV1, 1); + op1 = UNK; + } + }else if( op1==INS ){ + blob_copy_lines(pOut, pV1, 1); + op1 = UNK; + }else if( op2==INS ){ + blob_copy_lines(pOut, pV2, 1); + op2 = UNK; + }else{ + if( inConflict ){ + inConflict = 0; + blob_appendf(pOut, zEnd); + } + if( op1==DEL || op2==DEL ){ + blob_copy_lines(0, pPivot, 1); + if( op2==CPY ){ + blob_copy_lines(0, pV2, 1); + } + if( op1==CPY ){ + blob_copy_lines(0, pV1, 1); + } + }else{ + assert( op1==CPY && op2==CPY ); + blob_copy_lines(pOut, pPivot, 1); + blob_copy_lines(0, pV1, 1); + blob_copy_lines(0, pV2, 1); + } + op1 = UNK; + op2 = UNK; } } - if( p->nUsed>=p->nMap ){ - p->nMap = p->nMap * 2 + 10; - p->aMap = realloc(p->aMap, p->nMap*sizeof(p->aMap[0]) ); - if( p->aMap==0 ) exit(1); - } - pEntry = &p->aMap[p->nUsed]; - pEntry->fromFirst = a; - pEntry->fromLast = b; - pEntry->toFirst = c; - pEntry->toLast = d; - pEntry->nExtra = 0; - p->nUsed++; -} - -DEBUG1( -/* -** For debugging purposes: -** Print the content of a mapping. -*/ -static void MappingPrint(Mapping *pMap){ - int i; - struct Mapping_entry *p; - for(i=0, p=pMap->aMap; i<pMap->nUsed; i++, p++){ - printf("%6d..%-6d %6d..%-6d %d\n", - p->fromFirst, p->fromLast, - p->toFirst, p->toLast, p->nExtra); - } -} -) - -/* -** Remove deleted entries from a mapping. Deleted enties have -** an fromFirst of less than 0. -*/ -static void MappingPurgeDeletedEntries(Mapping *p){ - int i, j; - for(i=j=0; i<p->nUsed; i++){ - if( p->aMap[i].fromFirst<0 ) continue; - if( j<i ){ - p->aMap[j] = p->aMap[i]; - } - j++; - } - p->nUsed = j; -} - -/* -** Comparisons functions used for sorting elements of a Mapping -*/ -static int intAbs(int x){ return x<0 ? -x : x; } -static int compareSize(const void *a, const void *b){ - const struct Mapping_entry *A = (const struct Mapping_entry*)a; - const struct Mapping_entry *B = (const struct Mapping_entry*)b; - int rc; - rc = (B->fromLast - B->fromFirst) - (A->fromLast - A->fromFirst); - if( rc==0 ){ - rc = intAbs(A->toFirst - A->fromFirst) - - intAbs(B->toFirst - B->fromFirst); - } - return rc; -} -static int compareFrom(const void *a, const void *b){ - const struct Mapping_entry *A = (const struct Mapping_entry*)a; - const struct Mapping_entry *B = (const struct Mapping_entry*)b; - return A->fromFirst - B->fromFirst; -} -static int compareTo(const void *a, const void *b){ - const struct Mapping_entry *A = (const struct Mapping_entry*)a; - const struct Mapping_entry *B = (const struct Mapping_entry*)b; - return A->toFirst - B->toFirst; -} - -/* -** Routines for sorting the entries of a mapping. SortSize sorts -** the entries in order of decreasing size (largest first.) -** SortFrom and SortTo sort the entries in order of increasing -** fromFirst and toFirst. -*/ -static void MappingSortSize(Mapping *p){ - qsort(p->aMap, p->nUsed, sizeof(p->aMap[0]), compareSize); -} -static void MappingSortFrom(Mapping *p){ - qsort(p->aMap, p->nUsed, sizeof(p->aMap[0]), compareFrom); -} -static void MappingSortTo(Mapping *p){ - qsort(p->aMap, p->nUsed, sizeof(p->aMap[0]), compareTo); -} - -/* -** Initialize pMap to contain a set of similarities between two files. -*/ -static void MappingInit( - const char *zSrc, /* The source or pattern file */ - int lenSrc, /* Length of the source file */ - const char *zOut, /* The target file */ - int lenOut, /* Length of the target file */ - Mapping *pMap /* Write the map of similaries here */ -){ - int i, j, base, prefix; - int changes; - hash h; - int *collide; - int origLenOut = lenOut; - struct Mapping_entry *aMap; - int landmark[MX_LANDMARK]; - - /* - ** Initialize the map - */ - memset(pMap, 0, sizeof(*pMap)); - - /* - ** Find common prefix and suffix - */ - if( lenSrc<=NHASH || lenOut<=NHASH ){ - MappingInsert(pMap, 0, 0, 0, 0); - goto add_nextra; - } - for(i=0; i<lenSrc && i<lenOut && zSrc[i]==zOut[i]; i++){} - if( i>=NHASH ){ - MappingInsert(pMap, 0, i-1, 0, i-1); - lenSrc -= i; - zSrc += i; - lenOut -= i; - zOut += i; - if( lenSrc<=0 || lenOut<=0 ) goto add_nextra; - prefix = i; - }else{ - prefix = 0; - } - for(i=1; i<=lenSrc && i<=lenOut && zSrc[lenSrc-i]==zOut[lenOut-i]; i++){} - if( i>NHASH ){ - MappingInsert(pMap, prefix+lenSrc-i+1, prefix+lenSrc-1, - prefix+lenOut-i+1, prefix+lenOut-1); - lenSrc -= i; - lenOut -= i; + if( inConflict ){ + blob_appendf(pOut, zEnd); } - /* If the source file is very small, it means that we have no - ** chance of ever finding any matches. We can leave early. - */ - if( lenSrc<=NHASH ) goto add_nextra; - - /* Compute the hash table used to locate matching sections in the - ** source file. - */ - collide = malloc( lenSrc*sizeof(int)/NHASH ); - if( collide==0 ) return; - memset(landmark, -1, sizeof(landmark)); - memset(collide, -1, lenSrc*sizeof(int)/NHASH ); - for(i=0; i<lenSrc-NHASH; i+=NHASH){ - int hv; - hash_init(&h, &zSrc[i]); - hv = hash_32bit(&h) & (MX_LANDMARK-1); - collide[i/NHASH] = landmark[hv]; - landmark[hv] = i/NHASH; - } - - /* Begin scanning the target file and generating mappings. In this - ** step, we generate as many mapping entries is we can. Many of these - ** entries might overlap. The overlapping entries are removed by - ** the loop the follows. - */ - base = 0; /* We have already checked everything before zOut[base] */ - while( base<lenOut-NHASH ){ - int iSrc, iBlock, nextBase, nextBaseI; - hash_init(&h, &zOut[base]); - i = 0; /* Trying to match a landmark against zOut[base+i] */ - nextBaseI = NHASH; - nextBase = base; - while(1){ - int hv; - - hv = hash_32bit(&h) & (MX_LANDMARK-1); - DEBUG2( printf("LOOKING: %d+%d+%d=%d [%s]\n", - prefix,base,i,prefix+base+i, print16(&zOut[base+i])); ) - iBlock = landmark[hv]; - while( iBlock>=0 ){ - /* - ** The hash window has identified a potential match against - ** landmark block iBlock. But we need to investigate further. - ** - ** Look for a region in zOut that matches zSrc. Anchor the search - ** at zSrc[iSrc] and zOut[base+i]. - ** - ** Set cnt equal to the length of the match and set ofst so that - ** zSrc[ofst] is the first element of the match. - */ - int cnt, ofstSrc; - int j, k, x, y; - - /* Beginning at iSrc, match forwards as far as we can. j counts - ** the number of characters that match */ - iSrc = iBlock*NHASH; - for(j=0, x=iSrc, y=base+i; x<lenSrc && y<lenOut; j++, x++, y++){ - if( zSrc[x]!=zOut[y] ) break; - } - j--; - - /* Beginning at iSrc-1, match backwards as far as we can. k counts - ** the number of characters that match */ - for(k=1; k<iSrc && k<=base+i; k++){ - if( zSrc[iSrc-k]!=zOut[base+i-k] ) break; - } - k--; - - /* Compute the offset and size of the matching region zSrc */ - ofstSrc = iSrc-k; - cnt = j+k+1; - DEBUG2( printf("MATCH %d bytes at SRC[%d..%d]: [%s]\n", - cnt, ofstSrc, ofstSrc+cnt-1, print16(&zSrc[ofstSrc])); ) - if( cnt>NHASH ){ - int ofstOut = base+i-k; - DEBUG2( printf("COPY %6d..%-6d %6d..%d\n", - prefix+ofstSrc, prefix+ofstSrc+cnt-1, - prefix+ofstOut, prefix+ofstOut+cnt-1); ) - MappingInsert(pMap, - prefix+ofstSrc, prefix+ofstSrc+cnt-1, - prefix+ofstOut, prefix+ofstOut+cnt-1); - if( nextBase < ofstOut+cnt-1 ){ - nextBase = ofstOut+cnt-1; - nextBaseI = i+NHASH; - } - } - - /* Check the next matching block */ - iBlock = collide[iBlock]; - } - - /* If we found a match, then jump out to the outer loop and begin - ** a new cycle. - */ - if( nextBase>base && i>=nextBaseI ){ - base = nextBase; - break; - } - - /* Advance the hash by one character. Keep looking for a match */ - if( base+i+NHASH>=lenOut ){ - base = lenOut; - break; - } - hash_next(&h, zOut[base+i+NHASH]); - i++; - } - } - free(collide); -#if 0 - DEBUG1( - printf("after creation:\n"); - MappingPrint(pMap); - ) -#endif - - /* In this step, we will remove overlapping entries from the mapping. - ** - ** We use a greedy algorithm. Select the largest mapping first and - ** remove all overlapping mappings. Then take the next largest - ** mapping and remove others that overlap with it. Keep going until - ** all mappings have been processed. - */ - MappingSortSize(pMap); - do{ - changes = 0; - for(i=0; i<pMap->nUsed; i++){ - int sortNeeded = 0; - int purgeNeeded = 0; - struct Mapping_entry *pA; - pA = &pMap->aMap[i]; - for(j=i+1; j<pMap->nUsed; j++){ - int diff; - struct Mapping_entry *pB; - pB = &pMap->aMap[j]; - if( pB->fromLast<pA->fromFirst || pB->fromFirst>pA->fromLast ){ - /* No overlap. Do nothing */ - }else if( pB->fromFirst>=pA->fromFirst && pB->fromLast<=pA->fromLast ){ - /* B is contained entirely within A. Drop B */ - pB->fromFirst = -1; - purgeNeeded = 1; - continue; - }else if( pB->fromFirst<pA->fromFirst ){ - /* The tail B overlaps the head of A */ - assert( pB->fromLast>=pA->fromFirst && pB->fromLast<=pA->fromLast ); - diff = pB->fromLast + 1 - pA->fromFirst; - pB->fromLast -= diff; - pB->toLast -= diff; - sortNeeded = 1; - }else{ - /* The head of B overlaps the tail of A */ - assert( pB->fromFirst<=pA->fromLast && pB->fromLast>pA->fromLast ); - diff = pA->fromLast + 1 - pB->fromFirst; - pB->fromFirst += diff; - pB->toFirst += diff; - sortNeeded = 1; - } - if( pB->toLast<pA->toFirst || pB->toFirst>pA->toLast ){ - /* No overlap. Do nothing */ - }else if( pB->toFirst>=pA->toFirst && pB->toLast<=pA->toLast ){ - /* B is contained entirely within A. Drop B */ - pB->fromFirst = -1; - purgeNeeded = 1; - }else if( pB->toFirst<pA->toFirst ){ - /* The tail of B overlaps the head of A */ - assert( pB->toLast>=pA->toFirst && pB->toLast<=pA->toLast ); - diff = pB->toLast + 1 - pA->toFirst; - pB->fromLast -= diff; - pB->toLast -= diff; - sortNeeded = 1; - }else{ - /* The head of B overlaps the tail of A */ - assert( pB->toFirst<=pA->toLast && pB->toLast>pA->toLast ); - diff = pA->toLast + 1 - pB->toFirst; - pB->fromFirst += diff; - pB->toFirst += diff; - sortNeeded = 1; - } - } - if( purgeNeeded ){ - MappingPurgeDeletedEntries(pMap); - /* changes++; */ - } - if( sortNeeded && i<pMap->nUsed-2 ){ - MappingSortSize(pMap); - /* changes++; */ - } - } - }while( changes ); - - /* Final step: Arrange the mapping entires so that they are in the - ** order of the output file. Then fill in the nExtra values. - */ -add_nextra: - MappingSortTo(pMap); - aMap = pMap->aMap; - for(i=0; i<pMap->nUsed-1; i++){ - aMap[i].nExtra = aMap[i+1].toFirst - aMap[i].toLast - 1; - } - if( pMap->nUsed>0 && origLenOut > aMap[i].toLast+1 ){ - aMap[i].nExtra = origLenOut - aMap[i].toLast - 1; - } -} - -/* -** Translate an index into a file using a mapping. -** -** The mapping "p" shows how blocks in the input file map into blocks -** of the output file. The index iFrom is an index into the input file. -** This routine returns the index into the output file of the corresponding -** character. -** -** If pInserted!=0 and iFrom points to the last character before a -** insert in the output file, then the return value is adjusted forward -** so that it points to the end of the insertion and the number of -** bytes inserted is written into *pInserted. If pInserted==0 then -** iFrom always maps directly in the corresponding output file -** index regardless of whether or not it points to the last character -** before an insertion. -*/ -static int MappingTranslate(Mapping *p, int iFrom, int *pInserted){ - int i; - for(i=0; i<p->nUsed; i++){ - if( iFrom>p->aMap[i].fromLast ) continue; - if( iFrom<=p->aMap[i].fromFirst ){ - return p->aMap[i].toFirst; - } - if( pInserted && iFrom==p->aMap[i].fromLast ){ - int n = p->aMap[i].nExtra; - *pInserted = n; - return p->aMap[i].toLast + n; - }else{ - return p->aMap[i].toFirst + iFrom - p->aMap[i].fromFirst; - } - } - i--; - return p->aMap[i].toLast + p->aMap[i].nExtra; -} - -/* -** Do a three-way merge. Initialize pOut to contain the result. -*/ -void blob_merge(Blob *pPivot, Blob *pV1, Blob *pV2, Blob *pOut){ - Mapping map1, map2, map3; - int i, j; - const char *zV1, *zV2; - blob_zero(pOut); - MappingInit( - blob_buffer(pPivot), blob_size(pPivot), - blob_buffer(pV1), blob_size(pV1), - &map1); - MappingSortFrom(&map1); - DEBUG1( - printf("map1-final:\n"); - MappingPrint(&map1); - ) - MappingInit( - blob_buffer(pPivot), blob_size(pPivot), - blob_buffer(pV2), blob_size(pV2), - &map2); - DEBUG1( - printf("map2-final:\n"); - MappingPrint(&map2); - ) - MappingInit( - blob_buffer(pV1), blob_size(pV1), - blob_buffer(pV2), blob_size(pV2), - &map3); - DEBUG1( - printf("map3-final:\n"); - MappingPrint(&map3); - ) - zV1 = blob_buffer(pV1); - zV2 = blob_buffer(pV2); - if( map1.aMap[0].toFirst>0 ){ - blob_append(pOut, zV1, map1.aMap[0].toFirst); - DEBUG1( printf("INSERT %d bytes from V1[0..%d]\n", - map1.aMap[0].toFirst, map1.aMap[0].toFirst-1); ) - } - if( map2.aMap[0].toFirst>0 ){ - blob_append(pOut, zV2, map2.aMap[0].toFirst); - DEBUG1( printf("INSERT %d bytes from V2[0..%d]\n", - map2.aMap[0].toFirst, map2.aMap[0].toFirst-1); ) - } - for(i=j=0; i<map2.nUsed; i++){ - int iFirst, iLast, nInsert, iTail; - struct Mapping_entry *p = &map2.aMap[i]; - while( j<map3.nUsed-1 && map3.aMap[j+1].toFirst>p->toFirst ){ j++; } - DEBUG1( - printf("map2: %6d..%-6d %6d..%-6d %d\n", - p->fromFirst, p->fromLast, p->toFirst, p->toLast, p->nExtra); - printf("map3: j=%-6d %6d..%-6d\n", j, map3.aMap[j].toFirst, - map3.aMap[j].toLast); - ); - iTail = p->toLast + p->nExtra; - if( i<map2.nUsed-1 && - map3.aMap[j].toFirst<=p->toFirst && map3.aMap[j].toLast>=iTail ){ - blob_append(pOut, &zV2[p->toFirst], iTail - p->toFirst + 1); - DEBUG1( - printf("COPY %d bytes from V2[%d..%d]\n", iTail - p->toFirst+1, - p->toFirst, iTail); - ) - continue; - } - iFirst = MappingTranslate(&map1, p->fromFirst, 0); - iLast = MappingTranslate(&map1, p->fromLast, &nInsert); - blob_append(pOut, &zV1[iFirst], iLast - iFirst + 1); - DEBUG1( - printf("COPY %d bytes from V1[%d..%d]\n", iLast-iFirst+1, iFirst, iLast); - ) - if( p->nExtra>0 ){ - if( p->nExtra==nInsert && - memcmp(&zV2[p->toLast+1], &zV1[iLast-nInsert+1], nInsert)==0 ){ - /* Omit a duplicate insert */ - DEBUG1( printf("OMIT duplicate insert\n"); ) - }else{ - blob_append(pOut, &zV2[p->toLast+1], p->nExtra); - DEBUG1( - printf("INSERT %d bytes from V2[%d..%d]\n", - p->nExtra, p->toLast+1, p->toLast+p->nExtra); - ) - } - } - } - MappingClear(&map1); - MappingClear(&map2); - MappingClear(&map3); + free(aC1); + free(aC2); + return nConflict; } /* ** COMMAND: test-3-way-merge ** @@ -674,44 +237,6 @@ } blob_reset(&pivot); blob_reset(&v1); blob_reset(&v2); blob_reset(&merged); -} - - -/* -** COMMAND: test-mapping -*/ -void mapping_test(void){ - int i; - const char *za, *zb; - Blob a, b; - Mapping map; - if( g.argc!=4 ){ - usage("FILE1 FILE2"); - } - blob_read_from_file(&a, g.argv[2]); - blob_read_from_file(&b, g.argv[3]); - memset(&map, 0, sizeof(map)); - MappingInit(blob_buffer(&a), blob_size(&a), - blob_buffer(&b), blob_size(&b), - &map); - DEBUG1( - printf("map-final:\n"); - MappingPrint(&map); - ) - za = blob_buffer(&a); - zb = blob_buffer(&b); - for(i=0; i<map.nUsed; i++){ - printf("======= %6d..%-6d %6d..%-6d %d\n", - map.aMap[i].fromFirst, map.aMap[i].fromLast, - map.aMap[i].toFirst, map.aMap[i].toLast, - map.aMap[i].nExtra); - printf("%.*s\n", map.aMap[i].fromLast - map.aMap[i].fromFirst + 1, - &za[map.aMap[i].fromFirst]); - if( map.aMap[i].nExtra ){ - printf("======= EXTRA:\n"); - printf("%.*s\n", map.aMap[i].nExtra, &zb[map.aMap[i].toLast+1]); - } - } }
Modified src/update.c from [abfa21b27a] to [86ce400827].
@@ -206,25 +206,34 @@ free(zFullPath); } }else if( idt>0 && idv>0 && ridt!=ridv && chnged ){ /* Merge the changes in the current tree into the target version */ Blob e, r, t, v; + int rc; char *zFullPath; printf("MERGE %s\n", zName); undo_save(zName); zFullPath = mprintf("%s/%s", g.zLocalRoot, zName); content_get(ridt, &t); content_get(ridv, &v); blob_zero(&e); blob_read_from_file(&e, zFullPath); - blob_merge(&v, &e, &t, &r); - blob_write_to_file(&r, zFullPath); + rc = blob_merge(&v, &e, &t, &r); + if( rc>=0 ){ + blob_write_to_file(&r, zFullPath); + if( rc>0 ){ + printf("***** %d merge conflicts in %s\n", rc, zName); + } + }else{ + printf("***** Cannot merge binary file %s\n", zName); + } free(zFullPath); blob_reset(&v); blob_reset(&e); blob_reset(&t); blob_reset(&r); + } } db_finalize(&q); /*
Modified test/merge1.test from [419c5c1aa7] to [eed9903dc2].
@@ -77,18 +77,31 @@ 333 - This is a test of the merging algohm - 3333 444 - If all goes well, we will be pleased - 4444 555 - we think it well and other stuff too - 5555 } write_file_indented t23 { - 111 - This is line ONE OF the demo program - 1111 + >>>>>>>> BEGIN MERGE CONFLICT <<<<<<<< + 111 - This is line ONE of the demo program - 1111 + 111 - This is line one OF the demo program - 1111 + >>>>>>>>> END MERGE CONFLICT <<<<<<<<< + 222 - The second line program line in code - 2222 + 333 - This is a test of the merging algohm - 3333 + 444 - If all goes well, we will be pleased - 4444 + 555 - we think it well and other stuff too - 5555 +} +write_file_indented t32 { + >>>>>>>> BEGIN MERGE CONFLICT <<<<<<<< + 111 - This is line one OF the demo program - 1111 + 111 - This is line ONE of the demo program - 1111 + >>>>>>>>> END MERGE CONFLICT <<<<<<<<< 222 - The second line program line in code - 2222 333 - This is a test of the merging algohm - 3333 444 - If all goes well, we will be pleased - 4444 555 - we think it well and other stuff too - 5555 } fossil test-3 t1 t3 t2 a32 -test merge1-2.1 {[same_file t23 a32]} +test merge1-2.1 {[same_file t32 a32]} fossil test-3 t1 t2 t3 a23 test merge1-2.2 {[same_file t23 a23]} write_file_indented t1 { 111 - This is line one of the demo program - 1111 @@ -222,37 +235,5 @@ } fossil test-3 t1 t3 t2 a32 test merge1-6.1 {[same_file t32 a32]} fossil test-3 t1 t2 t3 a23 test merge1-6.2 {[same_file t32 a23]} - -# 123456789 123456789 123456789 123456789 123456789 123456789 -write_file_indented t1 { - 111 - This is line one of the demo program - 1111 - 222 - The second line program line in code - 2222 - 333 - This is a test of the merging algohm - 3333 - 444 - If all goes well, we will be pleased - 4444 - 555 - we think it well and other stuff too - 5555 -} -write_file_indented t2 { - 222 - The second line program line in code - 2222 - 333 - This is a test of THREE rging algohm - 3333 - 444 - If all goes well, we will be pleased - 4444 - 111 - This is line one of the demo program - 1111 - 555 - we think it well and other stuff too - 5555 -} -write_file_indented t3 { - 111 - This is line ONEONE the demo program - 1111 - 222 - The second line program line in code - 2222 - 333 - This is a test of the merging algohm - 3333 - 444 - If all goes well, we will be pleased - 4444 - 555 - we think it FIVEFIVE other stuff too - 5555 -} -write_file_indented t32 { - 222 - The second line program line in code - 2222 - 333 - This is a test of THREE rging algohm - 3333 - 444 - If all goes well, we will be pleased - 4444 - 111 - This is line ONEONE the demo program - 1111 - 555 - we think it FIVEFIVE other stuff too - 5555 -} -fossil test-3 t1 t3 t2 a32 -test merge1-6.1 {[same_file t32 a32]}
Modified test/tester.tcl from [8cd24c134d] to [ba01105cd0].
@@ -87,11 +87,15 @@ } # Return true if two files are the same # proc same_file {a b} { - return [expr {[read_file $a]==[read_file $b]}] + set x [read_file $a] + regsub -all { +\n} $x \n x + set y [read_file $b] + regsub -all { +\n} $y \n y + return [expr {$x==$y}] } # Perform a test # proc test {name expr} {