Check-in [95af789e1f]
Not logged in
Overview

SHA1 Hash:95af789e1fa51e6eac3e4ee857422844a935149e
Date: 2007-11-10 20:40:06
User: aku
Comment:Oops. pass 5 is not complete. Missed the breaking of internal dependencies, this is done in this pass already. Extended pass _2_ and file revisions with code to save the branchchildren (possible dependencies), and pass 5 and changesets with the proper algorithm. From cvs2svn, works, do not truly like it, as it throws away and recomputes a lot of state after each split of a cset. Could update and reuse the state to perform all splits in one go. Will try that next, for now we have a working form in the code base.
Timelines: ancestors | descendants | both | trunk
Other Links: files | ZIP archive | manifest

Tags And Properties
Changes
[hide diffs]

Modified tools/cvs2fossil/lib/c2f_frev.tcl from [edfbe469f8] to [b685fd71ec].

@@ -365,10 +365,19 @@
 	    VALUES               ($myid, $fid, $myrevnr, $lod, @P@,    @C@,   $idb,       @DP,      @DC,     @BP    , $op, $mydate, $mystate, $mymetaid, $coff, $clen);
 	}
 
 	state transaction {
 	    state run [string map $map $cmd]
+
+	    # And the branch children as well, for pass 5.
+	    foreach bc $mybranchchildren {
+		set bcid [$bc id]
+		state run {
+		    INSERT INTO revisionbranchchildren (rid,   brid)
+		    VALUES                             ($myid, $bcid);
+		}
+	    }
 	}
 	return
     }
 
     # # ## ### ##### ######## #############

Modified tools/cvs2fossil/lib/c2f_pcollrev.tcl from [978a2b3f74] to [9385b6cf1b].

@@ -143,10 +143,11 @@
 	    coff  INTEGER  NOT NULL,
 	    clen  INTEGER  NOT NULL,
 
 	    UNIQUE (fid, rev) -- The DTN is unique within the revision's file.
 	}
+
 	state writing optype {
 	    oid   INTEGER  NOT NULL  PRIMARY KEY,
 	    name  TEXT     NOT NULL,
 	    UNIQUE(name)
 	}
@@ -154,10 +155,22 @@
 	    INSERT INTO optype VALUES (-1,'delete');  -- The opcode names are the
 	    INSERT INTO optype VALUES ( 0,'nothing'); -- fixed pieces, see myopstate
 	    INSERT INTO optype VALUES ( 1,'add');     -- in file::rev. myopcode is
 	    INSERT INTO optype VALUES ( 2,'change');  -- loaded from this.
 	}
+
+	state writing revisionbranchchildren {
+	    -- The non-primary children of a revision, as reachable
+	    -- through a branch symbol, are listed here. This is
+	    -- needed by pass 5 to break internal dependencies in a
+	    -- changeset.
+
+	    rid   INTEGER  NOT NULL  REFERENCES revision,
+	    brid  INTEGER  NOT NULL  REFERENCES revision,
+	    UNIQUE(rid,brid)
+	}
+
 	state writing tag {
 	    tid  INTEGER  NOT NULL  PRIMARY KEY AUTOINCREMENT,
 	    fid  INTEGER  NOT NULL  REFERENCES file,     -- File the item belongs to
 	    lod  INTEGER            REFERENCES symbol,   -- Line of development (NULL => Trunk)
 	    sid  INTEGER  NOT NULL  REFERENCES symbol,   -- Symbol capturing the tag

Modified tools/cvs2fossil/lib/c2f_pinitcsets.tcl from [aae0715d5d] to [8f42ee8f95].

@@ -45,10 +45,11 @@
 	# Define the names and structure of the persistent state of
 	# this pass.
 
 	state reading meta
 	state reading revision
+	state reading revisionbranchchildren
 	state reading branch
 	state reading tag
 	state reading symbol
 
 	# Data per changeset, namely the project it belongs to, how it
@@ -109,13 +110,14 @@
 	# Pass manager interface. Executed to perform the
 	# functionality of the pass.
 
 	set csets {}
 	state transaction {
-	    CreateRevisionChangesets csets ; # Group file revisions into csets.
-	    CreateSymbolChangesets   csets ; # Create csets for tags and branches.
-	    PersistTheChangesets    $csets
+	    CreateRevisionChangesets  csets ; # Group file revisions into csets.
+	    BreakInternalDependencies csets ; # Split the csets based on internal conflicts.
+	    CreateSymbolChangesets    csets ; # Create csets for tags and branches.
+	    PersistTheChangesets     $csets
 	}
 	return
     }
 
     typemethod discard {} {
@@ -269,18 +271,64 @@
 
 	log write 4 initcsets "Created [nsp $n {symbol changeset}]"
 	return
     }
 
+    proc BreakInternalDependencies {cv} {
+	upvar 1 $cv csets
+
+	# This code operates on the revision changesets created by
+	# 'CreateRevisionChangesets'. As such it has to follow after
+	# it, before the symbol changesets are made. The changesets
+	# are inspected for internal conflicts and any such are broken
+	# by splitting the problematic changeset into multiple
+	# fragments. The results are changesets which have no internal
+	# dependencies, only external ones.
+
+	log write 3 initcsets {Break internal dependencies}
+	set n 0
+
+	foreach cset $csets {
+	    # The main method for splitting does only one split, which
+	    # may not be enough. The code here iterates until no more
+	    # splits can be performed. An iterative algorithm was
+	    # chosen over a recursive one to prevent running into
+	    # stack limits.
+
+	    set tosplit [list $cset]
+	    set at 0
+	    while {$at < [llength $tosplit]} {
+		# Note here how we are __not__ advancing in the list
+		#      when we were able to break the current
+		#      changeset into two pieces, causing the loop to
+		#      immediately check the first of the two pieces
+		#      again for further break possibilities. The
+		#      other piece is added at the end, thus processed
+		#      later.
+		while {[[lindex $tosplit $at] breakinternaldependencies tosplit]} {}
+		incr at
+	    }
+
+	    # At last the generated fragments are added to the main
+	    # list of changesets. The first element is skipped as it
+	    # is already in the list.
+	    foreach cset [lrange $tosplit 1 end] { lappend csets $cset ; incr n }
+	}
+
+	log write 4 initcsets "Created [nsp $n {additional revision changeset}]"
+	log write 4 initcsets Ok.
+	return
+    }
+
     proc PersistTheChangesets {csets} {
-	log write 3 initcsets {Saving the created changesets to the persistent state}
+	log write 3 initcsets "Saving [nsp [llength $csets] {initial changeset}] to the persistent state"
 
 	foreach cset $csets {
 	    $cset persist
 	}
 
-	log write 4 initcsets {Ok.}
+	log write 4 initcsets Ok.
 	return
     }
 
     # # ## ### ##### ######## #############
     ## Configuration

Modified tools/cvs2fossil/lib/c2f_prev.tcl from [855ccb9239] to [d69fb88848].

@@ -16,10 +16,11 @@
 # # ## ### ##### ######## ############# #####################
 ## Requirements
 
 package require Tcl 8.4                               ; # Required runtime.
 package require snit                                  ; # OO system.
+package require vc::tools::log                        ; # User feedback.
 package require vc::fossil::import::cvs::state        ; # State storage.
 
 # # ## ### ##### ######## ############# #####################
 ##
 
@@ -32,10 +33,174 @@
 	set myproject   $project
 	set mytype      $cstype
 	set mysrcid	$srcid
 	set myrevisions $revisions
 	return
+    }
+
+    method id {} { return $myid }
+
+    method breakinternaldependencies {cv} {
+	upvar 2 $cv csets ; # simple-dispatch!
+
+	# This method inspects the changesets for internal
+	# dependencies. Nothing is done if there are no
+	# such. Otherwise the changeset is split into a set of
+	# fragments without internal dependencies, transforming the
+	# internal dependencies into external ones. The new changesets
+	# are added to the list of all changesets.
+
+	# Actually at most one split is performed, resulting in at
+	# most one additional fragment. It is the caller's
+	# responsibility to spli the resulting fragments further.
+
+	# The code checks only sucessor dependencies, automatically
+	# covering the predecessor dependencies as well (A sucessor
+	# dependency a -> b is a predecessor dependency b -> a).
+
+	# Array of dependencies (parent -> child). This is pulled from
+	# the state, and limited to successors within the changeset.
+	array set dependencies {}
+
+	set theset ('[join $myrevisions {','}]')
+
+	foreach {rid child} [state run "
+	    SELECT R.rid, R.child
+	    FROM   revision R
+	    WHERE  R.rid   IN $theset
+	    AND    R.child IS NOT NULL
+	    AND    R.child IN $theset
+    UNION
+	    SELECT R.rid, R.dbchild
+	    FROM   revision R
+	    WHERE  R.rid   IN $theset
+	    AND    R.dbchild IS NOT NULL
+	    AND    R.dbchild IN $theset
+    UNION
+	    SELECT R.rid, B.brid
+	    FROM   revision R, revisionbranchchildren B
+	    WHERE  R.rid   IN $theset
+	    AND    R.rid = B.rid
+	    AND    B.brid IN $theset
+	"] {
+	    # Consider moving this to the integrity module.
+	    if {$rid == $child} {
+		trouble internal "Revision $rid depends on itself."
+	    }
+	    set dependencies($rid) $child
+	}
+
+	if {![array size dependencies]} {return 0} ; # Nothing to break.
+
+	# We have internal dependencies to break. We now iterate over
+	# all positions in the list (which is chronological, at least
+	# as far as the timestamps are correct and unique) and
+	# determine the best position for the break, by trying to
+	# break as many dependencies as possible in one go.
+
+	# First we create a map of positions to make it easier to
+	# determine whether a dependency cross a particular index.
+
+	array set pos {}
+	array set crossing {}
+	set n 0
+	foreach rev $myrevisions {
+	    set pos($rev) $n
+	    set crossing($n) 0
+	    incr n
+	}
+
+	# Secondly we count the crossings per position, by iterating
+	# over the recorded internal dependencies.
+
+	foreach {rid child} [array get dependencies] {
+	    set start $pos($rid)
+	    set end $pos($child)
+
+	    # Note: If the timestamps are badly out of order it is
+	    #       possible to have a backward successor dependency,
+	    #       i.e. with start > end. We may have to swap the
+	    #       indices to ensure that the following loop runs
+	    #       correctly.
+	    #
+	    # Note 2: start == end is not possible. It indicates a
+	    #         self-dependency due to the uniqueness of
+	    #         positions, and that is something we have ruled
+	    #         out already.
+
+	    if {$start > $end} {
+		while {$end < $start} { incr crossing($end)   ; incr end }
+	    } else {
+		while {$start < $end} { incr crossing($start) ; incr start }
+	    }
+	}
+
+	# Now we can determine the best break location. First we look
+	# for the locations with the maximal number of crossings. If
+	# there are several we look for the shortest time interval
+	# among them. If we still have multiple possibilities after
+	# that we select the smallest index among these.
+
+	set max -1
+	set best {}
+
+	foreach key [array names crossing] {
+	    set now $crossing($key)
+	    if {$now > $max} {
+		set max $now
+		set best $key
+		continue
+	    } elseif {$now == $max} {
+		lappend best $key
+	    }
+	}
+
+	if {[llength $best] > 1} {
+	    set min -1
+	    set newbest {}
+	    foreach at $best {
+		set rat   [lindex $myrevisions $at] ; incr at
+		set rnext [lindex $myrevisions $at] ; incr at -1
+		set tat   [lindex [state run {SELECT R.date FROM revision R WHERE R.rid = $rat  }] 0]
+		set tnext [lindex [state run {SELECT R.date FROM revision R WHERE R.rid = $rnext}] 0]
+		set delta [expr {$tnext - $tat}]
+		if {($min < 0) || ($delta < $min)} {
+		    set min $delta
+		    set newbest $at
+		} elseif {$delta == $min} {
+		    lappend newbest $at
+		}
+	    }
+	    set best $newbest
+	}
+
+	if {[llength $best] > 1} {
+	    set best [lindex [lsort -integer -increasing $best] 0]
+	}
+
+	# Now we can split off a fragment.
+
+	set bnext $best ; incr bnext
+	set revbefore [lrange $myrevisions 0 $best]
+	set revafter  [lrange $myrevisions $bnext end]
+
+	if {![llength $revbefore]} {
+	    trouble internal "Tried to split off a zero-length fragment at the beginning"
+	}
+	if {![llength $revafter]} {
+	    trouble internal "Tried to split off a zero-length fragment at the end"
+	}
+
+	lappend csets [set new [$type %AUTO% $myproject $mytype $mysrcid $revafter]]
+	set myrevisions $revbefore
+
+	log write 4 csets "Breaking <$myid> @$best, making <[$new id]>, cutting $crossing($best)"
+
+	#puts "\tKeeping   <$revbefore>"
+	#puts "\tSplit off <$revafter>"
+
+	return 1
     }
 
     method persist {} {
 	set tid $mycstype($mytype)
 	set pid [$myproject id]
@@ -92,13 +257,15 @@
 
 namespace eval ::vc::fossil::import::cvs::project {
     namespace export rev
     namespace eval rev {
 	namespace import ::vc::fossil::import::cvs::state
+	namespace import ::vc::tools::log
+	log register csets
     }
 }
 
 # # ## ### ##### ######## ############# #####################
 ## Ready
 
 package provide vc::fossil::import::cvs::project::rev 1.0
 return