Fossil: Diff

Differences From:

File tools/cvs2fossil/lib/c2f_prev.tcl part of check-in [95af789e1f] - Oops. pass 5 is not complete. Missed the breaking of internal dependencies, this is done in this pass already. Extended pass _2_ and file revisions with code to save the branchchildren (possible dependencies), and pass 5 and changesets with the proper algorithm. From cvs2svn, works, do not truly like it, as it throws away and recomputes a lot of state after each split of a cset. Could update and reuse the state to perform all splits in one go. Will try that next, for now we have a working form in the code base. by aku on 2007-11-10 20:40:06. [view]
To:

File tools/cvs2fossil/lib/c2f_prev.tcl part of check-in [08ebab80cd] - Rewrote the algorithm for breaking internal dependencies to my liking. The complex part handling multiple splits has moved from the pass code to the changeset class itself, reusing the state computed for the first split. The state is a bit more complex to allow for its incremental update after a break has been done. Factored major pieces into separate procedures to keep the highlevel code readable. Added lots of official log output to help debugging in case of trouble. by aku on 2007-11-10 23:44:29. [view]
@@ -17,8 +17,10 @@
 ## Requirements
 
 package require Tcl 8.4                               ; # Required runtime.
 package require snit                                  ; # OO system.
+package require vc::tools::misc                       ; # Text formatting
+package require vc::tools::trouble                    ; # Error reporting.
 package require vc::tools::log                        ; # User feedback.
 package require vc::fossil::import::cvs::state        ; # State storage.
 
 # # ## ### ##### ######## ############# #####################
@@ -48,156 +50,137 @@
 	# fragments without internal dependencies, transforming the
 	# internal dependencies into external ones. The new changesets
 	# are added to the list of all changesets.
 
-	# Actually at most one split is performed, resulting in at
-	# most one additional fragment. It is the caller's
-	# responsibility to spli the resulting fragments further.
-
-	# The code checks only sucessor dependencies, automatically
-	# covering the predecessor dependencies as well (A sucessor
-	# dependency a -> b is a predecessor dependency b -> a).
+	# We perform all necessary splits in one go, instead of only
+	# one. The previous algorithm, adapted from cvs2svn, computed
+	# a lot of state which was thrown away and then computed again
+	# for each of the fragments. It should be easier to update and
+	# reuse that state.
+
+	# The code checks only sucessor dependencies, as this
+	# automatically covers the predecessor dependencies as well (A
+	# successor dependency a -> b is also a predecessor dependency
+	# b -> a).
 
 	# Array of dependencies (parent -> child). This is pulled from
 	# the state, and limited to successors within the changeset.
+
 	array set dependencies {}
-
-	set theset ('[join $myrevisions {','}]')
-
-	foreach {rid child} [state run "
-	    SELECT R.rid, R.child
-	    FROM   revision R
-	    WHERE  R.rid   IN $theset
-	    AND    R.child IS NOT NULL
-	    AND    R.child IN $theset
-    UNION
-	    SELECT R.rid, R.dbchild
-	    FROM   revision R
-	    WHERE  R.rid   IN $theset
-	    AND    R.dbchild IS NOT NULL
-	    AND    R.dbchild IN $theset
-    UNION
-	    SELECT R.rid, B.brid
-	    FROM   revision R, revisionbranchchildren B
-	    WHERE  R.rid   IN $theset
-	    AND    R.rid = B.rid
-	    AND    B.brid IN $theset
-	"] {
-	    # Consider moving this to the integrity module.
-	    if {$rid == $child} {
-		trouble internal "Revision $rid depends on itself."
-	    }
-	    set dependencies($rid) $child
-	}
-
+	PullInternalDependencies dependencies $myrevisions
 	if {![array size dependencies]} {return 0} ; # Nothing to break.
+
+	log write 6 csets ...<$myid>.......................................................
 
 	# We have internal dependencies to break. We now iterate over
 	# all positions in the list (which is chronological, at least
 	# as far as the timestamps are correct and unique) and
 	# determine the best position for the break, by trying to
-	# break as many dependencies as possible in one go.
-
-	# First we create a map of positions to make it easier to
-	# determine whether a dependency cross a particular index.
-
-	array set pos {}
-	array set crossing {}
-	set n 0
-	foreach rev $myrevisions {
-	    set pos($rev) $n
-	    set crossing($n) 0
-	    incr n
-	}
-
-	# Secondly we count the crossings per position, by iterating
-	# over the recorded internal dependencies.
-
-	foreach {rid child} [array get dependencies] {
-	    set start $pos($rid)
-	    set end $pos($child)
-
-	    # Note: If the timestamps are badly out of order it is
-	    #       possible to have a backward successor dependency,
-	    #       i.e. with start > end. We may have to swap the
-	    #       indices to ensure that the following loop runs
-	    #       correctly.
-	    #
-	    # Note 2: start == end is not possible. It indicates a
-	    #         self-dependency due to the uniqueness of
-	    #         positions, and that is something we have ruled
-	    #         out already.
-
-	    if {$start > $end} {
-		while {$end < $start} { incr crossing($end)   ; incr end }
+	# break as many dependencies as possible in one go. When a
+	# break was found this is redone for the fragments coming and
+	# after, after upding the crossing information.
+
+	# Data structures:
+	# Map:  POS   revision id      -> position in list.
+	#       CROSS position in list -> number of dependencies crossing it
+	#       DEPC  dependency       -> positions it crosses
+	# List: RANGE Of the positions itself.
+	# A dependency is a single-element map parent -> child
+
+	InitializeBreakState $myrevisions
+
+	set fragments {}
+	set pending   [list $range]
+	set at        0
+	array set breaks {}
+
+	while {$at < [llength $pending]} {
+	    set current [lindex $pending $at]
+
+	    log write 6 csets ". . .. ... ..... ........ ............."
+	    log write 6 csets "Scheduled   [join [PRs [lrange $pending $at end]] { }]"
+	    log write 6 csets "Considering [PR $current] \[$at/[llength $pending]\]"
+
+	    set best [FindBestBreak $current]
+
+	    if {$best < 0} {
+		# The inspected range has no internal
+		# dependencies. This is a complete fragment.
+		lappend fragments $current
+
+		log write 6 csets "No breaks, final"
 	    } else {
-		while {$start < $end} { incr crossing($start) ; incr start }
-	    }
-	}
-
-	# Now we can determine the best break location. First we look
-	# for the locations with the maximal number of crossings. If
-	# there are several we look for the shortest time interval
-	# among them. If we still have multiple possibilities after
-	# that we select the smallest index among these.
-
-	set max -1
-	set best {}
-
-	foreach key [array names crossing] {
-	    set now $crossing($key)
-	    if {$now > $max} {
-		set max $now
-		set best $key
-		continue
-	    } elseif {$now == $max} {
-		lappend best $key
+		# Split the range and schedule the resulting fragments
+		# for further inspection. Remember the number of
+		# dependencies cut before we remove them from
+		# consideration, for documentation later.
+
+		set breaks($best) $cross($best)
+
+		log write 6 csets "Best break @ $best, cuts [nsp $cross($best) dependency dependencies]"
+
+		# Note: The value of best is an abolute location in
+		# myrevisions. Use the start of current to make it an
+		# index absolute to current.
+
+		set brel [expr {$best - [lindex $current 0]}]
+		set bnext $brel ; incr bnext
+		set fragbefore [lrange $current 0 $brel]
+		set fragafter  [lrange $current $bnext end]
+
+		log write 6 csets "New pieces  [PR $fragbefore] [PR $fragafter]"
+
+		if {![llength $fragbefore]} {
+		    trouble internal "Tried to split off a zero-length fragment at the beginning"
+		}
+		if {![llength $fragafter]} {
+		    trouble internal "Tried to split off a zero-length fragment at the end"
+		}
+
+		lappend pending $fragbefore $fragafter
+		CutAt $best
 	    }
+
+	    incr at
+	}
+
+	log write 6 csets ". . .. ... ..... ........ ............."
+
+	# Create changesets for the fragments, reusing the current one
+	# for the first fragment. We sort them in order to allow
+	# checking for gaps and nice messages.
+
+	set fragments [lsort -index 0 -integer $fragments]
+
+	#puts \t.[join [PRs $fragments] .\n\t.].
+
+	Border [lindex $fragments 0] firsts firste
+
+	if {$firsts != 0} {
+	    trouble internal "Bad fragment start @ $firsts, gap, or before beginning of the range"
 	}
 
-	if {[llength $best] > 1} {
-	    set min -1
-	    set newbest {}
-	    foreach at $best {
-		set rat   [lindex $myrevisions $at] ; incr at
-		set rnext [lindex $myrevisions $at] ; incr at -1
-		set tat   [lindex [state run {SELECT R.date FROM revision R WHERE R.rid = $rat  }] 0]
-		set tnext [lindex [state run {SELECT R.date FROM revision R WHERE R.rid = $rnext}] 0]
-		set delta [expr {$tnext - $tat}]
-		if {($min < 0) || ($delta < $min)} {
-		    set min $delta
-		    set newbest $at
-		} elseif {$delta == $min} {
-		    lappend newbest $at
-		}
+	set laste $firste
+	foreach fragment [lrange $fragments 1 end] {
+	    Border $fragment s e
+	    if {$laste != ($s - 1)} {
+		trouble internal "Bad fragment border <$laste | $s>, gap or overlap"
 	    }
-	    set best $newbest
-	}
-
-	if {[llength $best] > 1} {
-	    set best [lindex [lsort -integer -increasing $best] 0]
+
+	    set new [$type %AUTO% $myproject $mytype $mysrcid [lrange $myrevisions $s $e]]
+	    lappend csets $new
+
+            log write 4 csets "Breaking <$myid> @ $laste, new <[$new id]>, cutting $breaks($laste)"
+
+	    set laste $e
 	}
 
-	# Now we can split off a fragment.
-
-	set bnext $best ; incr bnext
-	set revbefore [lrange $myrevisions 0 $best]
-	set revafter  [lrange $myrevisions $bnext end]
-
-	if {![llength $revbefore]} {
-	    trouble internal "Tried to split off a zero-length fragment at the beginning"
-	}
-	if {![llength $revafter]} {
-	    trouble internal "Tried to split off a zero-length fragment at the end"
+	if {$laste != ([llength $myrevisions]-1)} {
+	    trouble internal "Bad fragment end @ $laste, gap, or beyond end of the range"
 	}
 
-	lappend csets [set new [$type %AUTO% $myproject $mytype $mysrcid $revafter]]
-	set myrevisions $revbefore
-
-	log write 4 csets "Breaking <$myid> @$best, making <[$new id]>, cutting $crossing($best)"
-
-	#puts "\tKeeping   <$revbefore>"
-	#puts "\tSplit off <$revafter>"
+	# Put the first fragment into the current changeset.
+	set myrevisions [lrange $myrevisions 0 $firste]
 
 	return 1
     }
 
@@ -244,8 +227,249 @@
 	}] { set mycstype($name) $tid }
 	return
     }
 
+    proc PullInternalDependencies {dv revisions} {
+	upvar 1 $dv dependencies
+	set theset ('[join $revisions {','}]')
+
+	foreach {rid child} [state run "
+   -- Primary children
+	    SELECT R.rid, R.child
+	    FROM   revision R
+	    WHERE  R.rid   IN $theset
+	    AND    R.child IS NOT NULL
+	    AND    R.child IN $theset
+    UNION
+    -- Transition NTDB to trunk
+	    SELECT R.rid, R.dbchild
+	    FROM   revision R
+	    WHERE  R.rid   IN $theset
+	    AND    R.dbchild IS NOT NULL
+	    AND    R.dbchild IN $theset
+    UNION
+    -- Secondary (branch) children
+	    SELECT R.rid, B.brid
+	    FROM   revision R, revisionbranchchildren B
+	    WHERE  R.rid   IN $theset
+	    AND    R.rid = B.rid
+	    AND    B.brid IN $theset
+	"] {
+	    # Consider moving this to the integrity module.
+	    if {$rid == $child} {
+		trouble internal "Revision $rid depends on itself."
+	    }
+	    set dependencies($rid) $child
+	}
+    }
+
+    proc InitializeBreakState {revisions} {
+	upvar 1 pos pos cross cross range range depc depc delta delta \
+	    dependencies dependencies
+
+	# First we create a map of positions to make it easier to
+	# determine whether a dependency crosses a particular index.
+
+	array set pos   {}
+	array set cross {}
+	array set depc  {}
+	set range       {}
+	set n 0
+	foreach rev $revisions {
+	    lappend range $n
+	    set pos($rev) $n
+	    set cross($n) 0
+	    incr n
+	}
+
+	# Secondly we count the crossings per position, by iterating
+	# over the recorded internal dependencies.
+
+	# Note: If the timestamps are badly out of order it is
+	#       possible to have a backward successor dependency,
+	#       i.e. with start > end. We may have to swap the indices
+	#       to ensure that the following loop runs correctly.
+	#
+	# Note 2: start == end is not possible. It indicates a
+	#         self-dependency due to the uniqueness of positions,
+	#         and that is something we have ruled out already, see
+	#         PullInternalDependencies.
+
+	foreach {rid child} [array get dependencies] {
+	    set dkey    [list $rid $child]
+	    set start   $pos($rid)
+	    set end     $pos($child)
+	    set crosses {}
+
+	    if {$start > $end} {
+		while {$end < $start} {
+		    lappend crosses $end
+		    incr cross($end)
+		    incr end
+		}
+	    } else {
+		while {$start < $end} {
+		    lappend crosses $start
+		    incr cross($start)
+		    incr start
+		}
+	    }
+	    set depc($dkey) $crosses
+	}
+
+	InitializeDeltas $revisions
+	return
+    }
+
+    proc InitializeDeltas {revisions} {
+	upvar 1 delta delta
+
+	# Pull the timestamps for all revisions in the changesets and
+	# compute their deltas for use by the break finder.
+
+	array set delta {}
+	array set stamp {}
+
+	set theset ('[join $revisions {','}]')
+	foreach {rid time} [state run "
+	    SELECT R.rid, R.date
+	    FROM revision R
+	    WHERE R.rid IN $theset
+	"] {
+	    set stamp($rid) $time
+	}
+
+	set n 0
+	foreach rid [lrange $revisions 0 end-1] rnext [lrange $revisions 1 end] {
+	    set delta($n) [expr {$stamp($rnext) - $stamp($rid)}]
+	    incr n
+	}
+	return
+    }
+
+    proc FindBestBreak {range} {
+	upvar 1 cross cross delta delta
+
+	# Determine the best break location in the given range of
+	# positions. First we look for the locations with the maximal
+	# number of crossings. If there are several we look for the
+	# shortest time interval among them. If we still have multiple
+	# possibilities after that we select the earliest location
+	# among these.
+
+	# Note: If the maximal number of crossings is 0 then the range
+	#       has no internal dependencies, and no break location at
+	#       all. This possibility is signaled via result -1.
+
+	# Note: A range of length 1 or less cannot have internal
+	#       dependencies, as that needs at least two revisions in
+	#       the range.
+
+	if {[llength $range] < 2} { return -1 }
+
+	set max -1
+	set best {}
+
+	foreach location $range {
+	    set crossings $cross($location)
+	    if {$crossings > $max} {
+		set max  $crossings
+		set best [list $location]
+		continue
+	    } elseif {$crossings == $max} {
+		lappend best $location
+	    }
+	}
+
+	if {$max == 0}            { return -1 }
+	if {[llength $best] == 1} { return [lindex $best 0] }
+
+	set locations $best
+	set best {}
+	set min -1
+
+	foreach location $locations {
+	    set interval $delta($location)
+	    if {($min < 0) || ($interval < $min)} {
+		set min  $interval
+		set best [list $location]
+	    } elseif {$interval == $min} {
+		lappend best $location
+	    }
+	}
+
+	if {[llength $best] == 1} { return [lindex $best 0] }
+
+	return [lindex [lsort -integer -increasing $best] 0]
+    }
+
+    proc CutAt {location} {
+	upvar 1 cross cross depc depc
+
+	# It was decided to split the changeset at the given
+	# location. This cuts a number of dependencies. Here we update
+	# the cross information so that the break finder has accurate
+	# data when we look at the generated fragments.
+
+	set six [log visible? 6]
+
+	foreach {dep range} [array get depc] {
+	    # Check all dependencies still known, take their range and
+	    # see if the break location falls within.
+
+	    Border $range s e
+	    if {$location < $s} continue ; # break before range, ignore
+	    if {$location > $e} continue ; # break after range, ignore.
+
+	    # This dependency crosses the break location. We remove it
+	    # from the crossings counters, and then also from the set
+	    # of known dependencies, as we are done with it.
+
+	    foreach loc $depc($dep) { incr cross($loc) -1 }
+	    unset depc($dep)
+
+	    if {!$six} continue
+
+	    struct::list assign $dep parent child
+	    log write 6 csets "Broke dependency [PD $parent] --> [PD $child]"
+	}
+
+	return
+    }
+
+    # Print identifying data for a revision (project, file, dotted rev
+    # number), for high verbosity log output.
+
+    proc PD {id} {
+	foreach {p f r} [state run {
+		SELECT P.name , F.name, R.rev
+		FROM revision R, file F, project P
+		WHERE R.rid = $id
+		AND   R.fid = F.fid
+		AND   F.pid = P.pid
+	}] break
+	return "'$p : $f/$r'"
+    }
+
+    # Printing one or more ranges, formatted, and only their border to
+    # keep the strings short.
+
+    proc PRs {ranges} {
+	return [struct::list map $ranges [myproc PR]]
+    }
+
+    proc PR {range} {
+	Border $range s e
+	return <${s}...${e}>
+    }
+
+    proc Border {range sv ev} {
+	upvar 1 $sv s $ev e
+	set s [lindex $range 0]
+	set e [lindex $range end]
+	return
+    }
+
     # # ## ### ##### ######## #############
     ## Configuration
 
     pragma -hastypeinfo    no  ; # no type introspection
@@ -258,8 +482,10 @@
 namespace eval ::vc::fossil::import::cvs::project {
     namespace export rev
     namespace eval rev {
 	namespace import ::vc::fossil::import::cvs::state
+	namespace import ::vc::tools::misc::*
+	namespace import ::vc::tools::trouble
 	namespace import ::vc::tools::log
 	log register csets
     }
 }