Fossil: Diff

Differences From:

File tools/cvs2fossil/lib/c2f_prev.tcl part of check-in [facb4a8721] - Fixed bug in new changeset code, tagged and untagged item lists went out of sync. by aku on 2007-11-30 04:27:05. [view]
To:

File tools/cvs2fossil/lib/c2f_prev.tcl part of check-in [c14e8f84cd] - Moved the integrity checks for split fragments into separate command. Reworked breaking of internal dependencies to contrain the length of the pending list. That part of the system is still a memory hog, especially for large changesets. Added notes about this and the successor retrieval being a bottleneck. by aku on 2007-11-30 06:50:47. [view]
@@ -143,8 +143,18 @@
 	return $mypremap
     }
 
     method breakinternaldependencies {} {
+
+	##
+	## NOTE: This method, maybe in conjunction with its caller
+	##       seems to be a memory hog, especially for large
+	##       changesets, with 'large' meaning to have a 'long list
+	##       of items, several thousand'. Investigate where the
+	##       memory is spent and then look for ways of rectifying
+	##       the problem.
+	##
+
 	# This method inspects the changesets for internal
 	# dependencies. Nothing is done if there are no
 	# such. Otherwise the changeset is split into a set of
 	# fragments without internal dependencies, transforming the
@@ -188,56 +198,71 @@
 
 	InitializeBreakState $myitems
 
 	set fragments {}
-	set pending   [list $range]
-	set at        0
+	set new       [list $range]
 	array set breaks {}
 
-	while {$at < [llength $pending]} {
-	    set current [lindex $pending $at]
-
-	    log write 6 csets {. . .. ... ..... ........ .............}
-	    log write 6 csets {Scheduled   [join [PRs [lrange $pending $at end]] { }]}
-	    log write 6 csets {Considering [PR $current] \[$at/[llength $pending]\]}
-
-	    set best [FindBestBreak $current]
-
-	    if {$best < 0} {
-		# The inspected range has no internal
-		# dependencies. This is a complete fragment.
-		lappend fragments $current
-
-		log write 6 csets "No breaks, final"
-	    } else {
-		# Split the range and schedule the resulting fragments
-		# for further inspection. Remember the number of
-		# dependencies cut before we remove them from
-		# consideration, for documentation later.
-
-		set breaks($best) $cross($best)
-
-		log write 6 csets "Best break @ $best, cutting [nsp $cross($best) dependency dependencies]"
-
-		# Note: The value of best is an abolute location in
-		# myitems. Use the start of current to make it an
-		# index absolute to current.
-
-		set brel [expr {$best - [lindex $current 0]}]
-		set bnext $brel ; incr bnext
-		set fragbefore [lrange $current 0 $brel]
-		set fragafter  [lrange $current $bnext end]
-
-		log write 6 csets "New pieces  [PR $fragbefore] [PR $fragafter]"
-
-		integrity assert {[llength $fragbefore]} {Found zero-length fragment at the beginning}
-		integrity assert {[llength $fragafter]}  {Found zero-length fragment at the end}
-
-		lappend pending $fragbefore $fragafter
-		CutAt $best
-	    }
-
-	    incr at
+	# Instead of one list holding both processed and pending
+	# fragments we use two, one for the framents to process, one
+	# to hold the new fragments, and the latter is copied to the
+	# former when they run out. This keeps the list of pending
+	# fragments short without sacrificing speed by shifting stuff
+	# down. We especially drop the memory of fragments broken
+	# during processing after a short time, instead of letting it
+	# consume memory.
+
+	while {[llength $new]} {
+
+	    set pending $new
+	    set new     {}
+	    set at      0
+
+	    while {$at < [llength $pending]} {
+		set current [lindex $pending $at]
+
+		log write 6 csets {. . .. ... ..... ........ .............}
+		log write 6 csets {Scheduled   [join [PRs [lrange $pending $at end]] { }]}
+		log write 6 csets {Considering [PR $current] \[$at/[llength $pending]\]}
+
+		set best [FindBestBreak $current]
+
+		if {$best < 0} {
+		    # The inspected range has no internal
+		    # dependencies. This is a complete fragment.
+		    lappend fragments $current
+
+		    log write 6 csets "No breaks, final"
+		} else {
+		    # Split the range and schedule the resulting
+		    # fragments for further inspection. Remember the
+		    # number of dependencies cut before we remove them
+		    # from consideration, for documentation later.
+
+		    set breaks($best) $cross($best)
+
+		    log write 6 csets "Best break @ $best, cutting [nsp $cross($best) dependency dependencies]"
+
+		    # Note: The value of best is an abolute location
+		    # in myitems. Use the start of current to make it
+		    # an index absolute to current.
+
+		    set brel [expr {$best - [lindex $current 0]}]
+		    set bnext $brel ; incr bnext
+		    set fragbefore [lrange $current 0 $brel]
+		    set fragafter  [lrange $current $bnext end]
+
+		    log write 6 csets "New pieces  [PR $fragbefore] [PR $fragafter]"
+
+		    integrity assert {[llength $fragbefore]} {Found zero-length fragment at the beginning}
+		    integrity assert {[llength $fragafter]}  {Found zero-length fragment at the end}
+
+		    lappend new $fragbefore $fragafter
+		    CutAt $best
+		}
+
+		incr at
+	    }
 	}
 
 	log write 6 csets ". . .. ... ..... ........ ............."
 
@@ -340,9 +365,9 @@
 	return
     }
 
     method selfreferential {} {
-	log write 9 csets {Checking [$self str] /[llength $myitems]}
+	log write 7 csets {Checking [$self str] /[llength $myitems]}
 
 	if {![struct::set contains [$self successors] $self]} {
 	    return 0
 	}
@@ -379,38 +404,10 @@
 	# Note: The item lists found in args are tagged items. They
 	# have to have the same type as the changeset, being subsets
 	# of its items. This is checked in Untag1.
 
-	# Constraints: No fragment must be empty. All fragments have
-	# to be subsets of the cset. The union has to cover the
-	# original. All pairwise intersections have to be empty.
-
 	log write 8 csets {OLD: [lsort [$cset items]]}
-
-	set cover {}
-	foreach fragmentitems $args {
-	    log write 8 csets {NEW: [lsort $fragmentitems]}
-
-	    integrity assert {
-		![struct::set empty $fragmentitems]
-	    } {changeset fragment is empty}
-	    integrity assert {
-		[struct::set subsetof $fragmentitems [$cset items]]
-	    } {changeset fragment is not a subset}
-	    struct::set add cover $fragmentitems
-	}
-	integrity assert {
-	    [struct::set equal $cover [$cset items]]
-	 } {The fragments do not cover the original changeset}
-	set i 1
-	foreach fia $args {
-	    foreach fib [lrange $args $i end] {
-		integrity assert {
-		    [struct::set empty [struct::set intersect $fia $fib]]
-		} {The fragments <$fia> and <$fib> overlap}
-	    }
-	    incr i
-	}
+	ValidateFragments $cset $args
 
 	# All checks pass, actually perform the split.
 
 	struct::list assign [$cset data] project cstype cssrc
@@ -435,8 +432,13 @@
 	trouble abort?
 	return $newcsets
     }
 
+    typemethod itemstr {item} {
+	struct::list assign $item itype iid
+	return [$itype str $iid]
+    }
+
     typemethod strlist {changesets} {
 	return [join [struct::list map $changesets [myproc ID]]]
     }
 
@@ -451,11 +453,53 @@
 	integrity assert {$cstype eq $t} {Item $i's type is '$t', expected '$cstype'}
 	return $i
     }
 
-    typemethod itemstr {item} {
-	struct::list assign $item itype iid
-	return [$itype str $iid]
+    proc ValidateFragments {cset fragments} {
+	# Check the various integrity constraints for the fragments
+	# specifying how to split the changeset:
+	#
+	# * We must have two or more fragments, as splitting a
+	#   changeset into one makes no sense.
+	# * No fragment may be empty.
+	# * All fragments have to be true subsets of the items in the
+	#   changeset to split. The 'true' is implied because none are
+	#   allowed to be empty, so each has to be smaller than the
+	#   total.
+	# * The union of the fragments has to be the item set of the
+	#   changeset.
+	# * The fragment must not overlap, i.e. their pairwise
+	#   intersections have to be empty.
+
+	set cover {}
+	foreach fragmentitems $args {
+	    log write 8 csets {NEW: [lsort $fragmentitems]}
+
+	    integrity assert {
+		![struct::set empty $fragmentitems]
+	    } {changeset fragment is empty}
+
+	    integrity assert {
+		[struct::set subsetof $fragmentitems [$cset items]]
+	    } {changeset fragment is not a subset}
+	    struct::set add cover $fragmentitems
+	}
+
+	integrity assert {
+	    [struct::set equal $cover [$cset items]]
+	 } {The fragments do not cover the original changeset}
+
+	set i 1
+	foreach fia $args {
+	    foreach fib [lrange $args $i end] {
+		integrity assert {
+		    [struct::set empty [struct::set intersect $fia $fib]]
+		} {The fragments <$fia> and <$fib> overlap}
+	    }
+	    incr i
+	}
+
+	return
     }
 
     # # ## ### ##### ######## #############
     ## State
@@ -748,8 +792,14 @@
     pragma -hasinfo        no  ; # no object introspection
 
     # # ## ### ##### ######## #############
 }
+
+##
+## NOTE: The successor and predecessor methods defined by the classes
+##       below are -- bottle necks --. Look for ways to make the SQL
+##       faster.
+##
 
 # # ## ### ##### ######## ############# #####################
 ## Helper singleton. Commands for revision changesets.