Check-in [c14e8f84cd]
Not logged in
Overview

SHA1 Hash:c14e8f84cd33db73c2eeee82e2781f964d650752
Date: 2007-11-30 06:50:47
User: aku
Comment:Moved the integrity checks for split fragments into separate command. Reworked breaking of internal dependencies to contrain the length of the pending list. That part of the system is still a memory hog, especially for large changesets. Added notes about this and the successor retrieval being a bottleneck.
Timelines: ancestors | descendants | both | trunk
Other Links: files | ZIP archive | manifest

Tags And Properties
Changes
[hide diffs]

Modified tools/cvs2fossil/lib/c2f_prev.tcl from [e7ab6ef540] to [b900e0555e].

@@ -142,10 +142,20 @@
 	set mypremap [array get tmp]
 	return $mypremap
     }
 
     method breakinternaldependencies {} {
+
+	##
+	## NOTE: This method, maybe in conjunction with its caller
+	##       seems to be a memory hog, especially for large
+	##       changesets, with 'large' meaning to have a 'long list
+	##       of items, several thousand'. Investigate where the
+	##       memory is spent and then look for ways of rectifying
+	##       the problem.
+	##
+
 	# This method inspects the changesets for internal
 	# dependencies. Nothing is done if there are no
 	# such. Otherwise the changeset is split into a set of
 	# fragments without internal dependencies, transforming the
 	# internal dependencies into external ones. The new changesets
@@ -187,58 +197,73 @@
 	# A dependency is a single-element map parent -> child
 
 	InitializeBreakState $myitems
 
 	set fragments {}
-	set pending   [list $range]
-	set at        0
+	set new       [list $range]
 	array set breaks {}
 
-	while {$at < [llength $pending]} {
-	    set current [lindex $pending $at]
-
-	    log write 6 csets {. . .. ... ..... ........ .............}
-	    log write 6 csets {Scheduled   [join [PRs [lrange $pending $at end]] { }]}
-	    log write 6 csets {Considering [PR $current] \[$at/[llength $pending]\]}
-
-	    set best [FindBestBreak $current]
-
-	    if {$best < 0} {
-		# The inspected range has no internal
-		# dependencies. This is a complete fragment.
-		lappend fragments $current
-
-		log write 6 csets "No breaks, final"
-	    } else {
-		# Split the range and schedule the resulting fragments
-		# for further inspection. Remember the number of
-		# dependencies cut before we remove them from
-		# consideration, for documentation later.
-
-		set breaks($best) $cross($best)
-
-		log write 6 csets "Best break @ $best, cutting [nsp $cross($best) dependency dependencies]"
-
-		# Note: The value of best is an abolute location in
-		# myitems. Use the start of current to make it an
-		# index absolute to current.
-
-		set brel [expr {$best - [lindex $current 0]}]
-		set bnext $brel ; incr bnext
-		set fragbefore [lrange $current 0 $brel]
-		set fragafter  [lrange $current $bnext end]
-
-		log write 6 csets "New pieces  [PR $fragbefore] [PR $fragafter]"
-
-		integrity assert {[llength $fragbefore]} {Found zero-length fragment at the beginning}
-		integrity assert {[llength $fragafter]}  {Found zero-length fragment at the end}
-
-		lappend pending $fragbefore $fragafter
-		CutAt $best
-	    }
-
-	    incr at
+	# Instead of one list holding both processed and pending
+	# fragments we use two, one for the framents to process, one
+	# to hold the new fragments, and the latter is copied to the
+	# former when they run out. This keeps the list of pending
+	# fragments short without sacrificing speed by shifting stuff
+	# down. We especially drop the memory of fragments broken
+	# during processing after a short time, instead of letting it
+	# consume memory.
+
+	while {[llength $new]} {
+
+	    set pending $new
+	    set new     {}
+	    set at      0
+
+	    while {$at < [llength $pending]} {
+		set current [lindex $pending $at]
+
+		log write 6 csets {. . .. ... ..... ........ .............}
+		log write 6 csets {Scheduled   [join [PRs [lrange $pending $at end]] { }]}
+		log write 6 csets {Considering [PR $current] \[$at/[llength $pending]\]}
+
+		set best [FindBestBreak $current]
+
+		if {$best < 0} {
+		    # The inspected range has no internal
+		    # dependencies. This is a complete fragment.
+		    lappend fragments $current
+
+		    log write 6 csets "No breaks, final"
+		} else {
+		    # Split the range and schedule the resulting
+		    # fragments for further inspection. Remember the
+		    # number of dependencies cut before we remove them
+		    # from consideration, for documentation later.
+
+		    set breaks($best) $cross($best)
+
+		    log write 6 csets "Best break @ $best, cutting [nsp $cross($best) dependency dependencies]"
+
+		    # Note: The value of best is an abolute location
+		    # in myitems. Use the start of current to make it
+		    # an index absolute to current.
+
+		    set brel [expr {$best - [lindex $current 0]}]
+		    set bnext $brel ; incr bnext
+		    set fragbefore [lrange $current 0 $brel]
+		    set fragafter  [lrange $current $bnext end]
+
+		    log write 6 csets "New pieces  [PR $fragbefore] [PR $fragafter]"
+
+		    integrity assert {[llength $fragbefore]} {Found zero-length fragment at the beginning}
+		    integrity assert {[llength $fragafter]}  {Found zero-length fragment at the end}
+
+		    lappend new $fragbefore $fragafter
+		    CutAt $best
+		}
+
+		incr at
+	    }
 	}
 
 	log write 6 csets ". . .. ... ..... ........ ............."
 
 	# (*) We clear out the associated part of the myitemmap
@@ -339,11 +364,11 @@
 	set mychangesets [lreplace $mychangesets $pos $pos]
 	return
     }
 
     method selfreferential {} {
-	log write 9 csets {Checking [$self str] /[llength $myitems]}
+	log write 7 csets {Checking [$self str] /[llength $myitems]}
 
 	if {![struct::set contains [$self successors] $self]} {
 	    return 0
 	}
 	if {[log verbosity?] < 8} { return 1 }
@@ -378,40 +403,12 @@
 	#
 	# Note: The item lists found in args are tagged items. They
 	# have to have the same type as the changeset, being subsets
 	# of its items. This is checked in Untag1.
 
-	# Constraints: No fragment must be empty. All fragments have
-	# to be subsets of the cset. The union has to cover the
-	# original. All pairwise intersections have to be empty.
-
 	log write 8 csets {OLD: [lsort [$cset items]]}
-
-	set cover {}
-	foreach fragmentitems $args {
-	    log write 8 csets {NEW: [lsort $fragmentitems]}
-
-	    integrity assert {
-		![struct::set empty $fragmentitems]
-	    } {changeset fragment is empty}
-	    integrity assert {
-		[struct::set subsetof $fragmentitems [$cset items]]
-	    } {changeset fragment is not a subset}
-	    struct::set add cover $fragmentitems
-	}
-	integrity assert {
-	    [struct::set equal $cover [$cset items]]
-	 } {The fragments do not cover the original changeset}
-	set i 1
-	foreach fia $args {
-	    foreach fib [lrange $args $i end] {
-		integrity assert {
-		    [struct::set empty [struct::set intersect $fia $fib]]
-		} {The fragments <$fia> and <$fib> overlap}
-	    }
-	    incr i
-	}
+	ValidateFragments $cset $args
 
 	# All checks pass, actually perform the split.
 
 	struct::list assign [$cset data] project cstype cssrc
 
@@ -434,10 +431,15 @@
 
 	trouble abort?
 	return $newcsets
     }
 
+    typemethod itemstr {item} {
+	struct::list assign $item itype iid
+	return [$itype str $iid]
+    }
+
     typemethod strlist {changesets} {
 	return [join [struct::list map $changesets [myproc ID]]]
     }
 
     proc ID {cset} { $cset str }
@@ -450,13 +452,55 @@
 	struct::list assign $theitem t i
 	integrity assert {$cstype eq $t} {Item $i's type is '$t', expected '$cstype'}
 	return $i
     }
 
-    typemethod itemstr {item} {
-	struct::list assign $item itype iid
-	return [$itype str $iid]
+    proc ValidateFragments {cset fragments} {
+	# Check the various integrity constraints for the fragments
+	# specifying how to split the changeset:
+	#
+	# * We must have two or more fragments, as splitting a
+	#   changeset into one makes no sense.
+	# * No fragment may be empty.
+	# * All fragments have to be true subsets of the items in the
+	#   changeset to split. The 'true' is implied because none are
+	#   allowed to be empty, so each has to be smaller than the
+	#   total.
+	# * The union of the fragments has to be the item set of the
+	#   changeset.
+	# * The fragment must not overlap, i.e. their pairwise
+	#   intersections have to be empty.
+
+	set cover {}
+	foreach fragmentitems $args {
+	    log write 8 csets {NEW: [lsort $fragmentitems]}
+
+	    integrity assert {
+		![struct::set empty $fragmentitems]
+	    } {changeset fragment is empty}
+
+	    integrity assert {
+		[struct::set subsetof $fragmentitems [$cset items]]
+	    } {changeset fragment is not a subset}
+	    struct::set add cover $fragmentitems
+	}
+
+	integrity assert {
+	    [struct::set equal $cover [$cset items]]
+	 } {The fragments do not cover the original changeset}
+
+	set i 1
+	foreach fia $args {
+	    foreach fib [lrange $args $i end] {
+		integrity assert {
+		    [struct::set empty [struct::set intersect $fia $fib]]
+		} {The fragments <$fia> and <$fib> overlap}
+	    }
+	    incr i
+	}
+
+	return
     }
 
     # # ## ### ##### ######## #############
     ## State
 
@@ -747,10 +791,16 @@
     pragma -hastypeinfo    no  ; # no type introspection
     pragma -hasinfo        no  ; # no object introspection
 
     # # ## ### ##### ######## #############
 }
+
+##
+## NOTE: The successor and predecessor methods defined by the classes
+##       below are -- bottle necks --. Look for ways to make the SQL
+##       faster.
+##
 
 # # ## ### ##### ######## ############# #####################
 ## Helper singleton. Commands for revision changesets.
 
 snit::type ::vc::fossil::import::cvs::project::rev::rev {