Overview
SHA1 Hash: | c14e8f84cd33db73c2eeee82e2781f964d650752 |
---|---|
Date: | 2007-11-30 06:50:47 |
User: | aku |
Comment: | Moved the integrity checks for split fragments into separate command. Reworked breaking of internal dependencies to contrain the length of the pending list. That part of the system is still a memory hog, especially for large changesets. Added notes about this and the successor retrieval being a bottleneck. |
Timelines: | ancestors | descendants | both | trunk |
Other Links: | files | ZIP archive | manifest |
Tags And Properties
- branch=trunk inherited from [a28c83647d]
- sym-trunk inherited from [a28c83647d]
Changes
[hide diffs]Modified tools/cvs2fossil/lib/c2f_prev.tcl from [e7ab6ef540] to [b900e0555e].
@@ -142,10 +142,20 @@ set mypremap [array get tmp] return $mypremap } method breakinternaldependencies {} { + + ## + ## NOTE: This method, maybe in conjunction with its caller + ## seems to be a memory hog, especially for large + ## changesets, with 'large' meaning to have a 'long list + ## of items, several thousand'. Investigate where the + ## memory is spent and then look for ways of rectifying + ## the problem. + ## + # This method inspects the changesets for internal # dependencies. Nothing is done if there are no # such. Otherwise the changeset is split into a set of # fragments without internal dependencies, transforming the # internal dependencies into external ones. The new changesets @@ -187,58 +197,73 @@ # A dependency is a single-element map parent -> child InitializeBreakState $myitems set fragments {} - set pending [list $range] - set at 0 + set new [list $range] array set breaks {} - while {$at < [llength $pending]} { - set current [lindex $pending $at] - - log write 6 csets {. . .. ... ..... ........ .............} - log write 6 csets {Scheduled [join [PRs [lrange $pending $at end]] { }]} - log write 6 csets {Considering [PR $current] \[$at/[llength $pending]\]} - - set best [FindBestBreak $current] - - if {$best < 0} { - # The inspected range has no internal - # dependencies. This is a complete fragment. - lappend fragments $current - - log write 6 csets "No breaks, final" - } else { - # Split the range and schedule the resulting fragments - # for further inspection. Remember the number of - # dependencies cut before we remove them from - # consideration, for documentation later. - - set breaks($best) $cross($best) - - log write 6 csets "Best break @ $best, cutting [nsp $cross($best) dependency dependencies]" - - # Note: The value of best is an abolute location in - # myitems. Use the start of current to make it an - # index absolute to current. - - set brel [expr {$best - [lindex $current 0]}] - set bnext $brel ; incr bnext - set fragbefore [lrange $current 0 $brel] - set fragafter [lrange $current $bnext end] - - log write 6 csets "New pieces [PR $fragbefore] [PR $fragafter]" - - integrity assert {[llength $fragbefore]} {Found zero-length fragment at the beginning} - integrity assert {[llength $fragafter]} {Found zero-length fragment at the end} - - lappend pending $fragbefore $fragafter - CutAt $best - } - - incr at + # Instead of one list holding both processed and pending + # fragments we use two, one for the framents to process, one + # to hold the new fragments, and the latter is copied to the + # former when they run out. This keeps the list of pending + # fragments short without sacrificing speed by shifting stuff + # down. We especially drop the memory of fragments broken + # during processing after a short time, instead of letting it + # consume memory. + + while {[llength $new]} { + + set pending $new + set new {} + set at 0 + + while {$at < [llength $pending]} { + set current [lindex $pending $at] + + log write 6 csets {. . .. ... ..... ........ .............} + log write 6 csets {Scheduled [join [PRs [lrange $pending $at end]] { }]} + log write 6 csets {Considering [PR $current] \[$at/[llength $pending]\]} + + set best [FindBestBreak $current] + + if {$best < 0} { + # The inspected range has no internal + # dependencies. This is a complete fragment. + lappend fragments $current + + log write 6 csets "No breaks, final" + } else { + # Split the range and schedule the resulting + # fragments for further inspection. Remember the + # number of dependencies cut before we remove them + # from consideration, for documentation later. + + set breaks($best) $cross($best) + + log write 6 csets "Best break @ $best, cutting [nsp $cross($best) dependency dependencies]" + + # Note: The value of best is an abolute location + # in myitems. Use the start of current to make it + # an index absolute to current. + + set brel [expr {$best - [lindex $current 0]}] + set bnext $brel ; incr bnext + set fragbefore [lrange $current 0 $brel] + set fragafter [lrange $current $bnext end] + + log write 6 csets "New pieces [PR $fragbefore] [PR $fragafter]" + + integrity assert {[llength $fragbefore]} {Found zero-length fragment at the beginning} + integrity assert {[llength $fragafter]} {Found zero-length fragment at the end} + + lappend new $fragbefore $fragafter + CutAt $best + } + + incr at + } } log write 6 csets ". . .. ... ..... ........ ............." # (*) We clear out the associated part of the myitemmap @@ -339,11 +364,11 @@ set mychangesets [lreplace $mychangesets $pos $pos] return } method selfreferential {} { - log write 9 csets {Checking [$self str] /[llength $myitems]} + log write 7 csets {Checking [$self str] /[llength $myitems]} if {![struct::set contains [$self successors] $self]} { return 0 } if {[log verbosity?] < 8} { return 1 } @@ -378,40 +403,12 @@ # # Note: The item lists found in args are tagged items. They # have to have the same type as the changeset, being subsets # of its items. This is checked in Untag1. - # Constraints: No fragment must be empty. All fragments have - # to be subsets of the cset. The union has to cover the - # original. All pairwise intersections have to be empty. - log write 8 csets {OLD: [lsort [$cset items]]} - - set cover {} - foreach fragmentitems $args { - log write 8 csets {NEW: [lsort $fragmentitems]} - - integrity assert { - ![struct::set empty $fragmentitems] - } {changeset fragment is empty} - integrity assert { - [struct::set subsetof $fragmentitems [$cset items]] - } {changeset fragment is not a subset} - struct::set add cover $fragmentitems - } - integrity assert { - [struct::set equal $cover [$cset items]] - } {The fragments do not cover the original changeset} - set i 1 - foreach fia $args { - foreach fib [lrange $args $i end] { - integrity assert { - [struct::set empty [struct::set intersect $fia $fib]] - } {The fragments <$fia> and <$fib> overlap} - } - incr i - } + ValidateFragments $cset $args # All checks pass, actually perform the split. struct::list assign [$cset data] project cstype cssrc @@ -434,10 +431,15 @@ trouble abort? return $newcsets } + typemethod itemstr {item} { + struct::list assign $item itype iid + return [$itype str $iid] + } + typemethod strlist {changesets} { return [join [struct::list map $changesets [myproc ID]]] } proc ID {cset} { $cset str } @@ -450,13 +452,55 @@ struct::list assign $theitem t i integrity assert {$cstype eq $t} {Item $i's type is '$t', expected '$cstype'} return $i } - typemethod itemstr {item} { - struct::list assign $item itype iid - return [$itype str $iid] + proc ValidateFragments {cset fragments} { + # Check the various integrity constraints for the fragments + # specifying how to split the changeset: + # + # * We must have two or more fragments, as splitting a + # changeset into one makes no sense. + # * No fragment may be empty. + # * All fragments have to be true subsets of the items in the + # changeset to split. The 'true' is implied because none are + # allowed to be empty, so each has to be smaller than the + # total. + # * The union of the fragments has to be the item set of the + # changeset. + # * The fragment must not overlap, i.e. their pairwise + # intersections have to be empty. + + set cover {} + foreach fragmentitems $args { + log write 8 csets {NEW: [lsort $fragmentitems]} + + integrity assert { + ![struct::set empty $fragmentitems] + } {changeset fragment is empty} + + integrity assert { + [struct::set subsetof $fragmentitems [$cset items]] + } {changeset fragment is not a subset} + struct::set add cover $fragmentitems + } + + integrity assert { + [struct::set equal $cover [$cset items]] + } {The fragments do not cover the original changeset} + + set i 1 + foreach fia $args { + foreach fib [lrange $args $i end] { + integrity assert { + [struct::set empty [struct::set intersect $fia $fib]] + } {The fragments <$fia> and <$fib> overlap} + } + incr i + } + + return } # # ## ### ##### ######## ############# ## State @@ -747,10 +791,16 @@ pragma -hastypeinfo no ; # no type introspection pragma -hasinfo no ; # no object introspection # # ## ### ##### ######## ############# } + +## +## NOTE: The successor and predecessor methods defined by the classes +## below are -- bottle necks --. Look for ways to make the SQL +## faster. +## # # ## ### ##### ######## ############# ##################### ## Helper singleton. Commands for revision changesets. snit::type ::vc::fossil::import::cvs::project::rev::rev {