Overview
SHA1 Hash: | 94c39d63758d78dc26b3ad5c2fdd285ea37047d1 |
---|---|
Date: | 2007-11-14 05:11:56 |
User: | aku |
Comment: | Completed pass 6, wrote the code performing the breaking of cycles. Done by analysing each triple of changesets in the cycle at the file dependency level to see which revisions can be sorted apart. Added some additional utility routines. Extended the changeset class with the accessors required by the cycle breaker. |
Timelines: | ancestors | descendants | both | trunk |
Other Links: | files | ZIP archive | manifest |
Tags And Properties
- branch=trunk inherited from [a28c83647d]
- sym-trunk inherited from [a28c83647d]
Changes
[hide diffs]Modified tools/cvs2fossil/lib/c2f_pbreakrcycle.tcl from [0d5836726f] to [b471ab73ff].
@@ -15,17 +15,18 @@ ## dependency tree. # # ## ### ##### ######## ############# ##################### ## Requirements -package require Tcl 8.4 ; # Required runtime. -package require snit ; # OO system. -package require struct::graph ; # Graph handling. -package require struct::list ; # Higher order list operations. -package require vc::tools::log ; # User feedback. -package require vc::fossil::import::cvs::state ; # State storage. -package require vc::fossil::import::cvs::project::rev ; # Project level changesets +package require Tcl 8.4 ; # Required runtime. +package require snit ; # OO system. +package require struct::graph ; # Graph handling. +package require struct::list ; # Higher order list operations. +package require vc::tools::log ; # User feedback. +package require vc::fossil::import::cvs::state ; # State storage. +package require vc::fossil::import::cvs::project::rev ; # Project level changesets +package require vc::fossil::import::cvs::project::revlink ; # Cycle links. # # ## ### ##### ######## ############# ##################### ## Register the pass with the management vc::fossil::import::cvs::pass define \ @@ -65,20 +66,22 @@ typemethod run {} { # Pass manager interface. Executed to perform the # functionality of the pass. state reading revision + state reading changeset + state reading csrevision # We create a graph of the revision changesets, using the file # level dependencies to construct a first approximation of # them at the project level. Then look for cycles in that # graph and break them. # 1. Create nodes for all relevant changesets and a mapping # from the revisions to their changesets/nodes. - log write 3 brkrcycle {Creating changeset graph, filling with nodes} + log write 3 breakrcycle {Creating changeset graph, filling with nodes} set dg [struct::graph dg] state transaction { foreach cset [project::rev all] { @@ -90,11 +93,11 @@ # 2. Find for all relevant changeset their revisions and their # dependencies. Map the latter back to changesets and # construct the corresponding arcs. - log write 3 brkrcycle {Setting up node dependencies} + log write 3 breakrcycle {Setting up node dependencies} state transaction { foreach cset [project::rev all] { if {[$cset bysymbol]} continue foreach succ [$cset successors] { @@ -107,21 +110,21 @@ # the nodes which have no predecessors, in order from # oldest to youngest, saving and removing dependencies. If # we find no nodes without predecessors we have a cycle, # and work on breaking it. - log write 3 brkrcycle {Computing changeset order, breaking cycles} + log write 3 breakrcycle {Computing changeset order, breaking cycles} InitializeCandidates $dg state transaction { while {1} { while {[WithoutPredecessor $dg n]} { SaveAndRemove $dg $n } if {![llength [dg nodes]]} break - set cycle [FindCycle $dg] - BreakCycle $dg $cycle + BreakCycle $dg [FindCycle $dg] + InitializeCandidates $dg } } return } @@ -224,20 +227,76 @@ while {1} { # Stop searching when we have seen the current node # already, the circle has been closed. if {[info exists seen($start)]} break lappend path $start - set seen($start) [llength $path] + set seen($start) [expr {[llength $path]-1}] # Choose arbitrary predecessor set start [lindex [$dg nodes -in $start] 0] } return [struct::list reverse [lrange $path $seen($start) end]] } + proc ID {cset} { return "<[$cset id]>" } + proc BreakCycle {dg cycle} { - trouble internal "Break cycle <$cycle>" + # The cycle we have gotten is broken by breaking apart one or + # more of the changesets in the cycle. This causes us to + # create one or more changesets which are to be committed, + # added to the graph, etc. pp. + + set cprint [join [struct::list map $cycle [myproc ID]] { }] + + lappend cycle [lindex $cycle 0] [lindex $cycle 1] + set bestlink {} + set bestnode {} + + foreach \ + prev [lrange $cycle 0 end-2] \ + cset [lrange $cycle 1 end-1] \ + next [lrange $cycle 2 end] { + + # Each triple PREV -> CSET -> NEXT of changesets, a + # 'link' in the cycle, is analysed and the best + # location where to at least weaken the cycle is + # chosen for further processing. + + set link [project::revlink %AUTO% $prev $cset $next] + if {$bestlink eq ""} { + set bestlink $link + set bestnode $cset + } elseif {[$link betterthan $bestlink]} { + $bestlink destroy + set bestlink $link + set bestnode $cset + } else { + $link destroy + } + } + + log write 5 breakrcycle "Breaking cycle ($cprint) by splitting changeset <[$bestnode id]>" + + set newcsets [$bestlink break] + $bestlink destroy + + # At this point the old changeset (BESTNODE) is gone + # already. We remove it from the graph as well and then enter + # the fragments generated for it. + + $dg node delete $bestnode + + foreach cset $newcsets { + dg node insert $cset + dg node set $cset timerange [$cset timerange] + } + + foreach cset $newcsets { + foreach succ [$cset successors] { + dg arc insert $cset $succ + } + } return } typevariable at 0 ; # Counter for commit ids for the changesets. typevariable bottom {} ; # List of candidate nodes for committing. @@ -256,16 +315,17 @@ namespace export breakrcycle namespace eval breakrcycle { namespace import ::vc::fossil::import::cvs::state namespace eval project { namespace import ::vc::fossil::import::cvs::project::rev + namespace import ::vc::fossil::import::cvs::project::revlink } namespace import ::vc::tools::log - log register brkrcycle + log register breakrcycle } } # # ## ### ##### ######## ############# ##################### ## Ready package provide vc::fossil::import::cvs::pass::breakrcycle 1.0 return
Modified tools/cvs2fossil/lib/c2f_prev.tcl from [27b0babb50] to [b05edeebe6].
@@ -44,25 +44,33 @@ return } method id {} { return $myid } method revisions {} { return $myrevisions } + method data {} { return [list $myproject $mytype $mysrcid] } method setid {id} { set myid $id ; return } method bysymbol {} { return [expr {$mytype eq "sym"}] } method byrevision {} { return [expr {$mytype eq "rev"}] } method successors {} { # NOTE / FUTURE: Possible bottleneck. - array set dependencies {} - PullSuccessorRevisions dependencies $myrevisions set csets {} - foreach {_ child} [array get dependencies] { - lappend csets $myrevmap($child) + foreach {_ children} [$self nextmap] { + foreach child $children { + lappend csets $myrevmap($child) + } } return [lsort -unique $csets] + } + + method nextmap {} { + if {[llength $mynextmap]} { return $mynextmap } + PullSuccessorRevisions tmp $myrevisions + set mynextmap [array get tmp] + return $mynextmap } method breakinternaldependencies {} { # This method inspects the changesets for internal # dependencies. Nothing is done if there are no @@ -84,11 +92,11 @@ # Array of dependencies (parent -> child). This is pulled from # the state, and limited to successors within the changeset. array set dependencies {} - PullSuccessorRevisions dependencies $myrevisions + PullInternalSuccessorRevisions dependencies $myrevisions if {![array size dependencies]} {return 0} ; # Nothing to break. log write 6 csets ...<$myid>....................................................... # We have internal dependencies to break. We now iterate over @@ -232,18 +240,40 @@ FROM revision R WHERE R.rid IN $theset "] } + method drop {} { + state transaction { + state run { + DELETE FROM changeset WHERE cid = $myid; + DELETE FROM csrevision WHERE cid = $myid; + } + } + foreach r $myrevisions { unset myrevmap($r) } + set pos [lsearch -exact $mychangesets $self] + set mychangesets [lreplace $mychangesets $pos $pos] + return + } + # # ## ### ##### ######## ############# ## State - variable myid ; # Id of the cset for the persistent state. - variable myproject ; # Reference of the project object the changeset belongs to. - variable mytype ; # rev or sym, where the cset originated from. - variable mysrcid ; # id of the metadata or symbol the cset is based on. - variable myrevisions ; # List of the file level revisions in the cset. + variable myid {} ; # Id of the cset for the persistent + # state. + variable myproject {} ; # Reference of the project object the + # changeset belongs to. + variable mytype {} ; # rev or sym, where the cset originated + # from. + variable mysrcid {} ; # Id of the metadata or symbol the cset + # is based on. + variable myrevisions {} ; # List of the file level revisions in + # the cset. + variable mynextmap {} ; # Dictionary mapping from the revisions + # to their successors. Cache to avoid + # loading this from the state more than + # once. # # ## ### ##### ######## ############# ## Internal methods typevariable mycounter 0 ; # Id counter for csets. @@ -254,11 +284,11 @@ SELECT tid, name FROM cstype; }] { set mycstype($name) $tid } return } - proc PullSuccessorRevisions {dv revisions} { + proc PullInternalSuccessorRevisions {dv revisions} { upvar 1 $dv dependencies set theset ('[join $revisions {','}]') foreach {rid child} [state run " -- Primary children @@ -284,11 +314,42 @@ "] { # Consider moving this to the integrity module. if {$rid == $child} { trouble internal "Revision $rid depends on itself." } - set dependencies($rid) $child + lappend dependencies($rid) $child + } + } + + proc PullSuccessorRevisions {dv revisions} { + upvar 1 $dv dependencies + set theset ('[join $revisions {','}]') + + foreach {rid child} [state run " + -- Primary children + SELECT R.rid, R.child + FROM revision R + WHERE R.rid IN $theset + AND R.child IS NOT NULL + UNION + -- Transition NTDB to trunk + SELECT R.rid, R.dbchild + FROM revision R + WHERE R.rid IN $theset + AND R.dbchild IS NOT NULL + UNION + -- Secondary (branch) children + SELECT R.rid, B.brid + FROM revision R, revisionbranchchildren B + WHERE R.rid IN $theset + AND R.rid = B.rid + "] { + # Consider moving this to the integrity module. + if {$rid == $child} { + trouble internal "Revision $rid depends on itself." + } + lappend dependencies($rid) $child } } proc InitializeBreakState {revisions} { upvar 1 pos pos cross cross range range depc depc delta delta \ @@ -318,11 +379,11 @@ # to ensure that the following loop runs correctly. # # Note 2: start == end is not possible. It indicates a # self-dependency due to the uniqueness of positions, # and that is something we have ruled out already, see - # PullSuccessorRevisions. + # PullInternalSuccessorRevisions. foreach {rid child} [array get dependencies] { set dkey [list $rid $child] set start $pos($rid) set end $pos($child)
Added tools/cvs2fossil/lib/c2f_prevlink.tcl version [01734176b9]
@@ -1,1 +1,248 @@ +## -*- tcl -*- +# # ## ### ##### ######## ############# ##################### +## Copyright (c) 2007 Andreas Kupries. +# +# This software is licensed as described in the file LICENSE, which +# you should have received as part of this distribution. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://fossil-scm.hwaci.com/fossil +# # ## ### ##### ######## ############# ##################### + +## Helper class for the pass 6 cycle breaker. Each instance refers to +## three changesets A, B, and C, with A a predecessor of B, and B +## predecessor of C, and the whole part of a dependency cycle. + +## Instances analyse the file level dependencies which gave rise to +## the changeset dependencies of A, B, and C, with the results used by +## the cycle breaker algorithm to find a good location where to at +## least weaken and at best fully break the cycle. + +# # ## ### ##### ######## ############# ##################### +## Requirements + +package require Tcl 8.4 ; # Required runtime. +package require snit ; # OO system. +package require vc::tools::misc ; # Text formatting +package require vc::tools::trouble ; # Error reporting. +package require vc::tools::log ; # User feedback. +package require vc::fossil::import::cvs::state ; # State storage. +package require vc::fossil::import::cvs::project::rev ; # Project level changesets + +# # ## ### ##### ######## ############# ##################### +## + +snit::type ::vc::fossil::import::cvs::project::revlink { + # # ## ### ##### ######## ############# + ## Public API + + constructor {prev cset next} { + set myprev $prev + set mycset $cset + set mynext $next + + # We perform the bulk of the analysis during construction. The + # file revisions held by the changeset CSET can be sorted into + # four categories. + + # 1. Revisions whose predecessors are not in PREV, nor are + # their successors found in NEXT. These revisions do not + # count, as they did not induce any of the two dependencies + # under consideration. They can be ignored. + + # 2. Revisions which have predecessors in PREV and sucessors + # in NEXT. They are called 'passthrough' in cvs2svn. They + # induce both dependencies under consideration and are thus + # critical in the creation of the cycle. As such they are + # also unbreakable :( + + # 3. Revisions which have predecessor in PREVE, but no + # successors in NEXT. As such they induced the incoming + # dependency, but not the outgoing one. + + # 4. Revisions which have no predecessors in PREVE, but their + # successors are in NEXT. As such they induced the outgoing + # dependency, but not the incoming one. + + # If we have no passthrough revisions then splitting the + # changeset between categories 3 and 4, with category 1 going + # wherever, will break the cycle. If category 2 revisions are + # present we can still perform the split, this will however + # not break the cycle, only weaken it. + + array set csetprevmap [Invert [$myprev nextmap]] + array set csetnextmap [$mycset nextmap] + + set prevrev [$myprev revisions] + set nextrev [$mynext revisions] + + foreach r [$mycset revisions] { + set rt [RT $r] + incr mycount($rt) + lappend mycategory($rt) $r + } + return + } + + # Result is TRUE if and only breaking myset will do some good. + method breakable {} { expr {$mycount(prev) || $mycount(next)} } + method passcount {} { return $mycount(pass) } + + method linkstomove {} { + # Return the number of revisions that would be moved should we + # split the changeset. + + set n [min2 $mycount(prev) $mycount(next)] + if {$n > 0 } { return $n } + return [max2 $mycount(prev) $mycount(next)] + } + + method betterthan {other} { + set sbreak [$self breakable] + set obreak [$other breakable] + + if {$sbreak && !$obreak} { return 1 } ; # self is better. + if {!$sbreak && $obreak} { return 0 } ; # self is worse. + + # Equality. Look at the counters. + # - Whichever has the lesser number of passthrough revisions + # is better, as more can be split off, weakening the cycle + # more. + # - Whichever has less links to move is better. + + set opass [$other passcount] + if {$mycount(pass) < $opass} { return 1 } ; # self is better. + if {$mycount(pass) > $opass} { return 0 } ; # self is worse. + + set smove [$self linkstomove] + set omove [$other linkstomove] + + if {$smove < $omove} { return 1 } ; # self is better. + + return 0 ; # Self is worse or equal, i.e. not better. + } + + method break {} { + if {![$self breakable]} { + trouble internal "Changeset <[$mycset id]> is not breakable." + } + + # One thing to choose when splitting CSET is where the + # revision in categories 1 and 2 (none and passthrough + # respectively) are moved to. This is done using the counters. + + if {!$mycount(prev)} { + # Nothing in category 3 => 1,2 go there, 4 the other. + set mycategory(prev) [concat $mycategory(none) $mycategory(pass)] + } elseif {!$mycount(next)} { + # Nothing in category 4 => 1,2 go there, 3 the other. + set mycategory(next) [concat $mycategory(none) $mycategory(pass)] + } elseif {$mycount(prev) < $mycount(next)} { + # Less predecessors than successors => 1,2 go to the + # sucessors. + set mycategory(next) [concat $mycategory(next) $mycategory(none) \ + $mycategory(pass)] + } else { + # Less successors than predecessors => 1,2 go to the + # predecessors. + set mycategory(next) [concat $mycategory(next) $mycategory(none) \ + $mycategory(pass)] + } + + # We now have the split in the mycategory(prev|next) + # elements. As part of the creation of the new changesets the + # old one is dropped from all databases, in and out of memory, + # and then destroyed. + + struct::list assign [$mycset data] project cstype cssrc + $mycset drop + $mycset destroy + + set newcsets {} + lappend newcsets [project::rev %AUTO% $project $cstype $cssrc $mycategory(prev)] + lappend newcsets [project::rev %AUTO% $project $cstype $cssrc $mycategory(next)] + + foreach c $newcsets { $c persist } + + return $newcsets + } + + # # ## ### ##### ######## ############# + ## State + + variable myprev {} ; # Reference to predecessor changeset in the link. + variable mycset {} ; # Reference to the main changeset of the link. + variable mynext {} ; # Reference to the successor changeset in the link. + + # Counters for the revision categories. + variable mycount -array { + none 0 + prev 0 + next 0 + pass 0 + } + # Lists of revisions for the various categories + variable mycategory -array { + none {} + prev {} + next {} + pass {} + } + + # # ## ### ##### ######## ############# + ## Internal methods + + proc RT {r} { + upvar 1 csetprevmap csetprevmap csetnextmap csetnextmap prevrev prevrev nextrev nextrev + + set inc [expr {[info exists csetprevmap($r)] + ? [struct::set size [struct::set intersect $csetprevmap($r) $prevrev]] + : 0}] + set out [expr {[info exists csetnextmap($r)] + ? [struct::set size [struct::set intersect $csetnextmap($r) $nextrev]] + : 0}] + + if {$inc && $out} { return pass } + if {$inc} { return prev } + if {$out} { return next } + return none + } + + proc Invert {dict} { + array set tmp {} + foreach {k values} $dict { + foreach v $values { lappend tmp($v) $k } + } + return [array get tmp] + } + + # # ## ### ##### ######## ############# + ## Configuration + + pragma -hastypeinfo no ; # no type introspection + pragma -hasinfo no ; # no object introspection + pragma -simpledispatch yes ; # simple fast dispatch + + # # ## ### ##### ######## ############# +} + +namespace eval ::vc::fossil::import::cvs::project { + namespace export revlink + namespace eval revlink { + namespace import ::vc::fossil::import::cvs::state + namespace import ::vc::tools::misc::* + namespace import ::vc::tools::trouble + namespace eval project { + namespace import ::vc::fossil::import::cvs::project::rev + } + namespace import ::vc::tools::log + log register csets + } +} + +# # ## ### ##### ######## ############# ##################### +## Ready +package provide vc::fossil::import::cvs::project::revlink 1.0 +return
Modified tools/cvs2fossil/lib/misc.tcl from [a87f1adea4] to [d837793c99].
@@ -37,19 +37,40 @@ proc nsp {n singular {plural {}}} { return "$n [sp $n $singular $plural]" } - # Find maximum in a list. + # Find maximum/minimum in a list. proc max {list} { set max -1 foreach e $list { if {$e < $max} continue set max $e } return $max + } + + proc min {list} { + set min {} + foreach e $list { + if {$min == {}} { + set min $e + } elseif {$e > $min} continue + set min $e + } + return $min + } + + proc max2 {a b} { + if {$a > $b} { return $a } + return $b + } + + proc min2 {a b} { + if {$a < $b} { return $a } + return $b } proc ldelete {lv item} { upvar 1 $lv list set pos [lsearch -exact $list $item] @@ -67,13 +88,13 @@ # # ## ### ##### ######## ############# } namespace eval ::vc::tools::misc { - namespace export sp nsp max ldelete striptrailingslash + namespace export sp nsp max min max2 min2 ldelete striptrailingslash } # ----------------------------------------------------------------------------- # Ready package provide vc::tools::misc 1.0 return
Modified tools/cvs2fossil/lib/pkgIndex.tcl from [dbfdadafec] to [a937f1cd3a].
@@ -19,14 +19,15 @@ package ifneeded vc::fossil::import::cvs::pass::initcsets 1.0 [list source [file join $dir c2f_pinitcsets.tcl]] package ifneeded vc::fossil::import::cvs::pass::breakrcycle 1.0 [list source [file join $dir c2f_pbreakrcycle.tcl]] package ifneeded vc::fossil::import::cvs::project 1.0 [list source [file join $dir c2f_project.tcl]] package ifneeded vc::fossil::import::cvs::project::lodmgr 1.0 [list source [file join $dir c2f_plodmgr.tcl]] package ifneeded vc::fossil::import::cvs::project::rev 1.0 [list source [file join $dir c2f_prev.tcl]] +package ifneeded vc::fossil::import::cvs::project::revlink 1.0 [list source [file join $dir c2f_prevlink.tcl]] package ifneeded vc::fossil::import::cvs::project::sym 1.0 [list source [file join $dir c2f_psym.tcl]] package ifneeded vc::fossil::import::cvs::project::trunk 1.0 [list source [file join $dir c2f_ptrunk.tcl]] package ifneeded vc::fossil::import::cvs::repository 1.0 [list source [file join $dir c2f_repository.tcl]] package ifneeded vc::fossil::import::cvs::state 1.0 [list source [file join $dir c2f_state.tcl]] package ifneeded vc::rcs::parser 1.0 [list source [file join $dir rcsparser.tcl]] package ifneeded vc::tools::log 1.0 [list source [file join $dir log.tcl]] package ifneeded vc::tools::misc 1.0 [list source [file join $dir misc.tcl]] package ifneeded vc::tools::trouble 1.0 [list source [file join $dir trouble.tcl]] package ifneeded vc::tools::id 1.0 [list source [file join $dir id.tcl]]