Overview
SHA1 Hash: | 85bd219d0b2eca43bad0d4e03ed6c04ea4bfdd02 |
---|---|
Date: | 2007-11-13 07:22:35 |
User: | aku |
Comment: | Continued work on pass 6. Completed creation of changeset graph (nodes, dependencies), started on topological iteration and breaking cycles. Basic iteration is complete, fiding a cycle ditto. Not yet done is to actually break a found cycle. Extended the changeset class with the necessary accessor methods (getting cset type, successors, time range). Note: Looking at my code it may be that my decision to save the cset order caused this pass to subsume the RevisionTopologicalSortPass of cvs2svn. Check again when I am done. Note 2: The test case (tcl repository, tcl project) had no cycles. |
Timelines: | ancestors | descendants | both | trunk |
Other Links: | files | ZIP archive | manifest |
Tags And Properties
- branch=trunk inherited from [a28c83647d]
- sym-trunk inherited from [a28c83647d]
Changes
[hide diffs]Modified tools/cvs2fossil/lib/c2f_pbreakrcycle.tcl from [fd1c396037] to [0d5836726f].
@@ -17,11 +17,15 @@ # # ## ### ##### ######## ############# ##################### ## Requirements package require Tcl 8.4 ; # Required runtime. package require snit ; # OO system. +package require struct::graph ; # Graph handling. +package require struct::list ; # Higher order list operations. package require vc::tools::log ; # User feedback. +package require vc::fossil::import::cvs::state ; # State storage. +package require vc::fossil::import::cvs::project::rev ; # Project level changesets # # ## ### ##### ######## ############# ##################### ## Register the pass with the management vc::fossil::import::cvs::pass define \ @@ -37,10 +41,19 @@ ## Public API typemethod setup {} { # Define the names and structure of the persistent state of # this pass. + + state writing csorder { + -- Commit order of changesets based on their dependencies + cid INTEGER NOT NULL REFERENCES changeset, + pos INTEGER NOT NULL, + UNIQUE (cid), + UNIQUE (pos) + } + return } typemethod load {} { # Pass manager interface. Executed to load data computed by @@ -50,22 +63,186 @@ } typemethod run {} { # Pass manager interface. Executed to perform the # functionality of the pass. + + state reading revision + + # We create a graph of the revision changesets, using the file + # level dependencies to construct a first approximation of + # them at the project level. Then look for cycles in that + # graph and break them. + + # 1. Create nodes for all relevant changesets and a mapping + # from the revisions to their changesets/nodes. + + log write 3 brkrcycle {Creating changeset graph, filling with nodes} + + set dg [struct::graph dg] + + state transaction { + foreach cset [project::rev all] { + if {[$cset bysymbol]} continue + dg node insert $cset + dg node set $cset timerange [$cset timerange] + } + } + + # 2. Find for all relevant changeset their revisions and their + # dependencies. Map the latter back to changesets and + # construct the corresponding arcs. + + log write 3 brkrcycle {Setting up node dependencies} + + state transaction { + foreach cset [project::rev all] { + if {[$cset bysymbol]} continue + foreach succ [$cset successors] { + dg arc insert $cset $succ + } + } + } + + # 3. Lastly we iterate the graph topologically. We mark off + # the nodes which have no predecessors, in order from + # oldest to youngest, saving and removing dependencies. If + # we find no nodes without predecessors we have a cycle, + # and work on breaking it. + + log write 3 brkrcycle {Computing changeset order, breaking cycles} + + InitializeCandidates $dg + state transaction { + while {1} { + while {[WithoutPredecessor $dg n]} { + SaveAndRemove $dg $n + } + if {![llength [dg nodes]]} break + set cycle [FindCycle $dg] + BreakCycle $dg $cycle + } + } + return } typemethod discard {} { # Pass manager interface. Executed for all passes after the # run passes, to remove all data of this pass from the state, # as being out of date. + + state discard csorder return } # # ## ### ##### ######## ############# ## Internal methods + + # Instead of searching the whole graph for the degree-0 nodes in + # each iteration we compute the list once to start, and then only + # update it incrementally based on the outgoing neighbours of the + # node chosen for commit. + + proc InitializeCandidates {dg} { + # bottom = list (list (node, range min, range max)) + ::variable bottom + foreach n [$dg nodes] { + if {[$dg node degree -in $n]} continue + lappend bottom [linsert [$dg node get $n timerange] 0 $n] + } + set bottom [lsort -index 1 -integer [lsort -index 2 -integer $bottom]] + return + } + + proc WithoutPredecessor {dg nv} { + ::variable bottom + + upvar 1 $nv n + if {![llength $bottom]} { return 0 } + + set n [lindex [lindex $bottom 0] 0] + set bottom [lrange $bottom 1 end] + set changed 0 + + # Update list of nodes without predecessor, based on the + # outgoing neighbours of the chosen node. This should be + # faster than iterating of the whole set of nodes, finding all + # without predecessors, sorting them by time, etc. pp. + foreach out [$dg nodes -out $n] { + if {[$dg node degree -in $out] > 1} continue + # Degree-1 neighbour, will have no predecessors after the + # removal of n. Put on the list. + lappend bottom [linsert [$dg node get $out timerange] 0 $out] + set changed 1 + } + if {$changed} { + set bottom [lsort -index 1 -integer [lsort -index 2 -integer $bottom]] + } + + # We do not delete the node immediately, to allow the Save + # procedure to save the dependencies as well (encoded in the + # arcs). + return 1 + } + + proc SaveAndRemove {dg n} { + ::variable at + set cid [$n id] + + log write 4 breakrcycle "Comitting @ $at: <$cid>" + state run { + INSERT INTO csorder (cid, pos) + VALUES ($cid, $at) + } + # TODO: Write the project level changeset dependencies as well. + incr at + $dg node delete $n + return + } + + proc FindCycle {dg} { + # This procedure is run if and only the graph is not empty and + # all nodes have predecessors. This means that each node is + # either part of a cycle or (indirectly) depending on a node + # in a cycle. We can start at an arbitrary node, follow its + # incoming edges to its predecessors until we see a node a + # second time. That node closes the cycle and the beginning is + # its first occurence. Note that we can choose an arbitrary + # predecessor of each node as well, we do not have to search. + + # We record for each node the index of the first appearance in + # the path, making it easy at the end to cut the cycle from + # it. + + # Choose arbitrary node to start our search at. + set start [lindex [$dg nodes] 0] + + # Initialize state, path of seen nodes, and when seen. + set path {} + array set seen {} + + while {1} { + # Stop searching when we have seen the current node + # already, the circle has been closed. + if {[info exists seen($start)]} break + lappend path $start + set seen($start) [llength $path] + # Choose arbitrary predecessor + set start [lindex [$dg nodes -in $start] 0] + } + + return [struct::list reverse [lrange $path $seen($start) end]] + } + + proc BreakCycle {dg cycle} { + trouble internal "Break cycle <$cycle>" + return + } + + typevariable at 0 ; # Counter for commit ids for the changesets. + typevariable bottom {} ; # List of candidate nodes for committing. # # ## ### ##### ######## ############# ## Configuration pragma -hasinstances no ; # singleton @@ -74,12 +251,16 @@ # # ## ### ##### ######## ############# } namespace eval ::vc::fossil::import::cvs::pass { - namespace export initcsets - namespace eval initcsets { + namespace export breakrcycle + namespace eval breakrcycle { + namespace import ::vc::fossil::import::cvs::state + namespace eval project { + namespace import ::vc::fossil::import::cvs::project::rev + } namespace import ::vc::tools::log log register brkrcycle } }
Modified tools/cvs2fossil/lib/c2f_prev.tcl from [971e3d4db2] to [27b0babb50].
@@ -35,17 +35,35 @@ set myproject $project set mytype $cstype set mysrcid $srcid set myrevisions $revisions - # Keep track of the generated changesets. + # Keep track of the generated changesets and of the inverse + # mapping from revisions to them. lappend mychangesets $self + foreach r $revisions { set myrevmap($r) $self } return } - method id {} { return $myid } + method id {} { return $myid } + method revisions {} { return $myrevisions } + method setid {id} { set myid $id ; return } + + method bysymbol {} { return [expr {$mytype eq "sym"}] } + method byrevision {} { return [expr {$mytype eq "rev"}] } + + method successors {} { + # NOTE / FUTURE: Possible bottleneck. + array set dependencies {} + PullSuccessorRevisions dependencies $myrevisions + set csets {} + foreach {_ child} [array get dependencies] { + lappend csets $myrevmap($child) + } + return [lsort -unique $csets] + } method breakinternaldependencies {} { # This method inspects the changesets for internal # dependencies. Nothing is done if there are no # such. Otherwise the changeset is split into a set of @@ -66,11 +84,11 @@ # Array of dependencies (parent -> child). This is pulled from # the state, and limited to successors within the changeset. array set dependencies {} - PullInternalDependencies dependencies $myrevisions + PullSuccessorRevisions dependencies $myrevisions if {![array size dependencies]} {return 0} ; # Nothing to break. log write 6 csets ...<$myid>....................................................... # We have internal dependencies to break. We now iterate over @@ -205,10 +223,19 @@ } } return } + method timerange {} { + set theset ('[join $myrevisions {','}]') + return [state run " + SELECT MIN(R.date), MAX(R.date) + FROM revision R + WHERE R.rid IN $theset + "] + } + # # ## ### ##### ######## ############# ## State variable myid ; # Id of the cset for the persistent state. variable myproject ; # Reference of the project object the changeset belongs to. @@ -227,11 +254,11 @@ SELECT tid, name FROM cstype; }] { set mycstype($name) $tid } return } - proc PullInternalDependencies {dv revisions} { + proc PullSuccessorRevisions {dv revisions} { upvar 1 $dv dependencies set theset ('[join $revisions {','}]') foreach {rid child} [state run " -- Primary children @@ -291,11 +318,11 @@ # to ensure that the following loop runs correctly. # # Note 2: start == end is not possible. It indicates a # self-dependency due to the uniqueness of positions, # and that is something we have ruled out already, see - # PullInternalDependencies. + # PullSuccessorRevisions. foreach {rid child} [array get dependencies] { set dkey [list $rid $child] set start $pos($rid) set end $pos($child) @@ -470,11 +497,12 @@ return } # # ## ### ##### ######## ############# - typevariable mychangesets {} ; # List of all known changesets. + typevariable mychangesets {} ; # List of all known changesets. + typevariable myrevmap -array {} ; # Map from revisions to their changeset. typemethod all {} { return $mychangesets }