Fossil: Artifact Content

Artifact a02d54b2ad4d502956c78cd4c04008fff65d5f44

File tools/cvs2fossil/lib/c2f_cyclebreaker.tcl part of check-in [eabaea870a] - Added a number of assertions and must-not-happens with associated log output. Plus some small tweaks, and notes. by aku on 2007-11-24 04:15:46.
## -*- tcl -*-
# # ## ### ##### ######## ############# #####################
## Copyright (c) 2007 Andreas Kupries.
#
# This software is licensed as described in the file LICENSE, which
# you should have received as part of this distribution.
#
# This software consists of voluntary contributions made by many
# individuals.  For exact contribution history, see the revision
# history and logs, available at http://fossil-scm.hwaci.com/fossil
# # ## ### ##### ######## ############# #####################

## This file provides a helper package for the passes 6 and 7 which
## contains the common code of the cycle breaking algorithm.

# # ## ### ##### ######## ############# #####################
## Requirements

package require Tcl 8.4                                   ; # Required runtime.
package require snit                                      ; # OO system.
package require struct::graph                             ; # Graph handling.
package require struct::list                              ; # Higher order list operations.
package require vc::tools::dot                            ; # User feedback. DOT export.
package require vc::tools::log                            ; # User feedback.
package require vc::tools::trouble                        ; # Error reporting.
package require vc::tools::misc                           ; # Text formatting.
package require vc::fossil::import::cvs::project::rev     ; # Project level changesets
package require vc::fossil::import::cvs::project::revlink ; # Cycle links.

# # ## ### ##### ######## ############# #####################
## 

snit::type ::vc::fossil::import::cvs::cyclebreaker {
    # # ## ### ##### ######## #############
    ## Public API

    typemethod precmd {cmd} {
	# Remember CMD as the callback which PreHook runs on the
	# freshly built graph, before the traversal starts. The graph
	# is appended to CMD as its last argument.
	::variable myprecmd
	set myprecmd $cmd
	return
    }

    typemethod savecmd {cmd} {
	# Remember CMD as the callback which ProcessedHook runs for
	# each node taken out of the graph. Graph, position and
	# changeset are appended to CMD as its last arguments.
	::variable mysavecmd
	set mysavecmd $cmd
	return
    }
 
    typemethod breakcmd {cmd} {
	# Remember CMD as the callback which BreakCycleHook runs when
	# the traversal gets stuck on a cycle. The graph is appended
	# to CMD as its last argument.
	::variable mybreakcmd
	set mybreakcmd $cmd
	return
    }

    # # ## ### ##### ######## #############

    typemethod dotsto {path} {
	# Set the directory the .dot files are written into. While
	# this is the empty string Mark does not export anything.
	::variable mydotdestination
	set mydotdestination $path
	return
    }

    typemethod dot {label changesets} {
	# Build the dependency graph for CHANGESETS (without the
	# setup log output), hand it to Mark for export, and then
	# throw it away again. LABEL becomes the prefix of the file
	# name, and the file counter restarts at 0.

	::variable mydotprefix
	::variable mydotid
	set mydotprefix $label
	set mydotid     0

	set graph [Setup $changesets 0]
	Mark $graph
	$graph destroy
	return
    }

    typemethod mark {graph suffix {subgraph {}}} {
	# Public entry point to the .dot export. Forwards all its
	# arguments unchanged to the internal procedure Mark, which
	# does nothing unless a destination directory was set.
	Mark $graph $suffix $subgraph
	return
    }

    # # ## ### ##### ######## #############

    typemethod run {label changesetcmd} {
	# Sort the changesets delivered by CHANGESETCMD topologically,
	# breaking the dependency cycles found on the way.
	#
	# label        - Prefix for the names of exported .dot files.
	# changesetcmd - Command prefix, run at the global level. Its
	#                result is the list of changesets to process.
	#
	# The hooks configured via precmd, savecmd, and breakcmd are
	# cleared when the traversal is done.

	::variable myat        0
	::variable mydotprefix $label
	::variable mydotid     0

	# We create a graph of the revision changesets, using the file
	# level dependencies to construct a first approximation of the
	# dependencies at the project level. Then we look for cycles
	# in that graph and break them.

	# 1. Create nodes for all relevant changesets and a mapping
	#    from the revisions to their changesets/nodes.
	# 2. Construct the arcs between them. Both steps are done by
	#    Setup.

	set changesets [uplevel #0 $changesetcmd]
	set dg [Setup $changesets]

	# 3. Lastly we iterate the graph topologically. We mark off
	#    the nodes which have no predecessors, in order from
	#    oldest to youngest, saving and removing dependencies. If
	#    we find no nodes without predecessors we have a cycle,
	#    and work on breaking it.

	log write 3 cyclebreaker {Now sorting the changesets, breaking cycles}

	InitializeCandidates $dg
	while {1} {
	    while {[WithoutPredecessor $dg n]} {
		# The node is deleted only after the hook has run, so
		# that the hook can still see its arcs.
		ProcessedHook $dg $n $myat
		$dg node delete $n
		incr myat
		ShowPendingNodes
	    }

	    # Query the graph through the handle we hold. The bare
	    # word 'dg' worked only as long as Setup named its graph
	    # command exactly that.
	    if {![llength [$dg nodes]]} break

	    # Nodes are left, but none without predecessors. That
	    # means we have a cycle.
	    BreakCycleHook       $dg
	    InitializeCandidates $dg
	}

	$dg destroy

	log write 3 cyclebreaker Done.
	ClearHooks

	# Reread the graph and dump its final form, if graph export
	# was activated.

	::variable mydotdestination
	if {$mydotdestination eq ""} return

	set   dg [Setup [uplevel #0 $changesetcmd] 0]
	Mark $dg -done
	$dg destroy
	return
    }

    # # ## ### ##### ######## #############

    typemethod break {graph} {
	# Locate one cycle in GRAPH and break it by splitting one of
	# the changesets on it.
	set cycle [FindCycle $graph]
	BreakCycle $graph $cycle
	return
    }

    typemethod replace {graph n replacements} {
	# Public entry point for swapping node N of GRAPH for the
	# changesets in REPLACEMENTS. Forwards all its arguments
	# unchanged to the internal procedure Replace.
	Replace $graph $n $replacements
	return
    }

    # # ## ### ##### ######## #############
    ## Internal methods

    proc Setup {changesets {log 1}} {
	# Build the dependency graph for CHANGESETS and return it.
	#
	# changesets - List of changeset objects. Each becomes a node.
	# log        - Boolean. If set (default) progress is logged
	#              and the initial graph is handed to Mark.
	#
	# The configured pre-command is run on the graph before it is
	# returned. The application is aborted via 'trouble abort?'
	# if self-referencing changesets were found.

	if {$log} {
	    log write 3 cyclebreaker "Creating changeset graph, filling with nodes"
	    log write 3 cyclebreaker "Adding [nsp [llength $changesets] node]"
	}

	set dg [struct::graph dg]

	foreach cset $changesets {
	    $dg node insert $cset
	    $dg node set    $cset timerange [$cset timerange]
	    $dg node set    $cset label     [ID $cset]
	    $dg node set    $cset __id__    [$cset id]
	}

	# 2. Find for all relevant changeset their revisions and their
	#    dependencies. Map the latter back to changesets and
	#    construct the corresponding arcs.

	if {$log} {
	    log write 3 cyclebreaker {Setting up node dependencies}
	}

	foreach cset $changesets {
	    foreach succ [$cset successors] {
		# Changesets may have dependencies outside of the
		# chosen set. These are ignored
		if {![$dg node exists $succ]} continue
		$dg arc insert $cset $succ

		# Check for changesets referencing themselves. Such a
		# loop shows that the changeset in question has
		# internal dependencies. Something which is supposed
		# to be not possible, as pass 5 (InitCsets) takes care
		# to transform internal into external dependencies by
		# breaking the relevant changesets apart. So having
		# one indicates big trouble in pass 5. We report them
		# and dump internal structures to make it easier to
		# trace the links causing the problem.
		if {$succ eq $cset} {
		    trouble fatal "Self-referencing changeset <[$cset id]>"
		    log write 2 cyclebreaker "LOOP changeset <[$cset id]> __________________"

		    # Drop the map of any previously reported
		    # changeset first. 'array set' only adds and
		    # overwrites, so without this the dump for this
		    # changeset would also list stale revisions of
		    # the earlier ones.
		    array unset nmap
		    array set nmap [$cset nextmap]
		    foreach r [lsort -dict [array names nmap]] {
			foreach succrev $nmap($r) {
			    log write 2 cyclebreaker \
				"LOOP * rev <$r> --> rev <$succrev> --> cs [join [struct::list map [project::rev ofrev $succrev] [myproc ID]] { }]"
			}
		    }
		}
	    }
	}

	# Run the user hook to manipulate the graph before
	# consummation.

	if {$log} { Mark $dg -start }
	PreHook $dg

	# This kills the application if loops (see above) were found.
	trouble abort?
	return  $dg
    }

    # Instead of searching the whole graph for the degree-0 nodes in
    # each iteration we compute the list once to start, and then only
    # update it incrementally based on the outgoing neighbours of the
    # node chosen for commit.

    proc InitializeCandidates {dg} {
	# Compute the list of commit candidates from scratch, i.e.
	# all nodes of DG which have no incoming arcs.
	#
	# bottom = list (list (node, range min, range max))
	::variable mybottom

	# Start from a clean slate. In a regular run the list is
	# already empty whenever we get here, but leftovers of a
	# traversal which was aborted by an error would otherwise
	# leak into this one, as nodes of a graph which is gone.
	set mybottom {}

	foreach n [$dg nodes] {
	    if {[$dg node degree -in $n]} continue
	    lappend mybottom [linsert [$dg node get $n timerange] 0 $n]
	}

	# lsort is stable. Sorting by the third element first and the
	# second element last yields an order by the second element,
	# with ties resolved by the third.
	set mybottom [lsort -index 1 -integer [lsort -index 2 -integer $mybottom]]
	ShowPendingNodes
	return
    }

    proc WithoutPredecessor {dg nv} {
	# Take the first candidate off the list of nodes without
	# predecessors and store it into the caller's variable NV.
	# Returns 1 if a node was delivered, and 0 if the list was
	# empty (NV is left untouched in that case).
	#
	# The node itself stays in the graph. Removing it is the job
	# of the caller, after the dependencies encoded in its arcs
	# have been saved.

	::variable mybottom
	upvar 1 $nv chosen

	if {[llength $mybottom] == 0} {
	    return 0
	}

	# Pop the head of the candidate list.
	set head     [lindex $mybottom 0]
	set chosen   [lindex $head 0]
	set mybottom [lrange $mybottom 1 end]

	# Successors whose only remaining incoming arc comes from the
	# chosen node become candidates themselves. Looking just at
	# these neighbours is cheaper than rescanning the whole graph
	# for nodes without predecessors.
	set added 0
	foreach succ [$dg nodes -out $chosen] {
	    if {[$dg node degree -in $succ] <= 1} {
		lappend mybottom [linsert [$dg node get $succ timerange] 0 $succ]
		incr added
	    }
	}

	# Re-establish the order only when something was added.
	if {$added > 0} {
	    set byend    [lsort -index 2 -integer $mybottom]
	    set mybottom [lsort -index 1 -integer $byend]
	}
	return 1
    }

    proc ShowPendingNodes {} {
	# Dump the current list of candidates to the log. Nothing is
	# formatted unless the verbosity is at least 10.
	if {[log verbosity?] >= 10} {
	    ::variable mybottom
	    set items [struct::list map $mybottom [myproc FormatPendingItem]]
	    log write 10 cyclebreaker "Pending: $items"
	}
	return
    }

    proc FormatPendingItem {item} {
	# ITEM is a candidate entry whose first element is a
	# changeset. Returns the entry with that element replaced by
	# the changeset's id in angle brackets.
	set cset [lindex $item 0]
	return [lreplace $item 0 0 "<[$cset id]>"]
    }

    proc FindCycle {dg} {
	# Returns the nodes of one cycle in DG.
	#
	# Precondition: The graph is not empty and every node has at
	# least one predecessor. Each node is then either part of a
	# cycle, or (indirectly) depends on a node in a cycle.
	# Walking backward from an arbitrary node, always to an
	# arbitrary predecessor, must therefore reach some node a
	# second time. That node closes the cycle, and the cycle
	# begins at its first occurrence in the path walked.

	# Arbitrary starting point.
	set current [lindex [$dg nodes] 0]

	# trail   = nodes in the order they were visited.
	# firstat = node -> index of its first appearance in the
	#           trail, making it easy to cut the cycle out at the
	#           end.
	set       trail   {}
	array set firstat {}

	while {![info exists firstat($current)]} {
	    set firstat($current) [llength $trail]
	    lappend trail $current
	    # Any predecessor will do, there is no need to search.
	    set current [lindex [$dg nodes -in $current] 0]
	}

	# The trail was walked against the direction of the arcs, so
	# it is reversed for the result.
	set loop [lrange $trail $firstat($current) end]
	return [struct::list reverse $loop]
    }

    proc ID {cset} {
	# Returns the id of changeset CSET, in angle brackets.
	set id [$cset id]
	return "<$id>"
    }

    proc BreakCycle {dg cycle} {
	# Break CYCLE, a list of changesets forming a cycle in graph
	# DG, by splitting one of its changesets apart. The fragments
	# created by the split take the place of the split changeset
	# in the graph.

	# NOTE/TODO. Move this map operation to project::rev, as typemethod.
	set cprint [join [struct::list map $cycle [myproc ID]] { }]

	# Extend the cycle by its first two elements, so that sliding
	# a window of three over the result visits every changeset of
	# the cycle once as the middle element.
	lappend cycle [lindex $cycle 0] [lindex $cycle 1]
	set lefts   [lrange $cycle 0 end-2]
	set middles [lrange $cycle 1 end-1]
	set rights  [lrange $cycle 2 end]

	set bestlink {}
	set bestnode {}

	# Each triple PREV -> CSET -> NEXT is a 'link' of the cycle.
	# All of them are compared and the best location for at least
	# weakening the cycle is kept. Links which lose a comparison
	# are destroyed immediately.
	foreach prev $lefts cset $middles next $rights {
	    set link [project::revlink %AUTO% $prev $cset $next]

	    if {$bestlink ne ""} {
		if {![$link betterthan $bestlink]} {
		    $link destroy
		    continue
		}
		$bestlink destroy
	    }
	    set bestlink $link
	    set bestnode $cset
	}

	log write 5 cyclebreaker "Breaking cycle ($cprint) by splitting changeset <[$bestnode id]>"

	# The id is captured now, for use in the export taken after
	# the split.
	set ID [$bestnode id]
	Mark $dg "-${ID}-before"

	set newcsets [$bestlink break]
	$bestlink destroy

	# At this point the old changeset (BESTNODE) is gone already.
	# We remove it from the graph as well and then enter the
	# fragments generated for it.
	Replace $dg $bestnode $newcsets

	Mark $dg "-${ID}-after"
	return
    }

    # TODO: This should be a graph method.
    proc HasArc {dg a b} {
	# Returns 1 if B is among the outgoing neighbours of A in the
	# graph DG, and 0 otherwise.
	#8.5: return [expr {$b in [$dg nodes -out $a]}]
	set neighbours [$dg nodes -out $a]
	return [expr {[lsearch -exact $neighbours $b] >= 0}]
    }

    proc Mark {dg {suffix {}} {subgraph {}}} {
	# Export graph DG as a .dot file into the configured
	# destination directory. Does nothing if no destination was
	# set.
	#
	# The file name is built from the current prefix, the running
	# export counter, and SUFFIX. The counter is incremented
	# after each export. SUBGRAPH is passed through to the
	# writer.

	::variable mydotdestination
	::variable mydotprefix
	::variable mydotid

	if {$mydotdestination ne ""} {
	    set fname "${mydotdestination}/${mydotprefix}${mydotid}${suffix}.dot"
	    file mkdir [file dirname $fname]
	    dot write $dg "${mydotprefix}${suffix}" $fname $subgraph
	    incr mydotid

	    log write 5 cyclebreaker ".dot export $fname"
	}
	return
    }

    proc Replace {dg n replacements} {
	# Replace node N of graph DG with the changesets listed in
	# REPLACEMENTS, and rebuild the arcs around them.
	#
	# NOTE. The incoming neighbours of N have to be remembered
	# before N is deleted, and their successors recomputed after
	# the new nodes are in place. Their outgoing arcs may now go
	# to one or more of the new nodes, and not redoing them may
	# cause us to miss cycles, leaving them in, unbroken.

	set predecessors [$dg nodes -in $n]
	$dg node delete $n

	# Step 1. Enter all fragments as nodes, before any arcs are
	# made.
	foreach fragment $replacements {
	    $dg node insert $fragment
	    $dg node set    $fragment timerange [$fragment timerange]
	    $dg node set    $fragment label     [ID $fragment]
	    $dg node set    $fragment __id__    [$fragment id]
	}

	# Step 2. Outgoing arcs of the fragments. Successors outside
	# of the graph are ignored.
	foreach fragment $replacements {
	    foreach target [$fragment successors] {
		if {[$dg node exists $target]} {
		    $dg arc insert $fragment $target
		    if {$target eq $fragment} {
			trouble internal "Self-referencing changeset <[$fragment id]>"
		    }
		}
	    }
	}

	# Step 3. Outgoing arcs of the former predecessors. Arcs
	# which are already in the graph are not duplicated, and
	# successors outside of the graph are ignored again.
	foreach origin $predecessors {
	    foreach target [$origin successors] {
		# TODO HasArc should be graph method.
		if {[$dg node exists $target] && ![HasArc $dg $origin $target]} {
		    $dg arc insert $origin $target
		}
	    }
	}

	return
    }

    # # ## ### ##### ######## #############
    ## Callback invocation ...

    proc PreHook {graph} {
	# Run the pre-command, if one is configured, at the global
	# level, with GRAPH appended as its last argument. This gives
	# the user of the cycle breaker the chance to work with the
	# graph between setup and consummation. The result is handed
	# to Mark afterward.

	::variable myprecmd
	if {[llength $myprecmd]} {
	    set cmd $myprecmd
	    lappend cmd $graph
	    uplevel #0 $cmd
	    Mark $graph -pre-done
	}
	return
    }

    proc ProcessedHook {dg cset pos} {
	# Run the save-command, if one is configured, at the global
	# level, with the graph DG, the position POS, and the
	# changeset CSET appended as arguments, in this order. This
	# gives the user of the cycle breaker the chance to work with
	# the changeset before it is removed from the graph.

	::variable mysavecmd
	if {[llength $mysavecmd]} {
	    set cmd $mysavecmd
	    lappend cmd $dg $pos $cset
	    uplevel #0 $cmd
	}
	return
    }

    proc BreakCycleHook {graph} {
	# Hand GRAPH to the configured break-command, which is run at
	# the global level with the graph appended as its last
	# argument. Getting here without a break-command is an error:
	# it is reported as fatal and the process exits.

	::variable mybreakcmd
	if {[llength $mybreakcmd] == 0} {
	    trouble fatal "Found a cycle, expecting none."
	    exit 1
	}

	set cmd $mybreakcmd
	lappend cmd $graph
	uplevel #0 $cmd
	return
    }

    proc ClearHooks {} {
	# Reset all three callbacks (pre, save, break) to empty,
	# which disables them.
	foreach hook {myprecmd mysavecmd mybreakcmd} {
	    ::variable $hook {}
	}
	return
    }

    # # ## ### ##### ######## #############

    # Counter for commit ids for the changesets.
    typevariable myat 0

    # List of the candidate nodes for committing.
    typevariable mybottom {}

    # Callback, change graph before walk.
    typevariable myprecmd {}

    # Callback, for each processed node.
    typevariable mysavecmd {}

    # Callback, for each found cycle.
    typevariable mybreakcmd {}

    # Destination directory for the generated .dot files.
    typevariable mydotdestination {}

    # Prefix for dot files when exporting the graphs.
    typevariable mydotprefix {}

    # Counter for dot file name generation.
    typevariable mydotid 0

    # # ## ### ##### ######## #############
    ## Configuration

    # The type is a singleton (no instances), provides no
    # introspection (no typeinfo), and is immortal (no type
    # destroy).
    pragma \
	-hasinstances   no \
	-hastypeinfo    no \
	-hastypedestroy no

    # # ## ### ##### ######## #############
}

# Make the cyclebreaker command importable by other packages.
namespace eval ::vc::fossil::import::cvs {
    namespace export cyclebreaker
}

# Bring the project-level commands into the nested 'project'
# namespace, so that the code of the type can reach them as
# project::rev and project::revlink.
namespace eval ::vc::fossil::import::cvs::cyclebreaker::project {
    namespace import ::vc::fossil::import::cvs::project::rev
    namespace import ::vc::fossil::import::cvs::project::revlink
}

# Bring the tool commands into the namespace of the type itself, and
# register the type with the logging system. The registration comes
# last, as it uses the imported 'log' command.
namespace eval ::vc::fossil::import::cvs::cyclebreaker {
    namespace import ::vc::tools::misc::*
    namespace import ::vc::tools::log
    namespace import ::vc::tools::trouble
    namespace import ::vc::tools::dot
    log register cyclebreaker
}

# # ## ### ##### ######## ############# #####################
## Ready

# Announce the package and its version to the package management.
package provide vc::fossil::import::cvs::cyclebreaker 1.0
# Stop the evaluation of this file here.
return