@@ -17,8 +17,10 @@
## Requirements
package require Tcl 8.4 ; # Required runtime.
package require snit ; # OO system.
+package require vc::tools::misc ; # Text formatting
+package require vc::tools::trouble ; # Error reporting.
package require vc::tools::log ; # User feedback.
package require vc::fossil::import::cvs::state ; # State storage.
# # ## ### ##### ######## ############# #####################
@@ -48,156 +50,137 @@
# fragments without internal dependencies, transforming the
# internal dependencies into external ones. The new changesets
# are added to the list of all changesets.
- # Actually at most one split is performed, resulting in at
- # most one additional fragment. It is the caller's
- # responsibility to spli the resulting fragments further.
-
- # The code checks only sucessor dependencies, automatically
- # covering the predecessor dependencies as well (A sucessor
- # dependency a -> b is a predecessor dependency b -> a).
+ # We perform all necessary splits in one go, instead of only
+ # one. The previous algorithm, adapted from cvs2svn, computed
+ # a lot of state which was thrown away and then computed again
+ # for each of the fragments. It should be easier to update and
+ # reuse that state.
+
+ # The code checks only sucessor dependencies, as this
+ # automatically covers the predecessor dependencies as well (A
+ # successor dependency a -> b is also a predecessor dependency
+ # b -> a).
# Array of dependencies (parent -> child). This is pulled from
# the state, and limited to successors within the changeset.
+
array set dependencies {}
-
- set theset ('[join $myrevisions {','}]')
-
- foreach {rid child} [state run "
- SELECT R.rid, R.child
- FROM revision R
- WHERE R.rid IN $theset
- AND R.child IS NOT NULL
- AND R.child IN $theset
- UNION
- SELECT R.rid, R.dbchild
- FROM revision R
- WHERE R.rid IN $theset
- AND R.dbchild IS NOT NULL
- AND R.dbchild IN $theset
- UNION
- SELECT R.rid, B.brid
- FROM revision R, revisionbranchchildren B
- WHERE R.rid IN $theset
- AND R.rid = B.rid
- AND B.brid IN $theset
- "] {
- # Consider moving this to the integrity module.
- if {$rid == $child} {
- trouble internal "Revision $rid depends on itself."
- }
- set dependencies($rid) $child
- }
-
+ PullInternalDependencies dependencies $myrevisions
if {![array size dependencies]} {return 0} ; # Nothing to break.
+
+ log write 6 csets ...<$myid>.......................................................
# We have internal dependencies to break. We now iterate over
# all positions in the list (which is chronological, at least
# as far as the timestamps are correct and unique) and
# determine the best position for the break, by trying to
- # break as many dependencies as possible in one go.
-
- # First we create a map of positions to make it easier to
- # determine whether a dependency cross a particular index.
-
- array set pos {}
- array set crossing {}
- set n 0
- foreach rev $myrevisions {
- set pos($rev) $n
- set crossing($n) 0
- incr n
- }
-
- # Secondly we count the crossings per position, by iterating
- # over the recorded internal dependencies.
-
- foreach {rid child} [array get dependencies] {
- set start $pos($rid)
- set end $pos($child)
-
- # Note: If the timestamps are badly out of order it is
- # possible to have a backward successor dependency,
- # i.e. with start > end. We may have to swap the
- # indices to ensure that the following loop runs
- # correctly.
- #
- # Note 2: start == end is not possible. It indicates a
- # self-dependency due to the uniqueness of
- # positions, and that is something we have ruled
- # out already.
-
- if {$start > $end} {
- while {$end < $start} { incr crossing($end) ; incr end }
+ # break as many dependencies as possible in one go. When a
+ # break was found this is redone for the fragments coming and
+ # after, after upding the crossing information.
+
+ # Data structures:
+ # Map: POS revision id -> position in list.
+ # CROSS position in list -> number of dependencies crossing it
+ # DEPC dependency -> positions it crosses
+ # List: RANGE Of the positions itself.
+ # A dependency is a single-element map parent -> child
+
+ InitializeBreakState $myrevisions
+
+ set fragments {}
+ set pending [list $range]
+ set at 0
+ array set breaks {}
+
+ while {$at < [llength $pending]} {
+ set current [lindex $pending $at]
+
+ log write 6 csets ". . .. ... ..... ........ ............."
+ log write 6 csets "Scheduled [join [PRs [lrange $pending $at end]] { }]"
+ log write 6 csets "Considering [PR $current] \[$at/[llength $pending]\]"
+
+ set best [FindBestBreak $current]
+
+ if {$best < 0} {
+ # The inspected range has no internal
+ # dependencies. This is a complete fragment.
+ lappend fragments $current
+
+ log write 6 csets "No breaks, final"
} else {
- while {$start < $end} { incr crossing($start) ; incr start }
- }
- }
-
- # Now we can determine the best break location. First we look
- # for the locations with the maximal number of crossings. If
- # there are several we look for the shortest time interval
- # among them. If we still have multiple possibilities after
- # that we select the smallest index among these.
-
- set max -1
- set best {}
-
- foreach key [array names crossing] {
- set now $crossing($key)
- if {$now > $max} {
- set max $now
- set best $key
- continue
- } elseif {$now == $max} {
- lappend best $key
+ # Split the range and schedule the resulting fragments
+ # for further inspection. Remember the number of
+ # dependencies cut before we remove them from
+ # consideration, for documentation later.
+
+ set breaks($best) $cross($best)
+
+ log write 6 csets "Best break @ $best, cuts [nsp $cross($best) dependency dependencies]"
+
+ # Note: The value of best is an abolute location in
+ # myrevisions. Use the start of current to make it an
+ # index absolute to current.
+
+ set brel [expr {$best - [lindex $current 0]}]
+ set bnext $brel ; incr bnext
+ set fragbefore [lrange $current 0 $brel]
+ set fragafter [lrange $current $bnext end]
+
+ log write 6 csets "New pieces [PR $fragbefore] [PR $fragafter]"
+
+ if {![llength $fragbefore]} {
+ trouble internal "Tried to split off a zero-length fragment at the beginning"
+ }
+ if {![llength $fragafter]} {
+ trouble internal "Tried to split off a zero-length fragment at the end"
+ }
+
+ lappend pending $fragbefore $fragafter
+ CutAt $best
}
+
+ incr at
+ }
+
+ log write 6 csets ". . .. ... ..... ........ ............."
+
+ # Create changesets for the fragments, reusing the current one
+ # for the first fragment. We sort them in order to allow
+ # checking for gaps and nice messages.
+
+ set fragments [lsort -index 0 -integer $fragments]
+
+ #puts \t.[join [PRs $fragments] .\n\t.].
+
+ Border [lindex $fragments 0] firsts firste
+
+ if {$firsts != 0} {
+ trouble internal "Bad fragment start @ $firsts, gap, or before beginning of the range"
}
- if {[llength $best] > 1} {
- set min -1
- set newbest {}
- foreach at $best {
- set rat [lindex $myrevisions $at] ; incr at
- set rnext [lindex $myrevisions $at] ; incr at -1
- set tat [lindex [state run {SELECT R.date FROM revision R WHERE R.rid = $rat }] 0]
- set tnext [lindex [state run {SELECT R.date FROM revision R WHERE R.rid = $rnext}] 0]
- set delta [expr {$tnext - $tat}]
- if {($min < 0) || ($delta < $min)} {
- set min $delta
- set newbest $at
- } elseif {$delta == $min} {
- lappend newbest $at
- }
+ set laste $firste
+ foreach fragment [lrange $fragments 1 end] {
+ Border $fragment s e
+ if {$laste != ($s - 1)} {
+ trouble internal "Bad fragment border <$laste | $s>, gap or overlap"
}
- set best $newbest
- }
-
- if {[llength $best] > 1} {
- set best [lindex [lsort -integer -increasing $best] 0]
+
+ set new [$type %AUTO% $myproject $mytype $mysrcid [lrange $myrevisions $s $e]]
+ lappend csets $new
+
+ log write 4 csets "Breaking <$myid> @ $laste, new <[$new id]>, cutting $breaks($laste)"
+
+ set laste $e
}
- # Now we can split off a fragment.
-
- set bnext $best ; incr bnext
- set revbefore [lrange $myrevisions 0 $best]
- set revafter [lrange $myrevisions $bnext end]
-
- if {![llength $revbefore]} {
- trouble internal "Tried to split off a zero-length fragment at the beginning"
- }
- if {![llength $revafter]} {
- trouble internal "Tried to split off a zero-length fragment at the end"
+ if {$laste != ([llength $myrevisions]-1)} {
+ trouble internal "Bad fragment end @ $laste, gap, or beyond end of the range"
}
- lappend csets [set new [$type %AUTO% $myproject $mytype $mysrcid $revafter]]
- set myrevisions $revbefore
-
- log write 4 csets "Breaking <$myid> @$best, making <[$new id]>, cutting $crossing($best)"
-
- #puts "\tKeeping <$revbefore>"
- #puts "\tSplit off <$revafter>"
+ # Put the first fragment into the current changeset.
+ set myrevisions [lrange $myrevisions 0 $firste]
return 1
}
@@ -244,8 +227,249 @@
}] { set mycstype($name) $tid }
return
}
+ proc PullInternalDependencies {dv revisions} {
+ upvar 1 $dv dependencies
+ set theset ('[join $revisions {','}]')
+
+ foreach {rid child} [state run "
+ -- Primary children
+ SELECT R.rid, R.child
+ FROM revision R
+ WHERE R.rid IN $theset
+ AND R.child IS NOT NULL
+ AND R.child IN $theset
+ UNION
+ -- Transition NTDB to trunk
+ SELECT R.rid, R.dbchild
+ FROM revision R
+ WHERE R.rid IN $theset
+ AND R.dbchild IS NOT NULL
+ AND R.dbchild IN $theset
+ UNION
+ -- Secondary (branch) children
+ SELECT R.rid, B.brid
+ FROM revision R, revisionbranchchildren B
+ WHERE R.rid IN $theset
+ AND R.rid = B.rid
+ AND B.brid IN $theset
+ "] {
+ # Consider moving this to the integrity module.
+ if {$rid == $child} {
+ trouble internal "Revision $rid depends on itself."
+ }
+ set dependencies($rid) $child
+ }
+ }
+
+ proc InitializeBreakState {revisions} {
+ upvar 1 pos pos cross cross range range depc depc delta delta \
+ dependencies dependencies
+
+ # First we create a map of positions to make it easier to
+ # determine whether a dependency crosses a particular index.
+
+ array set pos {}
+ array set cross {}
+ array set depc {}
+ set range {}
+ set n 0
+ foreach rev $revisions {
+ lappend range $n
+ set pos($rev) $n
+ set cross($n) 0
+ incr n
+ }
+
+ # Secondly we count the crossings per position, by iterating
+ # over the recorded internal dependencies.
+
+ # Note: If the timestamps are badly out of order it is
+ # possible to have a backward successor dependency,
+ # i.e. with start > end. We may have to swap the indices
+ # to ensure that the following loop runs correctly.
+ #
+ # Note 2: start == end is not possible. It indicates a
+ # self-dependency due to the uniqueness of positions,
+ # and that is something we have ruled out already, see
+ # PullInternalDependencies.
+
+ foreach {rid child} [array get dependencies] {
+ set dkey [list $rid $child]
+ set start $pos($rid)
+ set end $pos($child)
+ set crosses {}
+
+ if {$start > $end} {
+ while {$end < $start} {
+ lappend crosses $end
+ incr cross($end)
+ incr end
+ }
+ } else {
+ while {$start < $end} {
+ lappend crosses $start
+ incr cross($start)
+ incr start
+ }
+ }
+ set depc($dkey) $crosses
+ }
+
+ InitializeDeltas $revisions
+ return
+ }
+
+ proc InitializeDeltas {revisions} {
+ upvar 1 delta delta
+
+ # Pull the timestamps for all revisions in the changesets and
+ # compute their deltas for use by the break finder.
+
+ array set delta {}
+ array set stamp {}
+
+ set theset ('[join $revisions {','}]')
+ foreach {rid time} [state run "
+ SELECT R.rid, R.date
+ FROM revision R
+ WHERE R.rid IN $theset
+ "] {
+ set stamp($rid) $time
+ }
+
+ set n 0
+ foreach rid [lrange $revisions 0 end-1] rnext [lrange $revisions 1 end] {
+ set delta($n) [expr {$stamp($rnext) - $stamp($rid)}]
+ incr n
+ }
+ return
+ }
+
+ proc FindBestBreak {range} {
+ upvar 1 cross cross delta delta
+
+ # Determine the best break location in the given range of
+ # positions. First we look for the locations with the maximal
+ # number of crossings. If there are several we look for the
+ # shortest time interval among them. If we still have multiple
+ # possibilities after that we select the earliest location
+ # among these.
+
+ # Note: If the maximal number of crossings is 0 then the range
+ # has no internal dependencies, and no break location at
+ # all. This possibility is signaled via result -1.
+
+ # Note: A range of length 1 or less cannot have internal
+ # dependencies, as that needs at least two revisions in
+ # the range.
+
+ if {[llength $range] < 2} { return -1 }
+
+ set max -1
+ set best {}
+
+ foreach location $range {
+ set crossings $cross($location)
+ if {$crossings > $max} {
+ set max $crossings
+ set best [list $location]
+ continue
+ } elseif {$crossings == $max} {
+ lappend best $location
+ }
+ }
+
+ if {$max == 0} { return -1 }
+ if {[llength $best] == 1} { return [lindex $best 0] }
+
+ set locations $best
+ set best {}
+ set min -1
+
+ foreach location $locations {
+ set interval $delta($location)
+ if {($min < 0) || ($interval < $min)} {
+ set min $interval
+ set best [list $location]
+ } elseif {$interval == $min} {
+ lappend best $location
+ }
+ }
+
+ if {[llength $best] == 1} { return [lindex $best 0] }
+
+ return [lindex [lsort -integer -increasing $best] 0]
+ }
+
+ proc CutAt {location} {
+ upvar 1 cross cross depc depc
+
+ # It was decided to split the changeset at the given
+ # location. This cuts a number of dependencies. Here we update
+ # the cross information so that the break finder has accurate
+ # data when we look at the generated fragments.
+
+ set six [log visible? 6]
+
+ foreach {dep range} [array get depc] {
+ # Check all dependencies still known, take their range and
+ # see if the break location falls within.
+
+ Border $range s e
+ if {$location < $s} continue ; # break before range, ignore
+ if {$location > $e} continue ; # break after range, ignore.
+
+ # This dependency crosses the break location. We remove it
+ # from the crossings counters, and then also from the set
+ # of known dependencies, as we are done with it.
+
+ foreach loc $depc($dep) { incr cross($loc) -1 }
+ unset depc($dep)
+
+ if {!$six} continue
+
+ struct::list assign $dep parent child
+ log write 6 csets "Broke dependency [PD $parent] --> [PD $child]"
+ }
+
+ return
+ }
+
+ # Print identifying data for a revision (project, file, dotted rev
+ # number), for high verbosity log output.
+
+ proc PD {id} {
+ foreach {p f r} [state run {
+ SELECT P.name , F.name, R.rev
+ FROM revision R, file F, project P
+ WHERE R.rid = $id
+ AND R.fid = F.fid
+ AND F.pid = P.pid
+ }] break
+ return "'$p : $f/$r'"
+ }
+
+ # Printing one or more ranges, formatted, and only their border to
+ # keep the strings short.
+
+ proc PRs {ranges} {
+ return [struct::list map $ranges [myproc PR]]
+ }
+
+ proc PR {range} {
+ Border $range s e
+ return <${s}...${e}>
+ }
+
+ proc Border {range sv ev} {
+ upvar 1 $sv s $ev e
+ set s [lindex $range 0]
+ set e [lindex $range end]
+ return
+ }
+
# # ## ### ##### ######## #############
## Configuration
pragma -hastypeinfo no ; # no type introspection
@@ -258,8 +482,10 @@
namespace eval ::vc::fossil::import::cvs::project {
namespace export rev
namespace eval rev {
namespace import ::vc::fossil::import::cvs::state
+ namespace import ::vc::tools::misc::*
+ namespace import ::vc::tools::trouble
namespace import ::vc::tools::log
log register csets
}
}