Check-in [e1dbf3186d]
Not logged in
Overview

SHA1 Hash:e1dbf3186dd40762f79a22957096dfa1a498875a
Date: 2008-02-04 06:05:11
User: aku
Comment:Reworked the revision import to use the new state tracking system instead of the simple array. Moved some log outputs. Added a file listing the known problems to prevent me from forgetting stuff as it piles up :/
Timelines: ancestors | descendants | both | trunk
Other Links: files | ZIP archive | manifest

Tags And Properties
Changes
[hide diffs]

Added cvs2fossil.txt version [0a03724c01]

@@ -1,1 +1,56 @@
+
+Known problems and areas to work on
+===================================
+
+*	Currently not properly tracking when a file is removed on some
+	branch (detectable by a 'dead' revision (optype)) during the
+	import of changesets.
+
+*	Not yet able to handle the specification of multiple projects
+	for one CVS repository. I.e. I can, for example, import all of
+	tcllib, or a single subproject of tcllib, like tklib, but not
+	multiple sub-projects in one go.
+
+*	An internal error thrown when trying to import tcllib of
+	tcllib shows that I am apparently not properly handling the
+	possibility of more than one symbol used to create a
+	vendor-branch with.
+
+	In tcllib most files (18) have 'tcllib-vendor-branch' as the
+	name of their vendor branch, done in 2000, however two files
+	use the name 'vendor' instead, they were done in 2003. Each
+	set of files corresponds to a single changeset.
+
+	This causes the code importing the changesets to flip out when
+	the second changeset tries to create ':trunk:' and finds it
+	already existing (both changesets are the last trunk-changeset
+	on the vendor branch :) )
+
+	Not sure yet if I should try to abort this at the beginning,
+	i.e. CVS integrity failure, force the user to manually edit
+	the RCS archives to bring the symbol used for the vendor
+	branch into sync. Or if I should allow the import to let this
+	slide by, by simply assuming that all such second changesets
+	should not try to create the :trunk: if it exists.
+
+*	An internal error thrown when trying to import bwidget of
+	tcllib shows that there has to be some situation I am not
+	handling correctly in the cycle-breaker and sorting passes.
+
+	It tries to import a changeset on the
+	'scriptics-sc-2-0-beta-branch' line of development (X), which
+	has no commits yet. So it goes to the parent LOD to get the
+	state we are spawning from. This parent is
+	'scriptics-sc-1-1-branch' (Y). And it has no changesets
+	committed to it yet. That should not be possible, the ordering
+	constraints should have put all changesets for Y before the
+	changesets of X, and Y had to have at least one changeset,
+	from which the branch could be spawned.
+
+	This needs deep diving into the various linkages to understand
+	what is happening, or not happening, depending.
+
+	Note: The code I had before more fully tracking the workspace
+	      state of the various lods wrongly slid over this problem
+	      without erroring out.
 

Modified tools/cvs2fossil/lib/c2f_file.tcl from [8f9fa8b308] to [2d5c2ae5ca].

@@ -275,10 +275,12 @@
 
     # # ## ### ##### ######## #############
     ## Pass XII (Import).
 
     method pushto {repository} {
+	log write 2 file {Importing file "$mypath"}
+
 	set ws [$repository workspace]
 	struct::list assign [$self Expand $ws] filemap revmap
 	# filemap = dict (path -> uuid)
 	# revmap  = dict (path -> rid)
 

Modified tools/cvs2fossil/lib/c2f_pimport.tcl from [2ae743b09e] to [61842ef226].

@@ -22,10 +22,11 @@
 package require snit                                      ; # OO system.
 package require vc::tools::log                            ; # User feedback.
 package require vc::fossil::import::cvs::repository       ; # Repository management.
 package require vc::fossil::import::cvs::state            ; # State storage.
 package require vc::fossil::import::cvs::fossil           ; # Access to fossil repositories.
+package require vc::fossil::import::cvs::ristate          ; # Import state (revisions)
 
 # # ## ### ##### ######## ############# #####################
 ## Register the pass with the management
 
 vc::fossil::import::cvs::pass define \
@@ -84,26 +85,25 @@
 
 	foreach project [repository projects] {
 	    log write 1 import {Importing project "[$project base]"}
 
 	    set fossil [fossil %AUTO%]
+	    set rstate [ristate %AUTO%]
 
 	    state transaction {
 		# Layer I: Files and their revisions
 		foreach file [$project files] {
-		    set path [$file path]
-		    log write 2 import {Importing file "$path"}
 		    $file pushto $fossil
 		}
 		# Layer II: Changesets
-		array set rstate {}
 		foreach {revision date} [$project revisionsinorder] {
-		    log write 2 import {Importing revision [$revision str]}
-		    $revision pushto rstate $fossil $date
+		    $revision pushto $fossil $date $rstate
 		}
 		unset rstate
 	    }
+
+	    $rstate destroy
 
 	    # At last copy the temporary repository file to its final
 	    # destination and release the associated memory.
 
 	    $fossil finalize [$project base].fsl
@@ -140,10 +140,11 @@
     namespace export import
     namespace eval import {
 	namespace import ::vc::fossil::import::cvs::repository
 	namespace import ::vc::fossil::import::cvs::state
 	namespace import ::vc::fossil::import::cvs::fossil
+	namespace import ::vc::fossil::import::cvs::ristate
 	namespace import ::vc::tools::log
 	log register import
     }
 }
 

Modified tools/cvs2fossil/lib/c2f_prev.tcl from [b562462581] to [c36a0d49d0].

@@ -389,13 +389,11 @@
 	if {!$kill} return
 	trouble internal "[$self str] depends on itself"
 	return
     }
 
-    method pushto {sv repository date} {
-	upvar 1 $sv state
-
+    method pushto {repository date rstate} {
 	# Generate and import the manifest for this changeset.
 	#
 	# Data needed:
 	# - Commit message               (-- mysrcid -> repository meta)
 	# - User doing the commit        (s.a.)
@@ -408,19 +406,36 @@
 	# - List of the file revisions in the changeset.
 
 	struct::list assign [$myproject getmeta $mysrcid] __ branch user message
 	struct::list assign $branch __ lodname
 
+	log write 2 csets {Importing revision [$self str] on $lodname}
+
 	# Perform the import. As part of that we determine the parent
 	# we need, and convert the list of items in the changeset into
 	# uuids and printable data.
 
-	set uuid [Updatestate state $lodname \
-		      [$repository importrevision [$self str] \
-			   $user $message $date \
-			   [Getparent state $lodname $myproject $myitems] \
-			   [Getrevisioninfo $myitems]]]
+	struct::list assign [Getisdefault $myitems] isdefault lastdefaultontrunk
+
+	log write 8 csets {LOD    '$lodname'}
+	log write 8 csets { def?  $isdefault}
+	log write 8 csets { last? $lastdefaultontrunk}
+
+	set lws  [Getworkspace    $rstate $lodname $myproject $isdefault]
+	$lws add [Getrevisioninfo $myitems]
+
+	set uuid [$repository importrevision [$self str] \
+		      $user $message $date \
+		      [$lws getid] [$lws get]]
+
+	# Remember the imported changeset in the state, under our
+	# LOD. And if it is the last trunk changeset on the vendor
+	# branch then the revision is also the actual root of the
+	# :trunk:, so we remember it as such in the state.
+
+	$lws defid $uuid
+	if {$lastdefaultontrunk} { $rstate new :trunk: [$lws name] }
 
 	# Remember the whole changeset / uuid mapping, for the tags.
 
 	state run {
 	    INSERT INTO csuuid (cid,   uuid)
@@ -442,80 +457,58 @@
 	    lappend revisions $frid $path $fname/$revnr
 	}
 	return $revisions
     }
 
-    proc Getparent {sv lodname project items} {
-	upvar 1 $sv state
-
-	struct::list assign [Getisdefault $items] isdefault lastdefaultontrunk
-
-	log write 8 csets {LOD    '$lodname'}
-	log write 8 csets { def?  $isdefault}
-	log write 8 csets { last? $lastdefaultontrunk}
-
-	foreach k [lsort [array names state]] {
-	    log write 8 csets {    $k = $state($k)}
-	}
-
-	# See (a) below, we have to remember if the changeset is last
-	# on vendor branch also belonging to trunk even if we find a
-	# parent in the state. The caller will later (after import)
-	# make us the first trunk changeset in the state (See (**)).
-
-	if {$lastdefaultontrunk} {
-	    set state(:vendor:last:) .
-	}
-
-	# The state array holds for each line-of-development (LOD) the
-	# last committed changeset belonging to that LOD.
+    proc Getworkspace {rstate lodname project isdefault} {
+
+	# The state object holds the workspace state of each known
+	# line-of-development (LOD), up to the last committed
+	# changeset belonging to that LOD.
 
 	# (*) Standard handling if in-LOD changesets. If the LOD of
 	#     the current changeset exists in the state (= has been
-	#     committed to) then the stored changeset is the parent we
-	#     are looking for.
-
-	if {[info exists state($lodname)]} {
-	    return $state($lodname)
-	}
-
-	# If the LOD is not yet known the current changeset can either
-	# be
-	# (a) the root of a vendor branch,
-	# (b) the root of the trunk LOD, or
+	#     committed to) then it has the workspace we are
+	#     looking for.
+
+	if {[$rstate has $lodname]} {
+	    return [$rstate get $lodname]
+	}
+
+	# If the LOD is however not yet known, then the current
+	# changeset can be either of
+	# (a) root of a vendor branch,
+	# (b) root of the trunk LOD, or
 	# (c) the first changeset in a new LOD which was spawned from
 	#     an existing LOD.
 
-	if {$isdefault} {
-	    # In case of (a) the changeset has no parent, signaled by
-	    # the empty string. We do remember if the changeset is
-	    # last on the vendor branch still belonging to trunk, for
-	    # the trunk root.
-	    return {}
-	}
-
-	if {$lodname eq ":trunk:"} {
-	    # This is case (b), and we also can be sure that there is
-	    # no vendor branch changeset which could be our
-	    # parent. That was already dealt with through the
-	    # :vendor:last: signal and code in the caller (setting
-	    # such a changeset up as parent in the state, causing the
-	    # standard LOD handler at (*) to kick in. So, no parent
-	    # here at all.
-	    return {}
-	}
-
-	# Case (c). We find the parent LOD of our LOD and take the
-	# last changeset committed to that as our parent. If that
-	# doesn't exist we have an error on our hands.
+	if {$isdefault || ($lodname eq ":trunk:")} {
+	    # For both (a) and (b) we have to create a new workspace
+	    # for the lod, and it doesn't inherit from anything.
+
+	    # Note that case (b) may never occur. See the variable
+	    # 'lastdefaultontrunk' in the caller (method pushto). This
+	    # flag can trigger the generation of the workspace for the :trunk:
+	    # LOD as well, making it inherit the state of the last
+	    # trunk-changeset on the vendor-branch.
+
+	    return [$rstate new $lodname]
+	}
+
+	# Case (c). We find the parent LOD of our LOD and let the new
+	# workspace inherit from the parent's workspace.
 
 	set plodname [[[$project getsymbol $lodname] parent] name]
 
 	log write 8 csets {pLOD   '$plodname'}
 
-	if {[info exists state($plodname)]} {
-	    return $state($plodname)
+	if {[$rstate has $plodname]} {
+	    return [$rstate new $lodname $plodname]
+	}
+
+	foreach k [lsort [$rstate names]] {
+	    log write 8 csets {    $k = [[$rstate get $k] getid]}
 	}
 
 	trouble internal {Unable to determine changeset parent}
 	return
     }
@@ -532,27 +525,10 @@
 
 	# TODO/CHECK: look for changesets where isdefault/dbchild is
 	# ambigous.
 
 	return [list $def [expr {$last ne ""}]]
-    }
-
-    proc Updatestate {sv lodname uuid} {
-	upvar 1 $sv state
-
-	# Remember the imported changeset in the state, under our
-	# LOD. (**) And if the :vendor:last: signal is present then
-	# the revision is also the actual root of the :trunk:, so
-	# remember it as such.
-
-	set state($lodname) $uuid
-	if {[info exists state(:vendor:last:)]} {
-	    unset state(:vendor:last:)
-	    set state(:trunk:) $uuid
-	}
-
-	return $uuid
     }
 
     typemethod split {cset args} {
 	# As part of the creation of the new changesets specified in
 	# ARGS as sets of items, all subsets of CSET's item set, CSET