Check-in [f888f06fe3]
Not logged in
Overview

SHA1 Hash:f888f06fe35bc379d20ea71303e88d205a5106e9
Date: 2007-11-02 06:06:24
User: aku
Comment: Continued work on pass 3, added code to determine the type of symbols based on the tag-, branch-, and commit-counts. Hook for handling data coming from the option processor is present (UserConfig), but only as a placeholder.
Timelines: ancestors | descendants | both | trunk
Other Links: files | ZIP archive | manifest

Tags And Properties
Changes
[hide diffs]

Modified tools/cvs2fossil/lib/c2f_pcollrev.tcl from [d447e0dea4] to [98fd60f570].

@@ -14,18 +14,19 @@
 ## all the information they contain (revisions, and symbols).
 
 # # ## ### ##### ######## ############# #####################
 ## Requirements
 
-package require Tcl 8.4                             ; # Required runtime.
-package require snit                                ; # OO system.
-package require vc::tools::trouble                  ; # Error reporting.
-package require vc::tools::log                      ; # User feedback.
-package require vc::fossil::import::cvs::pass       ; # Pass management.
-package require vc::fossil::import::cvs::repository ; # Repository management.
-package require vc::fossil::import::cvs::state      ; # State storage.
-package require vc::rcs::parser                     ; # Rcs archive data extraction.
+package require Tcl 8.4                               ; # Required runtime.
+package require snit                                  ; # OO system.
+package require vc::tools::trouble                    ; # Error reporting.
+package require vc::tools::log                        ; # User feedback.
+package require vc::fossil::import::cvs::pass         ; # Pass management.
+package require vc::fossil::import::cvs::repository   ; # Repository management.
+package require vc::fossil::import::cvs::state        ; # State storage.
+package require vc::fossil::import::cvs::project::sym ; # Project level symbols
+package require vc::rcs::parser                       ; # Rcs archive data extraction.
 
 # # ## ### ##### ######## ############# #####################
 ## Register the pass with the management
 
 vc::fossil::import::cvs::pass define \
@@ -203,13 +204,15 @@
 	    n   INTEGER  NOT NULL,                    -- How often pid can act as parent.
 	    UNIQUE (sid, pid)
 	}
 
 	state writing symtype {
-	    tid   INTEGER  NOT NULL  PRIMARY KEY,
-	    name  TEXT     NOT NULL,
+	    tid    INTEGER  NOT NULL  PRIMARY KEY,
+	    name   TEXT     NOT NULL,
+	    plural TEXT     NOT NULL,
 	    UNIQUE (name)
+	    UNIQUE (plural)
 	}
 	state run {
 	    INSERT INTO symtype VALUES (0,'excluded');
 	    INSERT INTO symtype VALUES (1,'tag');
 	    INSERT INTO symtype VALUES (2,'branch');
@@ -255,17 +258,20 @@
 	state writing cmessage {
 	    cid  INTEGER  NOT NULL  PRIMARY KEY  AUTOINCREMENT,
 	    text TEXT     NOT NULL  UNIQUE
 	}
 
+	project::sym getsymtypes
 	return
     }
 
     typemethod load {} {
 	state reading symbol
-
-	repository loadsymbols
+	state reading symtype
+
+	project::sym getsymtypes
+	repository   loadsymbols
 	return
     }
 
     typemethod run {} {
 	# Pass manager interface. Executed to perform the
@@ -589,10 +595,13 @@
     namespace export collrev
     namespace eval collrev {
 	namespace import ::vc::rcs::parser
 	namespace import ::vc::fossil::import::cvs::repository
 	namespace import ::vc::fossil::import::cvs::state
+	namespace eval project {
+	    namespace import ::vc::fossil::import::cvs::project::sym
+	}
 	namespace import ::vc::tools::trouble
 	namespace import ::vc::tools::log
 	log register collrev
     }
 }

Modified tools/cvs2fossil/lib/c2f_pcollsym.tcl from [529d76b227] to [77182f7016].

@@ -16,19 +16,20 @@
 ## 'FilterSym', which performs the actual deletion.
 
 # # ## ### ##### ######## ############# #####################
 ## Requirements
 
-package require Tcl 8.4                             ; # Required runtime.
-package require snit                                ; # OO system.
-#package require fileutil::traverse                  ; # Directory traversal.
-#package require fileutil                            ; # File & path utilities.
-#package require vc::tools::trouble                  ; # Error reporting.
-package require vc::tools::log                      ; # User feedback.
-#package require vc::fossil::import::cvs::pass       ; # Pass management.
-#package require vc::fossil::import::cvs::repository ; # Repository management.
-package require vc::fossil::import::cvs::state      ; # State storage.
+package require Tcl 8.4                               ; # Required runtime.
+package require snit                                  ; # OO system.
+#package require fileutil::traverse                    ; # Directory traversal.
+#package require fileutil                              ; # File & path utilities.
+#package require vc::tools::trouble                    ; # Error reporting.
+package require vc::tools::log                        ; # User feedback.
+#package require vc::fossil::import::cvs::pass         ; # Pass management.
+package require vc::fossil::import::cvs::repository   ; # Repository management.
+package require vc::fossil::import::cvs::state        ; # State storage.
+package require vc::fossil::import::cvs::project::sym ; # Project level symbols
 
 # # ## ### ##### ######## ############# #####################
 ## Register the pass with the management
 
 vc::fossil::import::cvs::pass define \
@@ -61,10 +62,17 @@
 
     typemethod run {} {
 	# Pass manager interface. Executed to perform the
 	# functionality of the pass.
 
+	state transaction {
+	    repository   determinesymboltypes
+
+	    project::sym printrulestatistics
+	    project::sym printtypestatistics
+	}
+
 	log write 1 collsym "Collation completed"
 	return
     }
 
     typemethod discard {} {
@@ -89,12 +97,15 @@
 }
 
 namespace eval ::vc::fossil::import::cvs::pass {
     namespace export collsym
     namespace eval collsym {
-	#namespace import ::vc::fossil::import::cvs::repository
+	namespace import ::vc::fossil::import::cvs::repository
 	namespace import ::vc::fossil::import::cvs::state
+	namespace eval project {
+	    namespace import ::vc::fossil::import::cvs::project::sym
+	}
 	#namespace import ::vc::tools::trouble
 	namespace import ::vc::tools::log
 	log register collsym
     }
 }

Modified tools/cvs2fossil/lib/c2f_project.tcl from [974891062e] to [39d1bb0e92].

@@ -93,10 +93,17 @@
 		log write 3 project "$mybase: Deleting ghost symbol '$name'"
 		$symbol destroy
 		unset mysymbol($name)
 		set changes 1
 	    }
+	}
+	return
+    }
+
+    method determinesymboltypes {} {
+	foreach {name symbol} [array get mysymbol] {
+	    $symbol determinetype
 	}
 	return
     }
 
     # pass I persistence

Modified tools/cvs2fossil/lib/c2f_psym.tcl from [08079c4d5a] to [9138dfe62a].

@@ -13,14 +13,17 @@
 ## Symbols (Tags, Branches) per project.
 
 # # ## ### ##### ######## ############# #####################
 ## Requirements
 
-package require Tcl 8.4                                 ; # Required runtime.
-package require snit                                    ; # OO system.
-package require struct::set                             ; # Set handling.
-package require vc::fossil::import::cvs::state          ; # State storage.
+package require Tcl 8.4                               ; # Required runtime.
+package require snit                                  ; # OO system.
+package require vc::tools::trouble                    ; # Error reporting.
+package require vc::tools::log                        ; # User feedback.
+package require vc::tools::misc                       ; # Text formatting.
+package require vc::fossil::import::cvs::state        ; # State storage.
+package require struct::set                           ; # Set handling.
 
 # # ## ### ##### ######## ############# #####################
 ##
 
 snit::type ::vc::fossil::import::cvs::project::sym {
@@ -29,15 +32,58 @@
 
     constructor {name id project} {
 	set myname    $name
 	set myid      $id
 	set myproject $project
+
+	# Count total number of symbols.
+	incr mynum
 	return
     }
 
     method name {} { return $myname }
     method id   {} { return $myid   }
+
+    # # ## ### ##### ######## #############
+    ## Symbol type
+
+    method determinetype {} {
+	# This is done by a fixed heuristic, with guidance by the
+	# user in edge-cases. Contrary to cvs2svn, which uses a big
+	# honking strategy class and rule objects. Keep it simple, we
+	# can expand later when we actually need all the complexity
+	# for configurability.
+
+	# The following guidelines are applied:
+	# - Is usage unambiguous ?
+	# - Was there ever a commit on the symbol ?
+	# - More used as tag, or more used as branch ?
+	# - At last, what has the user told us about it ?
+	# - Fail
+
+	foreach rule {
+	    UserConfig
+	    Unambigous
+	    HasCommits
+	    VoteCounts
+	} {
+	   set chosen [$self $rule]
+	   if {$chosen eq $myundef} continue
+	   $self MarkAs $rule $chosen
+	   return
+	}
+
+	# None of the above was able to decide which type to assign to
+	# the symbol. This is a fatal error preventing the execution
+	# of the passes after 'CollateSymbols'.
+
+	incr myrulecount(Undecided_)
+	trouble fatal "Unable to decide how to convert symbol '$myname'"
+	return
+    }
+
+    method markthetrunk {} { $self MarkAs IsTheTrunk $mybranch ; return }
 
     # # ## ### ##### ######## #############
     ## Symbol statistics
 
     method defcounts {tc bc cc} {
@@ -127,37 +173,157 @@
 
     variable mypparent -array {} ; # Maps from symbols to the number
 				   # of files in which it could have
 				   # been a parent of this symbol.
 
+    variable mytype {} ; # The type chosen for the symbol to use in
+			 # the conversion.
+
+    # # ## ### ##### ######## #############
+
+    typemethod getsymtypes {} {
+	foreach {tid name} [state run {
+	    SELECT tid, name FROM symtype;
+	}] { set mysymtype($tid) $name }
+	return
+    }
+
     # Keep the codes below in sync with 'pass::collrev/setup('symtype').
-    typevariable myexcluded 0 ; # Code for symbols which are excluded.
-    typevariable mytag      1 ; # Code for symbols which are tags.
-    typevariable mybranch   2 ; # Code for symbols which are branches.
-    typevariable myundef    3 ; # Code for symbols of unknown type.
+    typevariable myexcluded        0 ; # Code for symbols which are excluded.
+    typevariable mytag             1 ; # Code for symbols which are tags.
+    typevariable mybranch          2 ; # Code for symbols which are branches.
+    typevariable myundef           3 ; # Code for symbols of unknown type.
+    typevariable mysymtype -array {} ; # Map from type code to label for the log.
+
+    typemethod printrulestatistics {} {
+	log write 2 symbol "Rule usage statistics:"
+
+	set fmt %[string length $mynum]s
+	set all 0
+
+	foreach key [lsort [array names myrulecount]] {
+	    log write 2 symbol "* [format $fmt $myrulecount($key)] $key"
+	    incr all $myrulecount($key)
+	}
+
+	log write 2 symbol "= [format $fmt $all] total"
+	return
+    }
+
+    # Statistics on how often each 'rule' was used to decide on the
+    # type of a symbol.
+    typevariable myrulecount -array {
+	HasCommits 0
+	IsTheTrunk 0
+	Unambigous 0
+	Undecided_ 0
+	UserConfig 0
+	VoteCounts 0
+    }
+
+    typemethod printtypestatistics {} {
+	log write 2 symbol "Symbol type statistics:"
+
+	set fmt %[string length $mynum]s
+	set all 0
+
+	foreach {stype splural n} [state run {
+	    SELECT T.name, T.plural, COUNT (s.sid)
+	    FROM symbol S, symtype T
+	    WHERE S.type = T.tid
+	    GROUP BY T.name
+	    ORDER BY T.name
+	    ;
+	}] {
+	    log write 2 symbol "* [format $fmt $n] [sp $n $stype $splural]"
+	    incr all $n
+	}
+
+	log write 2 symbol "= [format $fmt $all] total"
+	return
+    }
+
+    typevariable mynum 0
 
     # # ## ### ##### ######## #############
     ## Internal methods
 
+    method UserConfig {} {
+	# No user based guidance yet.
+	return $myundef
+    }
+
+    method Unambigous {} {
+	# If a symbol is used unambiguously as a tag/branch, convert
+	# it as such.
+
+	set istag    [expr {$mytagcount    > 0}]
+	set isbranch [expr {$mybranchcount > 0 || $mycommitcount > 0}]
+
+	if {$istag && $isbranch} { return $myundef  }
+	if {$istag}              { return $mytag    }
+	if {$isbranch}           { return $mybranch }
+
+	# Symbol was not used at all.
+	return $myundef
+    }
+
+    method HasCommits {} {
+	# If there was ever a commit on the symbol, convert it as a
+	# branch.
+
+	if {$mycommitcount > 0} { return $mybranch }
+	return $myundef
+    }
+
+    method VoteCounts {} {
+	# Convert the symbol based on how often it was used as a
+	# branch/tag. Whichever happened more often determines how the
+	# symbol is converted.
+
+	if {$mytagcount > $mybranchcount} { return $mytag }
+	if {$mytagcount < $mybranchcount} { return $mybranch }
+	return $myundef
+    }
+
+    method MarkAs {label chosen} {
+	log write 3 symbol "\[$label\] Converting symbol '$myname' as $mysymtype($chosen)"
+
+	set mytype $chosen
+	incr myrulecount($label)
+
+	# This is stored directly into the database.
+	state run {
+	    UPDATE symbol
+	    SET type = $chosen
+	    WHERE sid = $myid
+	    ;
+	}
+	return
+    }
+
     # # ## ### ##### ######## #############
     ## Configuration
 
     pragma -hastypeinfo    no  ; # no type introspection
     pragma -hasinfo        no  ; # no object introspection
-    pragma -hastypemethods no  ; # type is not relevant.
     pragma -simpledispatch yes ; # simple fast dispatch
 
     # # ## ### ##### ######## #############
 }
 
 namespace eval ::vc::fossil::import::cvs::project {
     namespace export sym
     namespace eval sym {
 	namespace import ::vc::fossil::import::cvs::state
+	namespace import ::vc::tools::misc::*
+	namespace import ::vc::tools::trouble
+	namespace import ::vc::tools::log
+	log register symbol
     }
 }
 
 # # ## ### ##### ######## ############# #####################
 ## Ready
 
 package provide vc::fossil::import::cvs::project::sym 1.0
 return

Modified tools/cvs2fossil/lib/c2f_ptrunk.tcl from [6237ca73d6] to [b20f9bb07d].

@@ -52,10 +52,12 @@
     method possibleparent {symbol} {}
 
     method isghost {} { return 0 }
 
     delegate method persistrev to mysymbol
+
+    method determinetype {} { $mysymbol markthetrunk }
 
     # # ## ### ##### ######## #############
     ## State
 
     typevariable myname   :trunk: ; # Name shared by all trunk symbols.

Modified tools/cvs2fossil/lib/c2f_repository.tcl from [9474ee1654] to [7a9e86cef9].

@@ -207,10 +207,17 @@
 		# this is used to load the pass II data, which means
 		# that everything is 'undefined' at this point anyway.
 
 		# future: $symbol load (blockers, and parents)
 	    }
+	}
+	return
+    }
+
+    typemethod determinesymboltypes {} {
+	foreach project [TheProjects] {
+	    $project determinesymboltypes
 	}
 	return
     }
 
     # # ## ### ##### ######## #############