Diff
Not logged in

Differences From:

File tools/import-cvs.tcl part of check-in [f166b0a63c] - Added first code regarding import from cvs, processing a CVSROOT/history file. Looks good, except that the history I have is incomplete, truncated at the beginning. Extended my notes with results from this experiment, thinking about a possible different method. by aku on 2007-08-31 04:57:33. [view]

To:

File tools/import-cvs.tcl part of check-in [df91d389d5] - First semi-complete app for import from CVS. Trunk only, wholesale only. by aku on 2007-09-04 05:36:56. [view]

@@ -2,369 +2,184 @@
 # -*- tcl -*- \
 exec tclsh "$0" ${1+"$@"}
 
 # -----------------------------------------------------------------------------
+
+# Import the trunk of a CVS repository wholesale into a fossil repository.
+
+# Limitations implicitly mentioned:
+# - No incremental import.
+# - No import of branches.
+
+# WIBNI features (beyond eliminating the limitations):
+# - Restrict import to specific directory subtrees (SF projects use
+#   one repository for several independent modules. Examples: tcllib
+#   -> tcllib, tklib, tclapps, etc.). The restriction would allow import
+#   of only a specific module.
+# - Related to the previous, strip elements from the base path to keep
+#   it short.
+# - Export to CVS, trunk, possibly branches. I.e. extend the system to be
+#   a full bridge. Either Fossil or CVS could be the master repository.
+
+# HACKS. I.e. I do not know if the 'fixes' I use are the correct way
+#        of handling the encountered situations.
+#
+# - File F has archives F,v and Attic/F,v. Currently I will ignore the
+#   file in the Attic.
+#   Examples: sqlite/os_unix.h
+#
+# - A specific revision of a file F cannot be checked out (reported
+#   error is 'invalid change text'). This indicates a corrupt RCS
+#   file, one or more delta are bad. We report but ignore the problem
+#   in a best-effort attempt at getting as much history as possible.
+#   Examples: tcllib/tklib/modules/tkpiechart/pie.tcl
+
+# -----------------------------------------------------------------------------
 # Make private packages accessible.
 
 lappend auto_path [file join [file dirname [info script]] lib]
-package require rcsparser
-package require fileutil
+
+# -----------------------------------------------------------------------------
+# Requirements
+
+package Tcl 8.4
+package require cvs    ; # Frontend, reading from source repository
+package require fossil ; # Backend,  writing to destination repository.
 
 # -----------------------------------------------------------------------------
-# Repository management (CVS)
-
-namespace eval ::cvs {
-    variable base   ; set       base   {} ; # Repository toplevel directory.
-    variable npaths ; array set npaths {} ; # path -> actual path mapping.
-    variable rpaths ; array set rpaths {} ; # path -> rcs file mapping.
-    variable cmsg   ; array set cmsg   {} ; # Cache of commit messages.
-}
-
-proc ::cvs::hextime {hex} {
-    set t 0
-    foreach d [string map {
-	a 10 b 11 c 12 d 13 e 14 f 15
-	A 10 B 11 C 12 D 13 E 14 F 15
-    } [split $hex {}]] {
-	set t [expr {($t << 4) + $d}];#horner
+
+proc main {} {
+    global argv tot nto cvs fossil ntrunk
+
+    commandline
+
+    fossil::feedback Write ; # Setup progress feedback from the libraries
+    cvs::feedback    Write
+
+    cvs::at       $cvs  ; # Define location of CVS repository
+    cvs::scan           ; # Gather revision data from the archives
+    cvs::csets          ; # Group changes into sets
+    cvs::rtree          ; # Build revision tree (trunk only right now).
+
+    set tot 0.0
+    set nto 0
+
+    Write info {Importing ...}
+    Write info {    Setting up cvs workspace and temporary fossil repository}
+
+    cvs::workspace ; # cd's to workspace
+    fossil::new    ; # Uses cwd as workspace to connect to.
+
+    set ntrunk [cvs::ntrunk]
+    cvs::foreach_cset cset [cvs::root] {
+	import $cset
     }
-    return $t
-}
-
-proc ::cvs::at {path} {
-    variable base $path
+    cvs::wsclear
+
+    Write info "    ========= [string repeat = 61]"
+    Write info "    Imported $nto [expr {($nto == 1) ? "changeset" : "changesets"}]"
+    Write info "    Within [format %.2f $tot] seconds (avg [format %.2f [expr {$tot/$nto}]] seconds/changeset)"
+
+    Write info {    Moving to final destination}
+
+    fossil::destination $fossil
+
+    Write info Ok.
     return
 }
 
-proc ::cvs::cmsg {path rev} {
-    variable cmsg
-    set key [list $path $rev]
-
-    if {![info exists cmsg($key)]} {
-	set rcs [cvs::rcsfile $path]
-
-	#puts stderr "scan $path => $rcs"
-
-	array set p [::rcsparser::process $rcs]
-
-	foreach {r msg} $p(commit) {
-	    set cmsg([list $path $r]) $msg
+
+# -----------------------------------------------------------------------------
+
+proc commandline {} {
+    global argv cvs fossil nosign log
+
+    set nosign 0
+    while {[string match "-*" [set opt [lindex $argv 0]]]} {
+	if {$opt eq "--nosign"} {
+	    set nosign 1
+	    set argv [lrange $argv 1 end]
+	    continue
 	}
-
-	if {![info exists cmsg($key)]} {
-	    return -code error "Bogus revision $rev of file $path"
-	}
+	usage
+    }
+    if {[llength $argv] != 2} usage
+    foreach {cvs fossil} $argv break
+
+    if {
+	![file exists      $cvs] ||
+	![file readable    $cvs] ||
+	![file isdirectory $cvs]
+    } {
+	usage "CVS directory missing, not readable, or not a directory."
+    } elseif {[file exists $fossil]} {
+	usage "Fossil destination repository exists already."
     }
 
-    return $cmsg($key)
+    set log [open ${fossil}.log w]
+    return
+}
+
+proc usage {{text {}}} {
+    global argv0
+    puts stderr "Usage: $argv0 ?--nosign? cvs-repository fossil-rpeository"
+    if {$text eq ""} return
+    puts stderr "       $text"
+    exit
 }
 
-proc ::cvs::norm {path} {
-    variable npaths
-    if {![info exists npaths($path)]} {
-	set npaths($path) [NormFile $path]
-    }
-    return $npaths($path)
+proc import {cset} {
+    global tot nto nosign ntrunk
+    Write info "    Importing $cset [string repeat = [expr {60 - [string length $cset]}]]"
+    Write info "        At $nto/$ntrunk ([format %.2f [expr {double($nto)/$ntrunk}]]%)"
+
+    set usec [lindex [time {
+	foreach {uuid ad rm ch} [fossil::commit cvs2fossil $nosign \
+				     [cvs::wssetup $cset] \
+				     ::cvs::wsignore] break
+    } 1] 0]
+    cvs::uuid $cset $uuid
+
+    set sec [expr {$usec/1e6}]
+    set tot [expr {$tot + $sec}]
+    incr nto
+
+    Write info "        == $uuid +${ad}-${rm}*${ch}"
+    Write info "        in $sec seconds"
+
+    set avg [expr {$tot/$nto}]
+    set max [expr {$ntrunk * $avg}]
+    set rem [expr {$max - $tot}]
+
+    Write info "        st avg [format %.2f $avg]"
+    Write info "        st run [format %7.2f $tot] sec [format %6.2f [expr {$tot/60}]] min [format %5.2f [expr {$tot/3600}]] hr"
+    Write info "        st end [format %7.2f $max] sec [format %6.2f [expr {$max/60}]] min [format %5.2f [expr {$max/3600}]] hr"
+    Write info "        st rem [format %7.2f $rem] sec [format %6.2f [expr {$rem/60}]] min [format %5.2f [expr {$rem/3600}]] hr"
+    return
 }
 
-proc ::cvs::NormFile {path} {
-    variable base
-
-    set f $base/$path,v
-    if {[::file exists $f]} {return $path}
-
-    set hd [::file dirname $path]
-    set tl [::file tail    $path]
-
-    set f $base/$hd/Attic/$tl,v
-    if {[::file exists $f]} {return $path}
+# -----------------------------------------------------------------------------
 
-    # Bad. The dir can be truncated, i.e. it may not be an exact
-    # subdirectory of base, but deeper inside, with parents between it
-    # and base left out. Example (from the tcllib history file):
-    #
-    # M3f1d1245|afaupell|<remote>|ipentry|1.2|ChangeLog
-    # The correct path is 'tklib/modules/ipentry'.
-    # This we have to resolve too.
-
-    # normalize dance - old fileutil, modern fileutil (cvs head) doesn't do that.
-    set bx [file normalize $base]
-    foreach c [fileutil::findByPattern $bx -glob $hd] {
-	set cx [fileutil::stripPath $bx $c]
-	set c  $base/$cx
-
-	set f $c/$tl,v
-	if {[::file exists $f]} {return $cx/$tl}
-	set f $c/Attic/$tl,v
-	if {[::file exists $f]} {return $cx/$tl}
-    }
-
-    puts stderr <$f>
-    return -code error "Unable to locate actual file for $path"
+array set fl {
+    debug   {DEBUG  }
+    info    {       }
+    warning {Warning}
+    error   {ERROR  }
 }
 
-proc ::cvs::rcsfile {path} {
-    variable rpaths
-    if {![info exists rpaths($path)]} {
-	set rpaths($path) [RcsFile $path]
-    }
-    return $rpaths($path)
-}
-
-proc ::cvs::RcsFile {path} {
-    variable base
-
-    set f $base/$path,v
-    if {[::file exists $f]} {return $f}
-
-    set hd [::file dirname $path]
-    set tl [::file tail    $path]
-
-    set f $base/$hd/Attic/$tl,v
-    if {[::file exists $f]} {return $f}
-
-    # We do not have truncated directories here, assuming that only
-    # norm paths are fed into this command.
+proc Write {l t} {
+    global fl log
 
-    if 0 {
-	# Bad. The dir can be truncated, i.e. it may not be an exact
-	# subdirectory of base, but deeper inside, with parents
-	# between it and base left out. Example (from the tcllib
-	# history file):
-	#
-	# M3f1d1245|afaupell|<remote>|ipentry|1.2|ChangeLog The
-	# correct path is 'tklib/modules/ipentry'.  This we have to
-	# resolve too.
-
-	# normalize dance - old fileutil, modern fileutil (cvs head)
-	# doesn't do that.
-	set bx [file normalize $base]
-	foreach c [fileutil::findByPattern $bx -glob $hd] {
-	    set c $base/[fileutil::stripPath $bx $c]
-
-	    set f $c/$tl,v
-	    if {[::file exists $f]} {return $f}
-	    set f $c/Attic/$tl,v
-	    if {[::file exists $f]} {return $f}
-	}
+    if {[string index $t 0] eq "\r"} {
+	puts -nonewline stdout "\r$fl($l) [string range $t 0 end-1]"
+    } else {
+	puts stdout "$fl($l) $t"
+	puts $log   "$fl($l) $t"
     }
-
-    puts stderr <$f>
-    return -code error "Unable to locate rcs file for $path"
-}
-
-proc ::cvs::history {} {
-    variable base
-    return $base/CVSROOT/history
+    flush stdout
+    return
 }
 
 # -----------------------------------------------------------------------------
 
-# -----------------------------------------------------------------------------
-
-cvs::at [lindex $argv 0]
-
-#puts [::cvs::norm ipentry/ChangeLog]
-#exit
-
-#changeset state
-global cs csf
-array set cs {
-    start {} end {} cm {}
-    usr   {} dt  {}
-}
-array set csf {}
-
-proc rh {} {
-    global argv cs csf repo
-
-    set f [open [cvs::history] r]
-
-    while {[gets $f line] >= 0} {
-	# Decode line
-	foreach {op usr _ dir rev file} [split [string trim $line] |] break
-	set ts [cvs::hextime [string range $op 1 end]]
-	set op [string index $op 0]
-
-	# Filter out irrelevant parts
-	if {$op eq "O"} continue ; # checkout
-	if {$op eq "F"} continue ; # release
-	if {$op eq "T"} continue ; # rtag
-	if {$op eq "W"} continue ; # delete on update
-	if {$op eq "U"} continue ; # update
-	if {$op eq "P"} continue ; # update by patch
-	#if {$op eq "G"} continue ; # merge on update - FUTURE - identifies mergepoints.
-	if {$op eq "C"} continue ; # conflict on update - s.a.
-	if {$op eq "E"} continue ; # export
-	# left types
-	# M: commit
-	# A: addition
-	# R: removal
-
-	set df $dir/$file
-	if {[newcs $op $usr $ts $rev df cause]} {
-
-	    # NOTE 1: ChangeSets containing CVSROOT => remove such files.
-	    # NOTE 2: Empty changesets, ignore.
-
-	    #commit
-	    csstats
-
-	    if {$cause eq "cmsg"} {
-set msg
-	    } else {
-set msg ""
-	    }
-
-	    if {$cs(end) ne ""} {
-		puts =============================/$cause\ delta\ [expr {$ts - $cs(end)}]
-	    } else {
-		puts =============================/$cause
-	    }
-	    csclear
-	}
-
-	# Note: newcs normalizes df, in case the log information is
-	# bogus. So the df here may be different from before newcs
-	csadd $op $usr $ts $rev $df
-	# apply modification to workspace
-    }
-}
-
-proc newcs {op usr ts rev dfv rv} {
-    global cs csf
-    upvar 1 $rv reason $dfv df
-
-    # Logic to detect when a new change set begins. A new change sets
-    # has started with the current entry when the entry
-    #
-    # 1. is for a different user than the last.
-    # 2. tries to add a file to the changeset which is already part of it.
-    # 3.is on the trunk, and the last on a branch, or vice versa.
-    # 4. the current entry has a different commit message than the last.
-
-    set df [cvs::norm $df]
-
-    # User changed
-    if {$usr ne $cs(usr)} {
-	set reason user
-	return 1
-    }
-
-    # File is already in the changeset
-    if {[info exists csf($df)]} {
-	set reason file
-	return 1
-    }
-
-    # last/current are different regarding trunk/branch
-    set depth [llength [split $rev .]]
-    if {($cs(lastd) == 2) != ($depth == 2)} {
-	set reason branch
-	return 1
-    }
-
-    # Commit message changed
-    if {[cvs::cmsg $cs(lastf) $cs(lastr)] ne [cvs::cmsg $df $rev]} {
-	set reason cmsg
-	return 1
-    }
-
-    # Same changeset
-    return 0
-}
-
-proc csclear {} {
-    global cs csf
-    array set cs {start {} usr {} end {} dt {}}
-    array unset csf *
-    return
-}
-
-proc csadd {op usr ts rev df} {
-    global cs csf
-
-    if {$cs(usr) eq ""} {set cs(usr) $usr}
-    if {$cs(start) eq ""} {
-	set cs(start) $ts
-    } else {
-	lappend cs(dt) [expr {$ts - $cs(end)}]
-    }
-    set cs(end) $ts
-
-    set csf($df) [list $op $rev]
-    set cs(lastf) $df
-    set cs(lastr) $rev
-    set cs(lastd) [llength [split $rev .]]
-
-    puts [list $op [clock format $ts] $usr $rev $df]
-    return
-}
-
-proc csstats {} {
-    global cs csf
-
-    if {$cs(start) eq ""} return
-
-    puts "files: [array size csf]"
-    puts "delta: $cs(dt)"
-    puts "range: [expr {$cs(end) - $cs(start)}] seconds"
-    return
-}
-
-rh
-
-exit
-
-=========================================
-new fossil
-new fossil workspace
-
-open history
-
-foreach line {
-    ignore unwanted lines
-
-    accumulate changesets data
-    new change-set => commit and continue
-
-    current branch and branch of new change different ?
-    => move fossil workspace to proper revision.
-
-    apply change to workspace
-    uncommitted
-}
-
-if uncommitted => commit
-delete workspace
-copy fossil repo to destination
-=========================================
-
-Not dealt with in outline: branches, tags, merging
-
-=========================================
-
-complexities
-- apply to workspace
-  - remove simple, direct translation
-  - add => requires extraction of indicated revision from ,v
-  - modify => see above (without add following)
-
-- ,v file => Can be the either dir/file,v, or dir/Attic/file,v
-  Both ? Priority ?
-
-- How to detect changes on branches ?
-
-- Have to keep knowledge of which branches went there.
- => save change-sets information, + uuid in fossil
- => need only the leaves of each branch, and of branch points.
- => better keep all until complete.
- => uuid can be gotten from 'manifest.uuid' in workspace.
-- keep tag information ? (symbolics)
-
-=========================================
-
-CVSROOT=ORIGIN
-
-cvs -d ORIGIN checkout -r REV FILE
-Extract specific revision of a specific file.
--q, -Q quietness
+main
+exit