Fossil: Diff

Differences From:

File tools/cvs2fossil/lib/c2f_prev.tcl part of check-in [712010580a] - Bugfix. Have the symbol dependency retrieval commands actually return something. by aku on 2007-12-02 04:55:38. [view]

To:

File tools/cvs2fossil/lib/c2f_prev.tcl part of check-in [9c57055025] - Performance bugfix. nextmap/premap can still be performance killers and memory hogs. Moved the computation of successor changesets down to the type-dependent code (new methods) and the SQL database, i.e. the C level. In the current setup it was possible that the DB would deliver us millions of file-level dependency pairs which the Tcl level would then reduce to tens of actual changeset dependencies. Tcl did not cope well with that amount of data. Now the reduction happens in the query itself. A concrete example was a branch in the Tcl CVS generating nearly 9 million pairs, which reduced to roughly 200 changeset dependencies. This blew the memory out of the water and the converter ground to a halt, busily swapping. Ok, causes behind us, also added another index on 'csitem(iid)' to speed the search for changesets from the revisions, tags, and branches. by aku on 2007-12-02 05:49:00. [view]

@@ -88,11 +88,23 @@
 
     method setpos {p} { set mypos $p ; return }
     method pos    {}  { return $mypos }
 
+    # result = list (changeset)
+    method successors {} {
+	# Type-specific code yields the ids; map each through 'of'.
+	set cids [$mytypeobj cs_successors $myitems]
+	return [struct::list map $cids [mytypemethod of]]
+    }
+
     # result = dict (item -> list (changeset))
     method successormap {} {
-	# NOTE / FUTURE: Possible bottleneck.
+	# NOTE / FUTURE: Definitive bottleneck (can be millions of pairs).
+	#
+	# Only user is pass 9, computing the limits of backward
+	# branches per branch in the changeset. TODO: Fold that into
+	# the SQL query, i.e. move the crunching from Tcl to C.
+
 	array set tmp {}
 	foreach {rev children} [$self nextmap] {
 	    foreach child $children {
 		lappend tmp($rev) $myitemmap($child)
@@ -101,23 +113,16 @@
 	}
 	return [array get tmp]
     }
 
-    # result = list (changeset)
-    method successors {} {
-	# NOTE / FUTURE: Possible bottleneck.
-	set csets {}
-	foreach {_ children} [$self nextmap] {
-	    foreach child $children {
-		lappend csets $myitemmap($child)
-	    }
-	}
-	return [lsort -unique $csets]
-    }
-
     # result = dict (item -> list (changeset))
     method predecessormap {} {
-	# NOTE / FUTURE: Possible bottleneck.
+	# NOTE / FUTURE: Definitive bottleneck (can be millions of pairs).
+	#
+	# Only user is pass 9, computing the limits of backward
+	# branches per branch in the changeset. TODO: Fold that into
+	# the SQL query, i.e. move the crunching from Tcl to C.
+
 	array set tmp {}
 	foreach {rev children} [$self premap] {
 	    foreach child $children {
 		lappend tmp($rev) $myitemmap($child)
@@ -1058,8 +1063,66 @@
 	    lappend dependencies([list rev $rid]) [list sym::branch $parent]
 	}
 	return
     }
+
+    # result = list (changeset-id)
+    typemethod cs_successors {revisions} {
+        # This is a variant of 'successors' which maps the low-level
+        # data directly to the associated changesets. I.e. instead of
+        # millions of dependency pairs (in extreme cases (Example: Tcl
+        # CVS)) we return a very short and much more manageable list
+        # of changesets.
+        #
+        # Each sub-query joins csitem on the successor item (child
+        # revision, tag, branch), never on the input revision itself.
+
+	set theset ('[join $revisions {','}]')
+	return [state run "
+	    SELECT C.cid
+	    FROM   revision R, csitem CI, changeset C
+	    WHERE  R.rid   IN $theset     -- Restrict to revisions of interest
+	    AND    R.child IS NOT NULL    -- Has primary child
+            AND    CI.iid = R.child       -- Changeset of the primary child
+            AND    C.cid = CI.cid
+            AND    C.type = 0
+    UNION
+	    SELECT C.cid
+	    FROM   revision R, revisionbranchchildren B, csitem CI, changeset C
+	    WHERE  R.rid   IN $theset     -- Restrict to revisions of interest
+	    AND    R.rid = B.rid          -- Select subset of branch children
+            AND    CI.iid = B.brid        -- NOTE(review) brid per table schema, confirm
+            AND    C.cid = CI.cid
+            AND    C.type = 0
+    UNION
+	    SELECT C.cid
+	    FROM   revision R, revision RA, csitem CI, changeset C
+	    WHERE  R.rid   IN $theset      -- Restrict to revisions of interest
+	    AND    R.isdefault             -- Restrict to NTDB
+	    AND    R.dbchild IS NOT NULL   -- and last NTDB belonging to trunk
+	    AND    RA.rid = R.dbchild      -- Go directly to trunk root
+	    AND    RA.child IS NOT NULL    -- Has primary child.
+            AND    CI.iid = RA.child       -- Changeset of that primary child
+            AND    C.cid = CI.cid
+            AND    C.type = 0
+    UNION
+	    SELECT C.cid
+	    FROM   revision R, tag T, csitem CI, changeset C
+	    WHERE  R.rid in $theset
+	    AND    T.rev = R.rid
+            AND    CI.iid = T.tid
+            AND    C.cid = CI.cid
+            AND    C.type = 1
+    UNION
+	    SELECT C.cid
+	    FROM   revision R, branch B, csitem CI, changeset C
+	    WHERE  R.rid in $theset
+	    AND    B.root = R.rid
+            AND    CI.iid = B.bid
+            AND    C.cid = CI.cid
+            AND    C.type = 2
+	"]
+    }
 }
 
 # # ## ### ##### ######## ############# #####################
 ## Helper singleton. Commands for tag symbol changesets.
@@ -1137,8 +1200,14 @@
 	    AND    P.pid = TX.sid
 	"] {
 	    lappend dependencies([list sym::tag $tid]) [list sym::tag $parent]
 	}
+	return
+    }
+
+    # result = list (changeset-id), always empty for tags
+    typemethod cs_successors {tags} {
+	# Tags have no successors, so 'tags' is ignored and the bare return yields an empty result.
 	return
     }
 }
 
@@ -1251,8 +1320,47 @@
 	    AND    P.pid = T.sid
 	"] {
 	    lappend dependencies([list sym::branch $bid]) [list sym::tag $parent]
 	}
+	return
+    }
+
+    # result = list (changeset-id)
+    typemethod cs_successors {branches} {
+        # This is a variant of 'successors' which maps the low-level
+        # data directly to the associated changesets. I.e. instead of
+        # millions of dependency pairs (in extreme cases (Example: Tcl
+        # CVS)) we return a very short and much more manageable list
+        # of changesets: those of the revision in B.first, and of the branches
+        # and tags whose symbol has the branch's symbol as 'pid' in preferedparent.
+
+	set theset ('[join $branches {','}]')
+        return [state run "
+	    SELECT C.cid
+	    FROM   branch B, revision R, csitem CI, changeset C
+	    WHERE  B.bid IN $theset
+	    AND    B.first = R.rid
+            AND    CI.iid = R.rid
+            AND    C.cid = CI.cid
+            AND    C.type = 0
+    UNION
+	    SELECT C.cid
+	    FROM   branch B, preferedparent P, branch BX, csitem CI, changeset C
+	    WHERE  B.bid IN $theset
+	    AND    B.sid = P.pid
+	    AND    BX.sid = P.sid
+            AND    CI.iid = BX.bid
+            AND    C.cid = CI.cid
+            AND    C.type = 2
+    UNION
+	    SELECT C.cid
+	    FROM   branch B, preferedparent P, tag T, csitem CI, changeset C
+	    WHERE  B.bid IN $theset
+	    AND    B.sid = P.pid
+	    AND    T.sid = P.sid
+            AND    CI.iid = T.tid
+            AND    C.cid = CI.cid
+            AND    C.type = 1
+	"]
-	return
     }
 
     # # ## ### ##### ######## #############