Overview
SHA1 Hash: | 9c5705502507e993fa2486d31093571139db2128 |
---|---|
Date: | 2007-12-02 05:49:00 |
User: | aku |
Comment: | Performance bugfix. nextmap/premap can still be performance killers and memory hogs. Moved the computation of sucessor changesets down to the type-dependent code (new methods) and the SQL database, i.e. the C level. In the current setup it was possible that the DB would deliver us millions of file-level dependency pairs which the Tcl level would then reduce to tens of actual changeset dependencies. Tcl did not cope well with that amount of data. Now the reduction happens in the query itself. A concrete example was a branch in the Tcl CVS generating nearly 9 million pairs, which reduced to roughly 200 changeset dependencies. This blew the memory out of the water and the converter ground to a halt, busily swapping. Ok, causes behind us, also added another index on 'csitem(iid)' to speed the search for changesets from the revisions, tags, and branches. |
Timelines: | ancestors | descendants | both | trunk |
Other Links: | files | ZIP archive | manifest |
Tags And Properties
- branch=trunk inherited from [a28c83647d]
- sym-trunk inherited from [a28c83647d]
Changes
[hide diffs]Modified tools/cvs2fossil/lib/c2f_pinitcsets.tcl from [39a67db2c5] to [f56c12b551].
@@ -93,11 +93,12 @@ cid INTEGER NOT NULL REFERENCES changeset, pos INTEGER NOT NULL, iid INTEGER NOT NULL, -- REFERENCES revision|tag|branch UNIQUE (cid, pos), UNIQUE (cid, iid) - } + } { iid } + # Index on: iid (successor/predecessor retrieval) project::rev getcstypes return }
Modified tools/cvs2fossil/lib/c2f_prev.tcl from [e427524ce6] to [a4cf35790a].
@@ -87,13 +87,25 @@ delegate method istag to mytypeobj method setpos {p} { set mypos $p ; return } method pos {} { return $mypos } + # result = list (changeset) + method successors {} { + return [struct::list map \ + [$mytypeobj cs_successors $myitems] \ + [mytypemethod of]] + } + # result = dict (item -> list (changeset)) method successormap {} { - # NOTE / FUTURE: Possible bottleneck. + # NOTE / FUTURE: Definitive bottleneck (can be millions of pairs). + # + # Only user is pass 9, computing the limits of backward + # branches per branch in the changeset. TODO: Fold that into + # the SQL query, i.e. move the crunching from Tcl to C. + array set tmp {} foreach {rev children} [$self nextmap] { foreach child $children { lappend tmp($rev) $myitemmap($child) } @@ -100,25 +112,18 @@ set tmp($rev) [lsort -unique $tmp($rev)] } return [array get tmp] } - # result = list (changeset) - method successors {} { - # NOTE / FUTURE: Possible bottleneck. - set csets {} - foreach {_ children} [$self nextmap] { - foreach child $children { - lappend csets $myitemmap($child) - } - } - return [lsort -unique $csets] - } - # result = dict (item -> list (changeset)) method predecessormap {} { - # NOTE / FUTURE: Possible bottleneck. + # NOTE / FUTURE: Definitive bottleneck (can be millions of pairs). + # + # Only user is pass 9, computing the limits of backward + # branches per branch in the changeset. TODO: Fold that into + # the SQL query, i.e. move the crunching from Tcl to C. + array set tmp {} foreach {rev children} [$self premap] { foreach child $children { lappend tmp($rev) $myitemmap($child) } @@ -1057,10 +1062,68 @@ "] { lappend dependencies([list rev $rid]) [list sym::branch $parent] } return } + + # result = list (changeset-id) + typemethod cs_successors {revisions} { + # This is a variant of 'successors' which maps the low-level + # data directly to the associated changesets. I.e. instead + # millions of dependency pairs (in extreme cases (Example: Tcl + # CVS)) we return a very short and much more manageable list + # of changesets. + + set theset ('[join $revisions {','}]') + return [state run " + SELECT C.cid + FROM revision R, csitem CI, changeset C + WHERE R.rid IN $theset -- Restrict to revisions of interest + AND R.child IS NOT NULL -- Has primary child + AND CI.iid = R.rid + AND C.cid = CI.cid + AND C.type = 0 + UNION + SELECT C.cid + FROM revision R, revisionbranchchildren B, csitem CI, changeset C + WHERE R.rid IN $theset -- Restrict to revisions of interest + AND R.rid = B.rid -- Select subset of branch children + AND CI.iid = R.rid + AND C.cid = CI.cid + AND C.type = 0 + UNION + SELECT C.cid + FROM revision R, revision RA, csitem CI, changeset C + WHERE R.rid IN $theset -- Restrict to revisions of interest + AND R.isdefault -- Restrict to NTDB + AND R.dbchild IS NOT NULL -- and last NTDB belonging to trunk + AND RA.rid = R.dbchild -- Go directly to trunk root + AND RA.child IS NOT NULL -- Has primary child. + AND CI.iid = R.rid + AND C.cid = CI.cid + AND C.type = 0 + AND CI.iid = R.rid + AND C.cid = CI.cid + AND C.type = 0 + UNION + SELECT C.cid + FROM revision R, tag T, csitem CI, changeset C + WHERE R.rid in $theset + AND T.rev = R.rid + AND CI.iid = T.tid + AND C.cid = CI.cid + AND C.type = 1 + UNION + SELECT C.cid + FROM revision R, branch B, csitem CI, changeset C + WHERE R.rid in $theset + AND B.root = R.rid + AND CI.iid = B.bid + AND C.cid = CI.cid + AND C.type = 2 + "] + } } # # ## ### ##### ######## ############# ##################### ## Helper singleton. Commands for tag symbol changesets. @@ -1136,10 +1199,16 @@ AND T.sid = P.sid AND P.pid = TX.sid "] { lappend dependencies([list sym::tag $tid]) [list sym::tag $parent] } + return + } + + # result = list (changeset-id) + typemethod cs_successors {tags} { + # Tags have no successors. return } } # # ## ### ##### ######## ############# ##################### @@ -1250,10 +1319,49 @@ AND B.sid = P.sid AND P.pid = T.sid "] { lappend dependencies([list sym::branch $bid]) [list sym::tag $parent] } + return + } + + # result = list (changeset-id) + typemethod cs_successors {branches} { + # This is a variant of 'successors' which maps the low-level + # data directly to the associated changesets. I.e. instead + # millions of dependency pairs (in extreme cases (Example: Tcl + # CVS)) we return a very short and much more manageable list + # of changesets. + + set theset ('[join $branches {','}]') + return [state run " + SELECT C.cid + FROM branch B, revision R, csitem CI, changeset C + WHERE B.bid IN $theset + AND B.first = R.rid + AND CI.iid = R.rid + AND C.cid = CI.cid + AND C.type = 0 + UNION + SELECT C.cid + FROM branch B, preferedparent P, branch BX, csitem CI, changeset C + WHERE B.bid IN $theset + AND B.sid = P.pid + AND BX.sid = P.sid + AND CI.iid = BX.bid + AND C.cid = CI.cid + AND C.type = 2 + UNION + SELECT C.cid + FROM branch B, preferedparent P, tag T, csitem CI, changeset C + WHERE B.bid IN $theset + AND B.sid = P.pid + AND T.sid = P.sid + AND CI.iid = T.tid + AND C.cid = CI.cid + AND C.type = 1 + "] return } # # ## ### ##### ######## ############# ## Configuration