Overview
SHA1 Hash: | 8c6488ded2d656ba8e025c0abf9ed9bfaeab5007 |
---|---|
Date: | 2007-11-27 04:26:56 |
User: | aku |
Comment: | Continued work on the integrity checks for changesets. Moved callers out of transactions. Two checks are already tripping on bad changesets made by InitCSets (pass 5). |
Timelines: | ancestors | descendants | both | trunk |
Other Links: | files | ZIP archive | manifest |
Tags And Properties
- branch=trunk inherited from [a28c83647d]
- sym-trunk inherited from [a28c83647d]
Changes
[hide diffs] Modified tools/cvs2fossil/lib/c2f_integrity.tcl from [69cecd83d4] to [19b5dfccf5].
@@ -28,25 +28,31 @@ snit::type ::vc::fossil::import::cvs::integrity { # # ## ### ##### ######## ############# ## Public API typemethod strict {} { + log write 4 integrity {Check database consistency} + set n 0 AllButMeta Meta return } typemethod metarelaxed {} { + log write 4 integrity {Check database consistency} + set n 0 AllButMeta return } typemethod changesets {} { + log write 4 integrity {Check database consistency} + set n 0 - RevisionCSetLinkage + AllChangesets RevisionChangesets SymbolChangesets return } @@ -54,11 +60,10 @@ ## Internal methods proc AllButMeta {} { # This code performs a number of paranoid checks of the # database, searching for inconsistent cross-references. - log write 4 integrity {Check database consistency} upvar 1 n n ; # Counter for the checks (we print an id before # the main label). # Find all revisions which disagree with their line of @@ -269,11 +274,10 @@ } proc Meta {} { # This code performs a number of paranoid checks of the # database, searching for inconsistent cross-references. - log write 4 integrity {Check database consistency} upvar 1 n n ; # Counter for the checks (we print an id before # the main label). # Find all revisions which disgree with their meta data about @@ -289,26 +293,254 @@ ; } return } - proc RevisionCSetLinkage {} { + proc AllChangesets {} { + # This code performs a number of paranoid checks of the + # database, searching for inconsistent changeset/revision + # information. + + upvar 1 n n ; # Counter for the checks (we print an id before + # the main label). + + # Find all revisions which are not used by at least one + # revision changeset. + Check \ + {All revisions have to be used by least one revision changeset} \ + {is not used by a revision changeset} { + -- Unused revisions = All revisions + -- - revisions used by revision changesets. + -- + -- Both sets can be computed easily, and subtracted + -- from each other. Then we can get the associated + -- file (name) for display. 
+ + SELECT F.name, R.rev + FROM revision R, file F + WHERE R.rid IN (SELECT rid FROM revision -- All revisions + EXCEPT -- subtract + SELECT CR.rid FROM csrevision CR, changeset C -- revisions used + WHERE C.cid = CR.cid -- by any revision + AND C.type = 0) -- changeset + AND R.fid = F.fid -- get file of unused revision + } + # Find all revisions which are used by more than one revision + # changeset. + Check \ + {All revisions have to be used by at most one revision changeset} \ + {is used by multiple revision changesets} { + -- Principle of operation: Get all revision/changeset + -- pairs for all revision changesets, group by + -- revision to aggregate the changeset, counting + -- them. From the resulting revision/count table + -- select those with more than one user, and get their + -- associated file (name) for display. + + SELECT F.name, R.rev + FROM revision R, file F, + (SELECT CR.rid AS rid, count(CR.cid) AS count + FROM csrevision CR, changeset C + WHERE C.type = 0 + AND C.cid = CR.cid + GROUP BY CR.rid ) AS U + WHERE U.count > 1 + AND R.rid = U.rid + AND R.fid = F.fid + } + # All revisions in all changesets have to agree on the LOD + # their changeset belongs to. In other words, all revisions in + # a changeset have to refer to the same line of development. + # + # Instead of looking at all pairs of revisions in all + # changesets we generate the distinct set of all LODs + # referenced by the revisions of a changeset, look for those + # with cardinality > 1, and get the identifying information + # for the changesets found thusly. 
+ CheckCS \ + {All revisions in a changeset have to belong to the same LOD} \ + {: Its revisions disagree about the LOD they belong to} { + SELECT T.name, C.cid + FROM changeset C, cstype T + WHERE C.cid IN (SELECT U.cid + FROM (SELECT DISTINCT CR.cid AS cid, R.lod AS lod + FROM csrevision CR, revision R + WHERE CR.rid = R.rid) AS U + GROUP BY U.cid HAVING COUNT(U.lod) > 1) + AND T.tid = C.type + } + # All revisions in all changesets have to agree on the project + # their changeset belongs to. In other words, all revisions in + # a changeset have to refer to the same project. + # + # Instead of looking at all pairs of revisions in all + # changesets we generate the distinct set of all projects + # referenced by the revisions of a changeset, look for those + # with cardinality > 1, and get the identifying information + # for the changesets found thusly. + CheckCS \ + {All revisions in a changeset have to belong to the same project} \ + {: Its revisions disagree about the project they belong to} { + SELECT T.name, C.cid + FROM changeset C, cstype T + WHERE C.cid IN (SELECT U.cid + FROM (SELECT DISTINCT CR.cid AS cid, F.pid AS pid + FROM csrevision CR, revision R, file F + WHERE CR.rid = R.rid + AND F.fid = R.fid) AS U + GROUP BY U.cid HAVING COUNT(U.pid) > 1) + AND T.tid = C.type + } + # All revisions in a single changeset have to belong to + # different files. Conversely: No two revisions of a single + # file are allowed to be in the same changeset. + # + # Instead of looking at all pairs of revisions in all + # changesets we generate the distinct set of all files + # referenced by the revisions of a changeset, and look for + # those with cardinality < the cardinality of the set of + # revisions, and get the identifying information for the + # changesets found thusly. 
+ CheckCS \ + {All revisions in a changeset have to belong to different files} \ + {: Its revisions share files} { + SELECT T.name, C.cid + FROM changeset C, cstype T + WHERE C.cid IN (SELECT VV.cid + FROM (SELECT U.cid as cid, COUNT (U.fid) AS fcount + FROM (SELECT DISTINCT CR.cid AS cid, R.fid AS fid + FROM csrevision CR, revision R + WHERE CR.rid = R.rid) AS U + GROUP BY U.cid) AS UU, + (SELECT V.cid AS cid, COUNT (V.rid) AS rcount + FROM csrevision V + GROUP BY V.cid) AS VV + WHERE VV.cid = UU.cid + AND UU.fcount < VV.rcount) + AND T.tid = C.type + } + return } proc RevisionChangesets {} { + # This code performs a number of paranoid checks of the + # database, searching for inconsistent changeset/revision + # information. + + upvar 1 n n ; # Counter for the checks (we print an id before + # the main label). + + # All revisions used by revision changesets have to refer to + # the same meta information as their changeset. + CheckInCS \ + {All revisions have to agree with their revision changeset about the used meta information} \ + {disagrees with its revision changeset @ about the meta information} { + SELECT CT.name, C.cid, F.name, R.rev + FROM changeset C, cstype CT, revision R, file F, csrevision CR + WHERE C.type = 0 -- revision changesets only + AND C.cid = CR.cid -- changeset --> its revisions + AND R.rid = CR.rid -- look at them + AND R.mid != C.src -- Only those which disagree with changeset about the meta + AND R.fid = F.fid -- get file of the revision + AND CT.tid = C.type -- get changeset type, for labeling + } + return } proc SymbolChangesets {} { + # This code performs a number of paranoid checks of the + # database, searching for inconsistent changeset/revision + # information. + + return ; # Disabled for now, bottlenecks ... + + upvar 1 n n ; # Counter for the checks (we print an id before + # the main label). + + # The next two checks are BOTTLENECKS. In essence we are + # checking each symbol changeset one by one. 
+ + # TODO: Try to rephrase the checks to make more use of + # indices, set and stream operations. + + # All revisions used by tag symbol changesets have to have the + # changeset's tag associated with them. + CheckInCS \ + {All revisions used by tag symbol changesets have to have the changeset's tag attached to them} \ + {does not have the tag of its symbol changeset @ attached to it} { + SELECT CT.name, C.cid, F.name, R.rev + FROM changeset C, cstype CT, revision R, file F, csrevision CR, tag T + WHERE C.type = 1 -- symbol changesets only + AND C.src = T.sid -- tag only, linked by symbol id + AND C.cid = CR.cid -- changeset --> its revisions + AND R.rid = CR.rid -- look at the revisions + -- and look for the tag among the attached ones. + AND T.sid NOT IN (SELECT TB.sid + FROM tag TB + WHERE TB.rev = R.rid) + AND R.fid = F.fid -- get file of revision + } + + # All revisions used by branch symbol changesets have to have + # the changeset's branch associated with them. + + CheckInCS \ + {All revisions used by branch symbol changesets have to have the changeset's branch attached to them} \ + {does not have the branch of its symbol changeset @ attached to it} { + SELECT CT.name, C.cid, F.name, R.rev, C.cid + FROM changeset C, cstype CT, revision R, file F, csrevision CR, branch B + WHERE C.type = 1 -- symbol changesets only + AND C.src = B.sid -- branches only + AND C.cid = CR.cid -- changeset --> its revisions + AND R.rid = CR.rid -- look at the revisions + -- and look for the branch among the attached ones. + AND B.sid NOT IN (SELECT BB.sid + FROM branch BB + WHERE BB.root = R.rid) + AND R.fid = F.fid -- get file of revision + } + + # TODO + # The state has to contain at least one tag symbol changeset + # for all known tags. + + # TODO + # The state has to contain at least one branch symbol changeset + # for all known branches. 
+ return } proc Check {header label sql} { upvar 1 n n set ok 1 foreach {fname revnr} [state run $sql] { set ok 0 trouble fatal "$fname <$revnr> $label" + } + log write 5 integrity "\[[format %02d [incr n]]\] [expr {$ok ? "Ok " : "Failed"}] ... $header" + return + } + + proc CheckCS {header label sql} { + upvar 1 n n + set ok 1 + foreach {ctype cid} [state run $sql] { + set ok 0 + trouble fatal "<$ctype $cid> $label" + } + log write 5 integrity "\[[format %02d [incr n]]\] [expr {$ok ? "Ok " : "Failed"}] ... $header" + return + } + + proc CheckInCS {header label sql} { + upvar 1 n n + set ok 1 + foreach {cstype csid fname revnr} [state run $sql] { + set ok 0 + set b "<$cstype $csid>" + trouble fatal "$fname <$revnr> [string map [list @ $b] $label]" } log write 5 integrity "\[[format %02d [incr n]]\] [expr {$ok ? "Ok " : "Failed"}] ... $header" return }
Modified tools/cvs2fossil/lib/c2f_pbreakacycle.tcl from [949289215b] to [642a0a9f2e].
@@ -76,14 +76,14 @@ cyclebreaker breakcmd [myproc BreakCycle] state transaction { LoadCommitOrder cyclebreaker run break-all [myproc Changesets] - - repository printcsetstatistics - integrity changesets - } + } + + repository printcsetstatistics + integrity changesets return } typemethod discard {} { # Pass manager interface. Executed for all passes after the
Modified tools/cvs2fossil/lib/c2f_pbreakrcycle.tcl from [685d23fe9c] to [630bffc4ad].
@@ -66,14 +66,14 @@ cyclebreaker breakcmd {::vc::fossil::import::cvs::cyclebreaker break} state transaction { cyclebreaker run break-rev [myproc Changesets] + } - repository printcsetstatistics - integrity changesets - } + repository printcsetstatistics + integrity changesets return } typemethod discard {} { # Pass manager interface. Executed for all passes after the
Modified tools/cvs2fossil/lib/c2f_pbreakscycle.tcl from [258b058f24] to [87397f0522].
@@ -65,14 +65,14 @@ cyclebreaker breakcmd {::vc::fossil::import::cvs::cyclebreaker break} state transaction { cyclebreaker run break-sym [myproc Changesets] + } - repository printcsetstatistics - integrity changesets - } + repository printcsetstatistics + integrity changesets return } typemethod discard {} { # Pass manager interface. Executed for all passes after the
Modified tools/cvs2fossil/lib/c2f_pinitcsets.tcl from [47d9786663] to [3b39c5f0f7].
@@ -132,14 +132,14 @@ state transaction { CreateRevisionChangesets ; # Group file revisions into csets. BreakInternalDependencies ; # Split the csets based on internal conflicts. CreateSymbolChangesets ; # Create csets for tags and branches. PersistTheChangesets - - repository printcsetstatistics - integrity changesets } + + repository printcsetstatistics + integrity changesets return } typemethod discard {} { # Pass manager interface. Executed for all passes after the