Check-in [8c6488ded2]
Not logged in
Overview

SHA1 Hash:8c6488ded2d656ba8e025c0abf9ed9bfaeab5007
Date: 2007-11-27 04:26:56
User: aku
Comment:Continued work on the integrity checks for changesets. Moved callers out of transactions. Two checks are already tripping on bad changesets made by InitCSets (pass 5).
Timelines: ancestors | descendants | both | trunk
Other Links: files | ZIP archive | manifest

Tags And Properties
Changes
[hide diffs]

Modified tools/cvs2fossil/lib/c2f_integrity.tcl from [69cecd83d4] to [19b5dfccf5].

@@ -28,25 +28,31 @@
 snit::type ::vc::fossil::import::cvs::integrity {
     # # ## ### ##### ######## #############
     ## Public API
 
     typemethod strict {} {
 	# Full check: runs the AllButMeta checks followed by the Meta
 	# checks. The variable 'n' initialized here is the check
 	# counter both procedures link to via 'upvar'.
+	log write 4 integrity {Check database consistency}
+
 	set n 0
 	AllButMeta
 	Meta
 	return
     }
 
     typemethod metarelaxed {} {
 	# Relaxed check: runs only the AllButMeta checks, the Meta
 	# checks are not invoked. The variable 'n' initialized here is
 	# the check counter AllButMeta links to via 'upvar'.
+	log write 4 integrity {Check database consistency}
+
 	set n 0
 	AllButMeta
 	return
     }
 
     typemethod changesets {} {
 	# Changeset checks: invokes AllChangesets, RevisionChangesets,
 	# and SymbolChangesets, in this order. The variable 'n'
 	# initialized here is the check counter the check procedures
 	# link to via 'upvar'.
+	log write 4 integrity {Check database consistency}
+
 	set n 0
-	RevisionCSetLinkage
+	AllChangesets
 	RevisionChangesets
 	SymbolChangesets
 	return
     }
 
@@ -54,11 +60,10 @@
     ## Internal methods
 
     proc AllButMeta {} {
 	# This code performs a number of paranoid checks of the
 	# database, searching for inconsistent cross-references.
-	log write 4 integrity {Check database consistency}
 
 	upvar 1 n n ; # Counter for the checks (we print an id before
 		      # the main label).
 
 	# Find all revisions which disagree with their line of
@@ -269,11 +274,10 @@
     }
 
     proc Meta {} {
 	# This code performs a number of paranoid checks of the
 	# database, searching for inconsistent cross-references.
-	log write 4 integrity {Check database consistency}
 
 	upvar 1 n n ; # Counter for the checks (we print an id before
 		      # the main label).
 
 	# Find all revisions which disagree with their meta data about
@@ -289,26 +293,254 @@
 		;
 	    }
 	return
     }
 
-    proc RevisionCSetLinkage {} {
+    proc AllChangesets {} {
+	# This code performs a number of paranoid checks of the
+	# database, searching for inconsistent changeset/revision
+	# information.
+	#
+	# The queries handed to 'Check' yield (file name, revision
+	# number) pairs, the queries handed to 'CheckCS' yield
+	# (changeset type name, changeset id) pairs. Each row returned
+	# by a query describes one violation of the checked condition.
+
+	upvar 1 n n ; # Counter for the checks (we print an id before
+		      # the main label).
+
+	# Find all revisions which are not used by at least one
+	# revision changeset.
+	Check \
+	    {All revisions have to be used by at least one revision changeset} \
+	    {is not used by a revision changeset} {
+		-- Unused revisions = All revisions
+		--                  - revisions used by revision changesets.
+		--
+		-- Both sets can be computed easily, and subtracted
+                -- from each other. Then we can get the associated
+                -- file (name) for display.
+
+		SELECT F.name, R.rev
+		FROM revision R, file F
+		WHERE R.rid IN (SELECT rid FROM revision                      -- All revisions
+				EXCEPT                                     -- subtract
+				SELECT CR.rid FROM csrevision CR, changeset C -- revisions used
+				WHERE C.cid = CR.cid                          -- by any revision
+				AND C.type = 0)                               -- changeset
+		AND   R.fid = F.fid              -- get file of unused revision
+	    }
+	# Find all revisions which are used by more than one revision
+	# changeset.
+	Check \
+	    {All revisions have to be used by at most one revision changeset} \
+	    {is used by multiple revision changesets} {
+		-- Principle of operation: Get all revision/changeset
+                -- pairs for all revision changesets, group by
+                -- revision to aggregate the changeset, counting
+                -- them. From the resulting revision/count table
+                -- select those with more than one user, and get their
+                -- associated file (name) for display.
+
+		SELECT F.name, R.rev
+		FROM revision R, file F,
+		     (SELECT CR.rid AS rid, count(CR.cid) AS count
+		      FROM csrevision CR, changeset C
+		      WHERE C.type = 0
+		      AND   C.cid = CR.cid
+		      GROUP BY CR.rid ) AS U
+		WHERE U.count > 1
+		AND R.rid = U.rid
+		AND R.fid = F.fid
+	    }
+	# All revisions in all changesets have to agree on the LOD
+	# their changeset belongs to. In other words, all revisions in
+	# a changeset have to refer to the same line of development.
+	#
+	# Instead of looking at all pairs of revisions in all
+	# changesets we generate the distinct set of all LODs
+	# referenced by the revisions of a changeset, look for those
+	# with cardinality > 1, and get the identifying information
+	# for the changesets found thusly.
+	CheckCS \
+	    {All revisions in a changeset have to belong to the same LOD} \
+	    {: Its revisions disagree about the LOD they belong to} {
+		SELECT T.name, C.cid
+		FROM   changeset C, cstype T
+		WHERE  C.cid IN (SELECT U.cid
+				 FROM (SELECT DISTINCT CR.cid AS cid, R.lod AS lod
+				       FROM   csrevision CR, revision R
+				       WHERE  CR.rid = R.rid) AS U
+				 GROUP BY U.cid HAVING COUNT(U.lod) > 1)
+		AND    T.tid = C.type
+	    }
+	# All revisions in all changesets have to agree on the project
+	# their changeset belongs to. In other words, all revisions in
+	# a changeset have to refer to the same project.
+	#
+	# Instead of looking at all pairs of revisions in all
+	# changesets we generate the distinct set of all projects
+	# referenced by the revisions of a changeset, look for those
+	# with cardinality > 1, and get the identifying information
+	# for the changesets found thusly.
+	CheckCS \
+	    {All revisions in a changeset have to belong to the same project} \
+	    {: Its revisions disagree about the project they belong to} {
+		SELECT T.name, C.cid
+		FROM   changeset C, cstype T
+		WHERE  C.cid IN (SELECT U.cid
+				 FROM (SELECT DISTINCT CR.cid AS cid, F.pid AS pid
+				       FROM   csrevision CR, revision R, file F
+				       WHERE  CR.rid = R.rid
+				       AND    F.fid  = R.fid) AS U
+				 GROUP BY U.cid HAVING COUNT(U.pid) > 1)
+		AND    T.tid = C.type
+	    }
+	# All revisions in a single changeset have to belong to
+	# different files. Conversely: No two revisions of a single
+	# file are allowed to be in the same changeset.
+	#
+	# Instead of looking at all pairs of revisions in all
+	# changesets we generate the distinct set of all files
+	# referenced by the revisions of a changeset, and look for
+	# those with cardinality < the cardinality of the set of
+	# revisions, and get the identifying information for the
+	# changesets found thusly.
+	CheckCS \
+	    {All revisions in a changeset have to belong to different files} \
+	    {: Its revisions share files} {
+		SELECT T.name, C.cid
+		FROM   changeset C, cstype T
+		WHERE  C.cid IN (SELECT VV.cid
+				 FROM (SELECT U.cid as cid, COUNT (U.fid) AS fcount
+				       FROM (SELECT DISTINCT CR.cid AS cid, R.fid AS fid
+					     FROM   csrevision CR, revision R
+					     WHERE  CR.rid = R.rid) AS U
+				       GROUP BY U.cid) AS UU,
+				      (SELECT V.cid AS cid, COUNT (V.rid) AS rcount
+				       FROM csrevision V
+				       GROUP BY V.cid) AS VV
+				 WHERE VV.cid = UU.cid
+				 AND   UU.fcount < VV.rcount)
+		AND    T.tid = C.type
+	    }
+	return
     }
 
     proc RevisionChangesets {} {
 	# Checks restricted to revision changesets (changeset type 0).
 	# Violations are reported through CheckInCS, which replaces
 	# the '@' in the label with the "<type id>" of the changeset
 	# and expects rows of (changeset type name, changeset id,
 	# file name, revision number).
+	# This code performs a number of paranoid checks of the
+	# database, searching for inconsistent changeset/revision
+	# information.
+
+	upvar 1 n n ; # Counter for the checks (we print an id before
+		      # the main label).
+
+	# All revisions used by revision changesets have to refer to
+	# the same meta information as their changeset.
+	CheckInCS \
+	    {All revisions have to agree with their revision changeset about the used meta information} \
+	    {disagrees with its revision changeset @ about the meta information} {
+		SELECT CT.name, C.cid, F.name, R.rev
+		FROM changeset C, cstype CT, revision R, file F, csrevision CR
+		WHERE C.type = 0       -- revision changesets only
+		AND   C.cid  = CR.cid  -- changeset --> its revisions
+		AND   R.rid  = CR.rid  -- look at them
+		AND   R.mid != C.src   -- Only those which disagree with changeset about the meta
+		AND   R.fid = F.fid    -- get file of the revision
+		AND   CT.tid = C.type  -- get changeset type, for labeling
+	    }
+	return
     }
 
     proc SymbolChangesets {} {
+	# This code performs a number of paranoid checks of the
+	# database, searching for inconsistent changeset/revision
+	# information.
+	#
+	# The checks are restricted to symbol changesets (changeset
+	# type 1). Their queries are run through CheckInCS and thus
+	# have to deliver exactly four columns per violation:
+	# changeset type name, changeset id, file name, and revision
+	# number. The '@' in the labels is replaced by CheckInCS with
+	# the "<type id>" of the changeset.
+
+	return ; # Disabled for now, bottlenecks ...
+
+	upvar 1 n n ; # Counter for the checks (we print an id before
+		      # the main label).
+
+	# The next two checks are BOTTLENECKS. In essence we are
+	# checking each symbol changeset one by one.
+
+	# TODO: Try to rephrase the checks to make more use of
+	# indices, set and stream operations.
+
+	# All revisions used by tag symbol changesets have to have the
+	# changeset's tag associated with them.
+	CheckInCS \
+	    {All revisions used by tag symbol changesets have to have the changeset's tag attached to them} \
+	    {does not have the tag of its symbol changeset @ attached to it} {
+		SELECT CT.name, C.cid, F.name, R.rev
+		FROM   changeset C, cstype CT, revision R, file F, csrevision CR, tag T
+		WHERE  C.type = 1       -- symbol changesets only
+		AND    C.src  = T.sid   -- tag only, linked by symbol id
+		AND    C.cid  = CR.cid  -- changeset --> its revisions
+		AND    R.rid  = CR.rid  -- look at the revisions
+		AND    CT.tid = C.type  -- get changeset type, for labeling
+		-- and look for the tag among the attached ones.
+		AND    T.sid NOT IN (SELECT TB.sid
+				     FROM   tag TB
+				     WHERE  TB.rev = R.rid)
+		AND    R.fid = F.fid    -- get file of revision
+	    }
+
+	# All revisions used by branch symbol changesets have to have
+	# the changeset's branch associated with them.
+
+	CheckInCS \
+	    {All revisions used by branch symbol changesets have to have the changeset's branch attached to them} \
+	    {does not have the branch of its symbol changeset @ attached to it} {
+		SELECT CT.name, C.cid, F.name, R.rev
+		FROM   changeset C, cstype CT, revision R, file F, csrevision CR, branch B
+		WHERE  C.type = 1       -- symbol changesets only
+		AND    C.src  = B.sid   -- branches only
+		AND    C.cid  = CR.cid  -- changeset --> its revisions
+		AND    R.rid  = CR.rid  -- look at the revisions
+		AND    CT.tid = C.type  -- get changeset type, for labeling
+		-- and look for the branch among the attached ones.
+		AND    B.sid NOT IN (SELECT BB.sid
+				     FROM   branch BB
+				     WHERE  BB.root = R.rid)
+		AND    R.fid = F.fid    -- get file of revision
+	    }
+
+	# TODO
+	# The state has to contain at least one tag symbol changeset
+	# for all known tags.
+
+	# TODO
+	# The state has to contain at least one branch symbol changeset
+	# for all known branches.
+	return
     }
 
 
     proc Check {header label sql} {
 	# Runs the query 'sql' and consumes its result as a sequence
 	# of (file name, revision number) pairs. Each pair is
 	# reported via 'trouble fatal', followed by 'label'.
 	# Afterwards one log line is written, containing the
 	# incremented check counter 'n' (a variable of the caller,
 	# linked via upvar), the Ok/Failed state, and 'header'.
 	upvar 1 n n
 	set ok 1
 	foreach {fname revnr} [state run $sql] {
 	    set ok 0
 	    trouble fatal "$fname <$revnr> $label"
+	}
+	log write 5 integrity "\[[format %02d [incr n]]\] [expr {$ok ? "Ok    " : "Failed"}] ... $header"
+	return
+    }
+
+    proc CheckCS {header label sql} {
 	# Runs the query 'sql' and consumes its result as a sequence
 	# of (changeset type name, changeset id) pairs. Each pair is
 	# reported via 'trouble fatal' as "<type id>", followed by
 	# 'label'. Afterwards one log line is written, containing the
 	# incremented check counter 'n' (a variable of the caller,
 	# linked via upvar), the Ok/Failed state, and 'header'.
+	upvar 1 n n
+	set ok 1
+	foreach {ctype cid} [state run $sql] {
+	    set ok 0
+	    trouble fatal "<$ctype $cid> $label"
+	}
+	log write 5 integrity "\[[format %02d [incr n]]\] [expr {$ok ? "Ok    " : "Failed"}] ... $header"
+	return
+    }
+
+    proc CheckInCS {header label sql} {
 	# Runs the query 'sql' and consumes its result in groups of
 	# four values: changeset type name, changeset id, file name,
 	# and revision number. Each group is reported via 'trouble
 	# fatal'; every '@' in 'label' is replaced by the "<type id>"
 	# of the changeset. Afterwards one log line is written,
 	# containing the incremented check counter 'n' (a variable of
 	# the caller, linked via upvar), the Ok/Failed state, and
 	# 'header'.
+	upvar 1 n n
+	set ok 1
+	foreach {cstype csid fname revnr} [state run $sql] {
+	    set ok 0
+	    set b "<$cstype $csid>"
+	    trouble fatal "$fname <$revnr> [string map [list @ $b] $label]"
 	}
 	log write 5 integrity "\[[format %02d [incr n]]\] [expr {$ok ? "Ok    " : "Failed"}] ... $header"
 	return
     }
 

Modified tools/cvs2fossil/lib/c2f_pbreakacycle.tcl from [949289215b] to [642a0a9f2e].

@@ -76,14 +76,14 @@
 	cyclebreaker breakcmd [myproc BreakCycle]
 
 	state transaction {
 	    LoadCommitOrder
 	    cyclebreaker run break-all [myproc Changesets]
-
-	    repository printcsetstatistics
-	    integrity changesets
-	}
+	}
+
+	repository printcsetstatistics
+	integrity changesets
 	return
     }
 
     typemethod discard {} {
 	# Pass manager interface. Executed for all passes after the

Modified tools/cvs2fossil/lib/c2f_pbreakrcycle.tcl from [685d23fe9c] to [630bffc4ad].

@@ -66,14 +66,14 @@
 
 	cyclebreaker breakcmd {::vc::fossil::import::cvs::cyclebreaker break}
 
 	state transaction {
 	    cyclebreaker run break-rev [myproc Changesets]
+	}
 
-	    repository printcsetstatistics
-	    integrity changesets
-	}
+	repository printcsetstatistics
+	integrity changesets
 	return
     }
 
     typemethod discard {} {
 	# Pass manager interface. Executed for all passes after the

Modified tools/cvs2fossil/lib/c2f_pbreakscycle.tcl from [258b058f24] to [87397f0522].

@@ -65,14 +65,14 @@
 
 	cyclebreaker breakcmd {::vc::fossil::import::cvs::cyclebreaker break}
 
 	state transaction {
 	    cyclebreaker run break-sym [myproc Changesets]
+	}
 
-	    repository printcsetstatistics
-	    integrity changesets
-	}
+	repository printcsetstatistics
+	integrity changesets
 	return
     }
 
     typemethod discard {} {
 	# Pass manager interface. Executed for all passes after the

Modified tools/cvs2fossil/lib/c2f_pinitcsets.tcl from [47d9786663] to [3b39c5f0f7].

@@ -132,14 +132,14 @@
 	state transaction {
 	    CreateRevisionChangesets  ; # Group file revisions into csets.
 	    BreakInternalDependencies ; # Split the csets based on internal conflicts.
 	    CreateSymbolChangesets    ; # Create csets for tags and branches.
 	    PersistTheChangesets
-
-	    repository printcsetstatistics
-	    integrity changesets
 	}
+
+	repository printcsetstatistics
+	integrity changesets
 	return
     }
 
     typemethod discard {} {
 	# Pass manager interface. Executed for all passes after the