Artifact 93e4a1e5a55baba7b897dd6af2745f374b18af15
File
tools/cvs2fossil/lib/rcsparser.tcl
part of check-in
[38b967dcf5]
- Merge aku's CVS import changes into the main line. Fix a small bug in diff.c.
by
drh on
2007-11-17 00:29:42.
Also file
tools/cvs2fossil/lib/rcsparser.tcl
part of check-in
[fa643aa91d]
- Disabled check for control characters in the log message. Allowing this for the moment, lets see if we run into trouble later on. Further reworked the check of symbol names, disallow forward slashs only at end. Found legal tags containing forward slashs in the middle.
by
aku on
2007-10-24 07:54:01.
## -*- tcl -*-
# # ## ### ##### ######## ############# #####################
## Copyright (c) 2007 Andreas Kupries.
#
# This software is licensed as described in the file LICENSE, which
# you should have received as part of this distribution.
#
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs, available at http://fossil-scm.hwaci.com/fossil
# # ## ### ##### ######## ############# #####################
# A tool package, provides a parser for RCS archive files. This parser
# is implemented via recursive descent. It is not only given a file to
# process, but also a 'sink', an object it calls out to at important
# places of the parsing process to either signal an event and/or
# convey gathered information to it. The sink is responsible for the
# actual processing of the data in whatever way it desires.
# # ## ### ##### ######## ############# #####################
## Requirements
package require Tcl 8.4 ; # Required runtime.
package require snit ; # OO system.
package require fileutil ; # File utilities.
package require vc::tools::log ; # User feedback.
package require struct::list ; # Advanced list ops.
# # ## ### ##### ######## ############# #####################
##
snit::type ::vc::rcs::parser {
# # ## ### ##### ######## #############
## Public API
typemethod process {path sink} {
Initialize $path $sink
Call begin
Admin ; Deltas ; Description ; DeltaTexts
Call done
return
}
# # ## ### ##### ######## #############
## Internal methods, recursive descent, syntactical processing
proc Admin {} {
Head ; PrincipalBranch ; Access ; Symbols
Locks ; Strictness ; FileComment ; Expand
Call admindone
return
}
# # ## ### ##### ######## #############
proc Head {} {
RequiredLiteral head
RequiredNumber -> head
Semicolon
Call sethead $head
return
}
proc PrincipalBranch {} {
if {![OptionalLiteral branch]} return
RequiredNumber -> branch
Semicolon
Call setprincipalbranch $branch
return
}
proc Access {} {
RequiredLiteral access ;
Semicolon
return
}
proc Symbols {} {
RequiredLiteral symbols
while {[Ident -> symbol]} {
if {
*$} $symbol] ||
[string match */ $symbol]
} {
Rewind
Bad {symbol name}
}
RequiredNumber -> rev
Call deftag $symbol $rev
}
Semicolon
return
}
proc Locks {} {
# Not saving locks.
RequiredLiteral locks
while {[Ident -> symbol]} {
RequiredNumber -> l
}
Semicolon
return
}
proc Strictness {} {
# Not saving strictness
if {![OptionalLiteral strict]} return
Semicolon
return
}
proc FileComment {} {
if {![OptionalLiteral comment]} return
if {![OptionalString -> c]} return
Semicolon
Call setcomment $c
return
}
proc Expand {} {
# Not saving expanded keywords
if {![OptionalLiteral expand]} return
if {![OptionalString -> dummy]} return
Semicolon
return
}
# # ## ### ##### ######## #############
proc Deltas {} {
set ok [OptionalNumber -> rev]
while {$ok} {
Date -> d
Author -> a
State -> s
Branches -> b
NextRev -> n
Call def $rev $d $a $s $n $b
# Check if this is followed by a revision number or the
# literal 'desc'. If neither we consume whatever is there
# until the next semicolon, as it has to be a 'new
# phrase'. Otherwise, for a revision number we loop back
# and consume that revision, and lastly for 'desc' we stop
# completely as this signals the end of the revision tree
# and the beginning of the deltas.
while {1} {
set ok [OptionalNumber -> rev]
if {$ok} break
if {[LiteralPeek desc]} {
set ok 0
break
}
Anything -> dummy
Semicolon
}
}
Call defdone
return
}
# # ## ### ##### ######## #############
proc Date {_ dv} {
upvar 1 $dv d
RequiredLiteral date
RequiredNumber -> d
Semicolon
struct::list assign [split $d .] year month day hour min sec
if {$year < 100} {incr year 1900}
set d [clock scan "${year}-${month}-${day} ${hour}:${min}:${sec}"]
return
}
proc Author {_ av} {
upvar 1 $av a
RequiredLiteral author
Anything -> a
Semicolon
return
}
proc State {_ sv} {
upvar 1 $sv s
RequiredLiteral state
Anything -> s
Semicolon
return
}
proc Branches {_ bv} {
upvar 1 $bv b
RequiredLiteral branches
Anything -> b
Semicolon
return
}
proc NextRev {_ nv} {
upvar 1 $nv n
RequiredLiteral next
Anything -> n
Semicolon
return
}
# # ## ### ##### ######## #############
proc Description {} {
upvar 1 data data res res
RequiredLiteral desc
RequiredString -> d
Call setdesc $d
return
}
# # ## ### ##### ######## #############
proc DeltaTexts {} {
while {[OptionalNumber -> rev]} {
RequiredLiteral log
RequiredString -> cmsg
if {[regexp {[\000-\010\013\014\016-\037]} $cmsg]} {
#Rewind
#Bad "log message for $rev contains at least one control character"
}
RequiredLiteral text
RequiredStringRange -> delta
Call extend $rev $cmsg $delta
}
return
}
# # ## ### ##### ######## #############
## Internal methods, lexiographical processing
proc Semicolon {} {
::variable mydata
::variable mypos
set ok [regexp -start $mypos -indices -- {\A\s*;\s*} $mydata match]
if {!$ok} { Expected ';' }
SkipOver match
return
}
proc RequiredLiteral {name} {
::variable mydata
::variable mypos
set pattern "\\A\\s*$name\\s*"
set ok [regexp -start $mypos -indices -- $pattern $mydata match]
if {!$ok} { Expected '$name' }
SkipOver match
return
}
proc OptionalLiteral {name} {
::variable mydata
::variable mypos
set pattern "\\A\\s*$name\\s*"
set ok [regexp -start $mypos -indices -- $pattern $mydata match]
if {!$ok} { return 0 }
SkipOver match
return 1
}
proc LiteralPeek {name} {
::variable mydata
::variable mypos
set pattern "\\A\\s*$name\\s*"
set ok [regexp -start $mypos -indices -- $pattern $mydata match]
if {!$ok} { return 0 }
# NO - SkipOver match - Only looking ahead here.
return 1
}
proc RequiredNumber {_ v} {
upvar 1 $v value
::variable mydata
::variable mypos
set pattern {\A\s*((\d|\.)+)\s*}
set ok [regexp -start $mypos -indices -- $pattern $mydata match v]
if {!$ok} { Expected id }
Extract $v -> value
SkipOver match
return
}
proc OptionalNumber {_ v} {
upvar 1 $v value
::variable mydata
::variable mypos
set pattern {\A\s*((\d|\.)+)\s*}
set ok [regexp -start $mypos -indices -- $pattern $mydata match v]
if {!$ok} { return 0 }
Extract $v -> value
SkipOver match
return 1
}
proc RequiredString {_ v} {
upvar 1 $v value
::variable mydata
::variable mypos
set ok [regexp -start $mypos -indices -- {\A\s*@(([^@]*(@@)*)*)@\s*} $mydata match v]
if {!$ok} { Expected string }
Extract $v -> value
set value [string map {@@ @} $value]
SkipOver match
return
}
proc RequiredStringRange {_ v} {
upvar 1 $v value
::variable mydata
::variable mypos
set ok [regexp -start $mypos -indices -- {\A\s*@(([^@]*(@@)*)*)@\s*} $mydata match value]
if {!$ok} { Expected string }
SkipOver match
return
}
proc OptionalString {_ v} {
upvar 1 $v value
::variable mydata
::variable mypos
set ok [regexp -start $mypos -indices -- {\A\s*@(([^@]*(@@)*)*)@\s*} $mydata match v]
if {!$ok} { return 0 }
Extract $v -> value
set value [string map {@@ @} $value]
SkipOver match
return 1
}
proc Ident {_ v} {
upvar 1 $v value
::variable mydata
::variable mypos
set ok [regexp -start $mypos -indices -- {\A\s*;\s*} $mydata]
if {$ok} { return 0 }
set ok [regexp -start $mypos -indices -- {\A\s*([^:]*)\s*:\s*} $mydata match v]
if {!$ok} { return 0 }
Extract $v -> value
SkipOver match
return 1
}
proc Anything {_ v} {
upvar 1 $v value
::variable mydata
::variable mypos
regexp -start $mypos -indices -- {\A\s*([^;]*)\s*} $mydata match v
Extract $v -> value
SkipOver match
return
}
# # ## ### ##### ######## #############
## Internal methods, input handling
proc Extract {range _ v} {
upvar 1 $v value
::variable mydata
struct::list assign $range s e
set value [string range $mydata $s $e]
return
}
proc SkipOver {mv} {
# Note: The indices are absolute!, not relative to the start
# location.
upvar 1 $mv match
::variable mypos
::variable mysize
::variable mylastpos
struct::list assign $match s e
#puts "<$s $e> [info level -1]"
set mylastpos $mypos
set mypos $e
incr mypos
log progress 2 rcs $mypos $mysize
#puts $mypos/$mysize
return
}
proc Rewind {} {
::variable mypos
::variable mylastpos
set mypos $mylastpos
return
}
proc Expected {x} {
::variable mydata
::variable mypos
set e $mypos ; incr e 30
return -code error -errorcode vc::rcs::parser \
"Expected $x @ '[string range $mydata $mypos $e]...'"
}
proc Bad {x} {
::variable mydata
::variable mypos
set e $mypos ; incr e 30
return -code error -errorcode vc::rcs::parser \
"Bad $x @ '[string range $mydata $mypos $e]...'"
}
# # ## ### ##### ######## #############
## Setup, callbacks.
proc Initialize {path sink} {
::variable mypos 0
::variable mydata [fileutil::cat -encoding binary $path]
::variable mysize [file size $path]
::variable mysink $sink
return
}
proc Call {args} {
::variable mysink
set cmd $mysink
foreach a $args { lappend cmd $a }
eval $cmd
return
}
# # ## ### ##### ######## #############
## Configuration
typevariable mydata {} ; # Rcs archive contents to process
typevariable mysize 0 ; # Length of contents
typevariable mysink {} ; # Sink to report to
pragma -hasinstances no ; # singleton
pragma -hastypeinfo no ; # no introspection
pragma -hastypedestroy no ; # immortal
# # ## ### ##### ######## #############
}
namespace eval ::vc::rcs {
namespace export parser
namespace eval parser {
namespace import ::vc::tools::log
log register rcs
}
}
# # ## ### ##### ######## ############# #####################
## Ready
package provide vc::rcs::parser 1.0
return