Artifact 6dff67402be8ad4f4c328ebbfbe56811492c1550
File
tools/cvs2fossil/lib/rcsparser.tcl
part of check-in
[e7bb3d073d]
- rcs parser rewritten to snit type, singleton, using callbacks to convey information. Got a grip on regexp -start -indices (^ -> \A, indices are absolute). This means that I now don't have to shift the unprocessed string down in memory, it is all index processing now. This makes for a much faster parser, especially for large files with many revisions (example: ChangeLogs).
by
aku on
2007-10-06 21:00:48.
## -*- tcl -*-
# # ## ### ##### ######## ############# #####################
## Copyright (c) 2007 Andreas Kupries.
#
# This software is licensed as described in the file LICENSE, which
# you should have received as part of this distribution.
#
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs, available at http://fossil-scm.hwaci.com/fossil
# # ## ### ##### ######## ############# #####################
# A tool package, provides a parser for RCS archive files. This parser
# is implemented via recursive descent. It is not only given a file to
# process, but also a 'sink', an object it calls out to at important
# places of the parsing process to either signal an event and/or
# convey gathered information to it. The sink is responsible for the
# actual processing of the data in whatever way it desires.
# # ## ### ##### ######## ############# #####################
## Requirements
package require Tcl 8.4 ; # Required runtime.
package require snit ; # OO system.
package require fileutil ; # File utilities.
package require vc::tools::log ; # User feedback.
package require struct::list ; # Advanced list ops.
# # ## ### ##### ######## ############# #####################
##
snit::type ::vc::rcs::parser {
# # ## ### ##### ######## #############
## Public API
typemethod process {path sink} {
Initialize $path $sink
Call begin
Admin ; Deltas ; Description ; DeltaTexts
Call done
return
}
# # ## ### ##### ######## #############
## Internal methods, recursive descent, syntactical processing
proc Admin {} {
Head ; PrincipalBranch ; Access ; Symbols
Locks ; Strictness ; FileComment ; Expand
Call admindone
return
}
# # ## ### ##### ######## #############
proc Head {} {
RequiredLiteral head
RequiredNumber -> head
Semicolon
Call sethead $head
return
}
proc PrincipalBranch {} {
if {![OptionalLiteral branch]} return
RequiredNumber -> branch
Semicolon
Call setprincipalbranch $branch
return
}
proc Access {} {
RequiredLiteral access ;
Semicolon
return
}
proc Symbols {} {
set d {}
RequiredLiteral symbols
while {[Ident -> symbol]} {
RequiredNumber -> rev
# TODO: distinguish branch/tag symbols, transform revision
# numbers into canonical form.
lappend d $symbol $rev
}
Semicolon
Call setsymbols $d
return
}
proc Locks {} {
# Not saving locks.
RequiredLiteral locks
while {[Ident -> symbol]} {
RequiredNumber -> l
}
Semicolon
return
}
proc Strictness {} {
# Not saving strictness
if {![OptionalLiteral strict]} return
Semicolon
return
}
proc FileComment {} {
if {![OptionalLiteral comment]} return
if {![OptionalString -> c]} return
Semicolon
Call setcomment $c
return
}
proc Expand {} {
# Not saving expanded keywords
if {![OptionalLiteral expand]} return
if {![OptionalString -> dummy]} return
Semicolon
return
}
# # ## ### ##### ######## #############
proc Deltas {} {
while {[OptionalNumber -> rev]} {
Date -> d
Author -> a
State -> s
Branches -> b
NextRev -> n
Call def $rev $d $a $s $n $b
}
return
}
# # ## ### ##### ######## #############
proc Date {_ dv} {
upvar 1 $dv d
RequiredLiteral date
RequiredNumber -> d
Semicolon
struct::list assign [split $d .] year month day hour min sec
if {$year < 100} {incr year 1900}
set d [clock scan "${year}-${month}-${day} ${hour}:${min}:${sec}"]
return
}
proc Author {_ av} {
upvar 1 $av a
RequiredLiteral author
Anything -> a
Semicolon
return
}
proc State {_ sv} {
upvar 1 $sv s
RequiredLiteral state
Anything -> s
Semicolon
return
}
proc Branches {_ bv} {
upvar 1 $bv b
RequiredLiteral branches
Anything -> b
Semicolon
return
}
proc NextRev {_ nv} {
upvar 1 $nv n
RequiredLiteral next
Anything -> n
Semicolon
return
}
# # ## ### ##### ######## #############
proc Description {} {
upvar 1 data data res res
RequiredLiteral desc
RequiredString -> d
Call setdesc $d
return
}
# # ## ### ##### ######## #############
proc DeltaTexts {} {
while {[OptionalNumber -> rev]} {
RequiredLiteral log
RequiredString -> cmsg
RequiredLiteral text
RequiredStringRange -> delta
Call extend $rev $cmsg $delta
}
return
}
# # ## ### ##### ######## #############
## Internal methods, lexiographical processing
proc Semicolon {} {
::variable mydata
::variable mypos
set ok [regexp -start $mypos -indices -- {\A\s*;\s*} $mydata match]
if {!$ok} { Expected ';' }
SkipOver match
return
}
proc RequiredLiteral {name} {
::variable mydata
::variable mypos
set pattern "\\A\\s*$name\\s*"
set ok [regexp -start $mypos -indices -- $pattern $mydata match]
if {!$ok} { Expected '$name' }
SkipOver match
return
}
proc OptionalLiteral {name} {
::variable mydata
::variable mypos
set pattern "\\A\\s*$name\\s*"
set ok [regexp -start $mypos -indices -- $pattern $mydata match]
if {!$ok} { return 0 }
SkipOver match
return 1
}
proc RequiredNumber {_ v} {
upvar 1 $v value
::variable mydata
::variable mypos
set pattern {\A\s*((\d|\.)+)\s*}
set ok [regexp -start $mypos -indices -- $pattern $mydata match v]
if {!$ok} { Expected id }
Extract $v -> value
SkipOver match
return
}
proc OptionalNumber {_ v} {
upvar 1 $v value
::variable mydata
::variable mypos
set pattern {\A\s*((\d|\.)+)\s*}
set ok [regexp -start $mypos -indices -- $pattern $mydata match v]
if {!$ok} { return 0 }
Extract $v -> value
SkipOver match
return 1
}
proc RequiredString {_ v} {
upvar 1 $v value
::variable mydata
::variable mypos
set ok [regexp -start $mypos -indices -- {\A\s*@(([^@]*(@@)*)*)@\s*} $mydata match v]
if {!$ok} { Expected string }
Extract $v -> value
set value [string map {@@ @} $value]
SkipOver match
return
}
proc RequiredStringRange {_ v} {
upvar 1 $v value
::variable mydata
::variable mypos
set ok [regexp -start $mypos -indices -- {\A\s*@(([^@]*(@@)*)*)@\s*} $mydata match value]
if {!$ok} { Expected string }
SkipOver match
return
}
proc OptionalString {_ v} {
upvar 1 $v value
::variable mydata
::variable mypos
set ok [regexp -start $mypos -indices -- {\A\s*@(([^@]*(@@)*)*)@\s*} $mydata match v]
if {!$ok} { return 0 }
Extract $v -> value
set value [string map {@@ @} $value]
SkipOver match
return 1
}
proc Ident {_ v} {
upvar 1 $v value
::variable mydata
::variable mypos
set ok [regexp -start $mypos -indices -- {\A\s*;\s*} $mydata]
if {$ok} { return 0 }
set ok [regexp -start $mypos -indices -- {\A\s*([^:]*)\s*:\s*} $mydata match v]
if {!$ok} { return 0 }
Extract $v -> value
SkipOver match
return 1
}
proc Anything {_ v} {
upvar 1 $v value
::variable mydata
::variable mypos
regexp -start $mypos -indices -- {\A\s*([^;]*)\s*} $mydata match v
Extract $v -> value
SkipOver match
return
}
# # ## ### ##### ######## #############
## Internal methods, input handling
proc Extract {range _ v} {
upvar 1 $v value
::variable mydata
struct::list assign $range s e
set value [string range $mydata $s $e]
return
}
proc SkipOver {mv} {
# Note: The indices are absolute!, not relative to the start
# location.
upvar 1 $mv match
::variable mypos
::variable mysize
struct::list assign $match s e
#puts "<$s $e> [info level -1]"
set mypos $e
incr mypos
log progress 2 rcs $mypos $mysize
#puts $mypos/$mysize
return
}
proc Expected {x} {
::variable mydata
::variable mypos
set e $mypos ; incr e 30
return -code error "Expected $x @ '[string range $mydata $mypos $e]...'"
}
# # ## ### ##### ######## #############
## Setup, callbacks.
proc Initialize {path sink} {
::variable mypos 0
::variable mydata [fileutil::cat -encoding binary $path]
::variable mysize [file size $path]
::variable mysink $sink
return
}
proc Call {args} {
::variable mysink
set cmd $mysink
foreach a $args { lappend cmd $a }
eval $cmd
return
}
# # ## ### ##### ######## #############
## Configuration
typevariable mydata {} ; # Rcs archive contents to process
typevariable mysize 0 ; # Length of contents
typevariable mysink {} ; # Sink to report to
pragma -hasinstances no ; # singleton
pragma -hastypeinfo no ; # no introspection
pragma -hastypedestroy no ; # immortal
# # ## ### ##### ######## #############
}
namespace eval ::vc::rcs {
namespace export parser
namespace eval parser {
namespace import ::vc::tools::log
log register rcs
}
}
# # ## ### ##### ######## ############# #####################
## Ready
package provide vc::rcs::parser 1.0
return