Overview
SHA1 Hash: | e7bb3d073d90a44b8de6ecaa99943054d83a06d5 |
---|---|
Date: | 2007-10-06 21:00:48 |
User: | aku |
Comment: | rcs parser rewritten to snit type, singleton, using callbacks to convey information. Got a grip on regexp -start -indices (^ -> \A, indices are absolute). This means that I now don't have to shift the unprocessed string down in memory, it is all index processing now. This makes for a much faster parser, especially for large files with many revisions (example: ChangeLogs). |
Timelines: | ancestors | descendants | both | trunk |
Other Links: | files | ZIP archive | manifest |
Tags And Properties
- branch=trunk inherited from [a28c83647d]
- sym-trunk inherited from [a28c83647d]
Changes
[hide diffs]Modified tools/cvs2fossil/lib/rcsparser.tcl from [2b8758d0ac] to [6dff67402b].
@@ -1,378 +1,423 @@ -# ----------------------------------------------------------------------------- -# Tool packages. Parsing RCS files. -# -# Some of the information in RCS files is skipped over, most -# importantly the actual delta texts. The users of this parser need -# only the meta-data about when revisions were added, the tree -# (branching) structure, commit messages. +## -*- tcl -*- +# # ## ### ##### ######## ############# ##################### +## Copyright (c) 2007 Andreas Kupries. # -# The parser is based on Recursive Descent. - -# ----------------------------------------------------------------------------- -# Requirements - -package require Tcl 8.4 -package require fileutil ; # Tcllib (cat) -package require vc::tools::log ; # User feedback - -namespace eval ::vc::rcs::parser { - namespace import ::vc::tools::log - log register rcs -} - -# ----------------------------------------------------------------------------- -# API - -# vc::rcs::parser::process file -# -# Parses the rcs file and returns a dictionary containing the meta -# data. The following keys are used -# -# Key Meaning -# --- ------- -# 'head' head revision -# 'branch' ? -# 'symbol' dict (symbol -> revision) -# 'lock' dict (symbol -> revision) -# 'comment' file comment -# 'expand' ? -# 'date' dict (revision -> date) -# 'author' dict (revision -> author) -# 'state' dict (revision -> state) -# 'parent' dict (revision -> parent revision) -# 'commit' dict (revision -> commit message) +# This software is licensed as described in the file LICENSE, which +# you should have received as part of this distribution. # -# The state 'dead' has special meaning, the user should know that. - -# ----------------------------------------------------------------------------- -# API Implementation - -proc ::vc::rcs::parser::configure {key value} { - variable cache - switch -exact -- $key { - -cache { - set cache $value +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://fossil-scm.hwaci.com/fossil +# # ## ### ##### ######## ############# ##################### + +# A tool package, provides a parser for RCS archive files. This parser +# is implemented via recursive descent. It is not only given a file to +# process, but also a 'sink', an object it calls out to at important +# places of the parsing process to either signal an event and/or +# convey gathered information to it. The sink is responsible for the +# actual processing of the data in whatever way it desires. + +# # ## ### ##### ######## ############# ##################### +## Requirements + +package require Tcl 8.4 ; # Required runtime. +package require snit ; # OO system. +package require fileutil ; # File utilities. +package require vc::tools::log ; # User feedback. +package require struct::list ; # Advanced list ops. + +# # ## ### ##### ######## ############# ##################### +## + +snit::type ::vc::rcs::parser { + # # ## ### ##### ######## ############# + ## Public API + + typemethod process {path sink} { + Initialize $path $sink + Call begin + Admin ; Deltas ; Description ; DeltaTexts + Call done + return + } + + # # ## ### ##### ######## ############# + ## Internal methods, recursive descent, syntactical processing + + proc Admin {} { + Head ; PrincipalBranch ; Access ; Symbols + Locks ; Strictness ; FileComment ; Expand + Call admindone + return + } + + # # ## ### ##### ######## ############# + + proc Head {} { + RequiredLiteral head + RequiredNumber -> head + Semicolon + Call sethead $head + return + } + + proc PrincipalBranch {} { + if {![OptionalLiteral branch]} return + RequiredNumber -> branch + Semicolon + Call setprincipalbranch $branch + return + } + + proc Access {} { + RequiredLiteral access ; + Semicolon + return + } + + proc Symbols {} { + set d {} + RequiredLiteral symbols + while {[Ident -> symbol]} { + RequiredNumber -> rev + # TODO: distinguish branch/tag symbols, transform revision + # numbers into canonical form. + lappend d $symbol $rev + } + Semicolon + Call setsymbols $d + return + } + + proc Locks {} { + # Not saving locks. + RequiredLiteral locks + while {[Ident -> symbol]} { + RequiredNumber -> l } - default { - return -code error "Unknown switch $key, expected one of -cache" + Semicolon + return + } + + proc Strictness {} { + # Not saving strictness + if {![OptionalLiteral strict]} return + Semicolon + return + } + + proc FileComment {} { + if {![OptionalLiteral comment]} return + if {![OptionalString -> c]} return + Semicolon + Call setcomment $c + return + } + + proc Expand {} { + # Not saving expanded keywords + if {![OptionalLiteral expand]} return + if {![OptionalString -> dummy]} return + Semicolon + return + } + + # # ## ### ##### ######## ############# + + proc Deltas {} { + while {[OptionalNumber -> rev]} { + Date -> d + Author -> a + State -> s + Branches -> b + NextRev -> n + Call def $rev $d $a $s $n $b } - } - return -} - -proc ::vc::rcs::parser::process {path} { - variable cache - - if {!$cache} { - return [Process $path] - } - - set cachefile [Cache $path] - if { - [file exists $cachefile] && - ([file mtime $cachefile] > [file mtime $path]) - } { - # Use preparsed data if not invalidated by changes to the - # archive they are derived from. - write 4 rcs {Load preparsed data block} - return [fileutil::cat -encoding binary $cachefile] - } - - set res [Process $path] - - # Save parse result for quick pickup by future runs. - fileutil::writeFile $cachefile $res - - return $res -} - -# ----------------------------------------------------------------------------- - -proc ::vc::rcs::parser::Process {path} { - set data [fileutil::cat -encoding binary $path] - array set res {} - set res(size) [file size $path] - set res(done) 0 - set res(nsize) [string length $res(size)] - - Admin - Deltas - Description - DeltaTexts - - # Remove parser state - catch {unset res(id)} - catch {unset res(lastval)} - unset res(size) - unset res(nsize) - unset res(done) - - return [array get res] -} - -proc ::vc::rcs::parser::Cache {path} { - return ${path},,preparsed -} - -# ----------------------------------------------------------------------------- -# Internal - Recursive Descent functions implementing the syntax. - -proc ::vc::rcs::parser::Admin {} { - upvar 1 data data res res - Head ; Branch ; Access ; Symbols ; Locks ; Strict ; Comment ; Expand - return -} - -proc ::vc::rcs::parser::Deltas {} { - upvar 1 data data res res - while {[Num 0]} { IsIdent ; Date ; Author ; State ; Branches ; NextRev } - return -} - -proc ::vc::rcs::parser::Description {} { - upvar 1 data data res res - Literal desc - String 1 - Def desc - return -} - -proc ::vc::rcs::parser::DeltaTexts {} { - upvar 1 data data res res - while {[Num 0]} { IsIdent ; Log ; Text } - return -} - -proc ::vc::rcs::parser::Head {} { - upvar 1 data data res res - Literal head ; Num 1 ; Literal \; - Def head - return -} - -proc ::vc::rcs::parser::Branch {} { - upvar 1 data data res res - if {![Literal branch 0]} return ; Num 1 ; Literal \; - Def branch - return -} - -proc ::vc::rcs::parser::Access {} { - upvar 1 data data res res - Literal access ; Literal \; - return -} - -proc ::vc::rcs::parser::Symbols {} { - upvar 1 data data res res - Literal symbols - while {[Ident]} { Num 1 ; Map symbol } - Literal \; - return -} - -proc ::vc::rcs::parser::Locks {} { - upvar 1 data data res res - Literal locks - while {[Ident]} { Num 1 ; Map lock } - Literal \; - return -} - -proc ::vc::rcs::parser::Strict {} { - upvar 1 data data res res - if {![Literal strict 0]} return ; Literal \; - return -} - -proc ::vc::rcs::parser::Comment {} { - upvar 1 data data res res - if {![Literal comment 0]} return ; - if {![String 0]} return ; - Literal \; - Def comment - return -} - -proc ::vc::rcs::parser::Expand {} { - upvar 1 data data res res - if {![Literal expand 0]} return ; - if {![String 0]} return ; - Literal \; - Def expand - return -} - -proc ::vc::rcs::parser::Date {} { - upvar 1 data data res res - Literal date ; Num 1 ; Literal \; - - foreach {yr mo dy h m s} [split $res(lastval) .] break - if {$yr < 100} {incr yr 1900} - set res(lastval) [join [list $yr $mo $dy $h $m $s] .] - Map date - return -} - -proc ::vc::rcs::parser::Author {} { - upvar 1 data data res res - Literal author ; Skip ; Literal \; ; Map author - return -} - -proc ::vc::rcs::parser::State {} { - upvar 1 data data res res - Literal state ; Skip ; Literal \; ; Map state - return -} - -proc ::vc::rcs::parser::Branches {} { - upvar 1 data data res res - Literal branches ; Skip ; Literal \; - return -} - -proc ::vc::rcs::parser::NextRev {} { - upvar 1 data data res res - Literal next ; Skip ; Literal \; ; Map parent - return -} - -proc ::vc::rcs::parser::Log {} { - upvar 1 data data res res - Literal log ; String 1 ; Map commit - return -} - -proc ::vc::rcs::parser::Text {} { - upvar 1 data data res res - Literal text ; String 1 - return -} - -# ----------------------------------------------------------------------------- -# Internal - Lexicographical commands and data aquisition preparation - -proc ::vc::rcs::parser::Ident {} { - upvar 1 data data res res - - #puts I@?<[string range $data 0 10]...> - - if {[regexp -indices -- {^\s*;\s*} $data]} { - return 0 - } elseif {![regexp -indices -- {^\s*([^:]*)\s*:\s*} $data match val]} { - return 0 - } - - Get $val ; IsIdent - Next - return 1 -} - -proc ::vc::rcs::parser::Literal {name {required 1}} { - upvar 1 data data res res - if {![regexp -indices -- "^\\s*$name\\s*" $data match]} { - if {$required} { - return -code error "Expected '$name' @ '[string range $data 0 30]...'" + return + } + + # # ## ### ##### ######## ############# + + proc Date {_ dv} { + upvar 1 $dv d + RequiredLiteral date + RequiredNumber -> d + Semicolon + + struct::list assign [split $d .] year month day hour min sec + if {$year < 100} {incr year 1900} + set d [clock scan "${year}-${month}-${day} ${hour}:${min}:${sec}"] + return + } + + proc Author {_ av} { + upvar 1 $av a + RequiredLiteral author + Anything -> a + Semicolon + return + } + + proc State {_ sv} { + upvar 1 $sv s + RequiredLiteral state + Anything -> s + Semicolon + return + } + + proc Branches {_ bv} { + upvar 1 $bv b + RequiredLiteral branches + Anything -> b + Semicolon + return + } + + proc NextRev {_ nv} { + upvar 1 $nv n + RequiredLiteral next + Anything -> n + Semicolon + return + } + + # # ## ### ##### ######## ############# + + proc Description {} { + upvar 1 data data res res + RequiredLiteral desc + RequiredString -> d + Call setdesc $d + return + } + + # # ## ### ##### ######## ############# + + proc DeltaTexts {} { + while {[OptionalNumber -> rev]} { + RequiredLiteral log + RequiredString -> cmsg + RequiredLiteral text + RequiredStringRange -> delta + Call extend $rev $cmsg $delta } - return 0 + return + } + + # # ## ### ##### ######## ############# + ## Internal methods, lexiographical processing + + proc Semicolon {} { + ::variable mydata + ::variable mypos + + set ok [regexp -start $mypos -indices -- {\A\s*;\s*} $mydata match] + if {!$ok} { Expected ';' } + + SkipOver match + return + } + + proc RequiredLiteral {name} { + ::variable mydata + ::variable mypos + + set pattern "\\A\\s*$name\\s*" + set ok [regexp -start $mypos -indices -- $pattern $mydata match] + if {!$ok} { Expected '$name' } + + SkipOver match + return + } + + proc OptionalLiteral {name} { + ::variable mydata + ::variable mypos + + set pattern "\\A\\s*$name\\s*" + set ok [regexp -start $mypos -indices -- $pattern $mydata match] + if {!$ok} { return 0 } + + SkipOver match + return 1 + } + + proc RequiredNumber {_ v} { + upvar 1 $v value + ::variable mydata + ::variable mypos + + set pattern {\A\s*((\d|\.)+)\s*} + set ok [regexp -start $mypos -indices -- $pattern $mydata match v] + if {!$ok} { Expected id } + + Extract $v -> value + SkipOver match + return + } + + proc OptionalNumber {_ v} { + upvar 1 $v value + ::variable mydata + ::variable mypos + + set pattern {\A\s*((\d|\.)+)\s*} + set ok [regexp -start $mypos -indices -- $pattern $mydata match v] + if {!$ok} { return 0 } + + Extract $v -> value + SkipOver match + return 1 + } + + proc RequiredString {_ v} { + upvar 1 $v value + ::variable mydata + ::variable mypos + + set ok [regexp -start $mypos -indices -- {\A\s*@(([^@]*(@@)*)*)@\s*} $mydata match v] + if {!$ok} { Expected string } + + Extract $v -> value + set value [string map {@@ @} $value] + SkipOver match + return + } + + proc RequiredStringRange {_ v} { + upvar 1 $v value + ::variable mydata + ::variable mypos + + set ok [regexp -start $mypos -indices -- {\A\s*@(([^@]*(@@)*)*)@\s*} $mydata match value] + if {!$ok} { Expected string } + + SkipOver match + return + } + + proc OptionalString {_ v} { + upvar 1 $v value + ::variable mydata + ::variable mypos + + set ok [regexp -start $mypos -indices -- {\A\s*@(([^@]*(@@)*)*)@\s*} $mydata match v] + if {!$ok} { return 0 } + + Extract $v -> value + set value [string map {@@ @} $value] + SkipOver match + return 1 + } + + proc Ident {_ v} { + upvar 1 $v value + ::variable mydata + ::variable mypos + + set ok [regexp -start $mypos -indices -- {\A\s*;\s*} $mydata] + if {$ok} { return 0 } + + set ok [regexp -start $mypos -indices -- {\A\s*([^:]*)\s*:\s*} $mydata match v] + if {!$ok} { return 0 } + + Extract $v -> value + SkipOver match + return 1 + } + + proc Anything {_ v} { + upvar 1 $v value + ::variable mydata + ::variable mypos + + regexp -start $mypos -indices -- {\A\s*([^;]*)\s*} $mydata match v + + Extract $v -> value + SkipOver match + return + } + + # # ## ### ##### ######## ############# + ## Internal methods, input handling + + proc Extract {range _ v} { + upvar 1 $v value + ::variable mydata + struct::list assign $range s e + set value [string range $mydata $s $e] + return + } + + proc SkipOver {mv} { + # Note: The indices are absolute!, not relative to the start + # location. + upvar 1 $mv match + ::variable mypos + ::variable mysize + + struct::list assign $match s e + #puts "<$s $e> [info level -1]" + + set mypos $e + incr mypos + + log progress 2 rcs $mypos $mysize + #puts $mypos/$mysize + return + } + + proc Expected {x} { + ::variable mydata + ::variable mypos + set e $mypos ; incr e 30 + return -code error "Expected $x @ '[string range $mydata $mypos $e]...'" } - Next - return 1 -} - -proc ::vc::rcs::parser::String {{required 1}} { - upvar 1 data data res res + # # ## ### ##### ######## ############# + ## Setup, callbacks. - if {![regexp -indices -- {^\s*@(([^@]*(@@)*)*)@\s*} $data match val]} { - if {$required} { - return -code error "Expected string @ '[string range $data 0 30]...'" - } - return 0 + proc Initialize {path sink} { + ::variable mypos 0 + ::variable mydata [fileutil::cat -encoding binary $path] + ::variable mysize [file size $path] + ::variable mysink $sink + return } - Get $val - Next - return 1 -} - -proc ::vc::rcs::parser::Num {required} { - upvar 1 data data res res - if {![regexp -indices -- {^\s*((\d|\.)+)\s*} $data match val]} { - if {$required} { - return -code error "Expected id @ '[string range $data 0 30]...'" - } - return 0 + proc Call {args} { + ::variable mysink + set cmd $mysink + foreach a $args { lappend cmd $a } + eval $cmd + return } - Get $val - Next - return 1 -} + # # ## ### ##### ######## ############# + ## Configuration + + typevariable mydata {} ; # Rcs archive contents to process + typevariable mysize 0 ; # Length of contents + typevariable mysink {} ; # Sink to report to + + pragma -hasinstances no ; # singleton + pragma -hastypeinfo no ; # no introspection + pragma -hastypedestroy no ; # immortal -proc ::vc::rcs::parser::Skip {} { - upvar 1 data data res res - regexp -indices -- {^\s*([^;]*)\s*} $data match val - Get $val - Next - return + # # ## ### ##### ######## ############# } -# ----------------------------------------------------------------------------- -# Internal - Data aquisition - -proc ::vc::rcs::parser::Def {key} { - upvar 1 data data res res - set res($key) $res(lastval) - unset res(lastval) - return +namespace eval ::vc::rcs { + namespace export parser + namespace eval parser { + namespace import ::vc::tools::log + log register rcs + } } -proc ::vc::rcs::parser::Map {key} { - upvar 1 data data res res - lappend res($key) $res(id) $res(lastval) - #puts Map($res(id))=($res(lastval)) - unset res(lastval) - #unset res(id);#Keep id for additional mappings. - return -} - -proc ::vc::rcs::parser::IsIdent {} { - upvar 1 data data res res - set res(id) $res(lastval) - unset res(lastval) - return -} - -proc ::vc::rcs::parser::Get {val} { - upvar 1 data data res res - foreach {s e} $val break - set res(lastval) [string range $data $s $e] - #puts G|$res(lastval) - return -} - -proc ::vc::rcs::parser::Next {} { - upvar 1 match match data data res res - foreach {s e} $match break ; incr e - set data [string range $data $e end] - set res(done) [expr {$res(size) - [string length $data]}] - - progress 2 rcs $res(done) $res(size) - return -} - -# ----------------------------------------------------------------------------- - -namespace eval ::vc::rcs::parser { - variable cache 0 ; # No result caching by default. - - namespace export process configure -} - -# ----------------------------------------------------------------------------- -# Ready +# # ## ### ##### ######## ############# ##################### +## Ready package provide vc::rcs::parser 1.0 return