SHA1 Hash: | f94f7e5f49b8ca4458cd54d7ae34033a3d478b5b |
---|---|
Date: | 2008-05-16 00:27:49 |
User: | drh |
Comment: | Merge the fork back together. |
Timelines: | ancestors | descendants | both | trunk |
Other Links: | files | ZIP archive | manifest |
- branch=trunk inherited from [a28c83647d]
- sym-trunk inherited from [a28c83647d]
Modified src/setup.c from [ccb6e89143] to [ba00230314].
@@ -600,10 +600,22 @@ @ <hr /> textarea_attribute("Project Description", 5, 60, "project-description", "pd", ""); @ <p>Describe your project. This will be used in page headers for search @ engines as well as a short RSS description.</p> + @ <hr /> + entry_attribute("Home page", 60, "project-home", "phome", ""); + @ <p>This sets the content source for the Home page ([/home]). + @ Enter a wiki page name (as wiki/PageName) or another URL relative to + @ this server's root (%s(g.zBaseURL)/). + @ The default is to use wiki/ProjectName. Make sure to use a valid name, + @ or your /home link will likely crash! (If that happens, simply come back + @ to this page and set it to a good (or empty) value.) + @ To use .wiki or .html files in your source tree as content, use a URL + @ in the form <tt>doc/VERSION/path/to/doc.wiki</tt>, where VERSION is either + @ the UUID of a version of that page or the word 'tip' to get the most recent + @ version.</p> @ <hr /> @ <p><input type="submit" name="submit" value="Apply Changes"></p> @ </form> db_end_transaction(0); style_footer();
Modified src/wiki.c from [52056439c2] to [97d9b1ab7c].
@@ -1,7 +1,8 @@ /* ** Copyright (c) 2007 D. Richard Hipp +** Copyright (c) 2008 Stephan Beal ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the GNU General Public ** License version 2 as published by the Free Software Foundation. ** @@ -76,22 +77,42 @@ ** WEBPAGE: home ** WEBPAGE: index ** WEBPAGE: not_found */ void home_page(void){ - char *zPageName = db_get("project-name",0); - if( zPageName ){ - login_check_credentials(); - g.zExtra = zPageName; - cgi_set_parameter_nocopy("name", g.zExtra); - g.okRdWiki = 1; - g.okApndWiki = 0; - g.okWrWiki = 0; - g.okHistory = 0; - wiki_page(); + char *zHomePage; /* name of home page */ + char *zProjName; /* name of project */ + zProjName = db_get("project-name",0); + zHomePage = db_get("project-home", zProjName ); + if( zProjName && zProjName[0] ){ + /* beware: this code causes cyclic redirects on a 404 because + not_found is directed here. + */ + int lenP; /* strncmp() bounder */ + int lenH; /* length of zProjName */ + if( zHomePage && ! zHomePage[0] ){ + zHomePage = zProjName; + } + lenP = strlen(zProjName); + lenH = strlen(zHomePage); + if( lenP < lenH ) lenP = lenH; + if( (zProjName == zHomePage) || (0==strncmp(zProjName,zHomePage,lenP)) || + (0==strncmp(zHomePage,"home",lenP)/*avoid endless loop*/) ){ + login_check_credentials(); + g.zExtra = zHomePage; + cgi_set_parameter_nocopy("name", g.zExtra); + g.okRdWiki = 1; + g.okApndWiki = 0; + g.okWrWiki = 0; + g.okHistory = 0; + wiki_page(); + }else{ + cgi_redirect( zHomePage ); + } return; } + style_header("Home"); @ <p>This is a stub home-page for the project. @ To fill in this page, first go to @ <a href="%s(g.zBaseURL)/setup_config">setup/config</a> @ and establish a "Project Name". Then create a @@ -599,20 +620,15 @@ @ </ol> style_footer(); } /* -** wiki_cmd_commit() is the implementation of "wiki commit ...". +** wiki_cmd_commit() is the implementation of "wiki commit ...". ** ** As arguments it expects: ** ** zPageName = the wiki entry's name. -** -** rid = record ID for the zPageName entry. This func SHOULD deduce -** this from zPageName, but the code which calls this func already has -** the rid, so we pass it along here. If it does not match the entry -** for zPageName then Undefined Behaviour. ** ** in = input file. The file is read until EOF but is not closed ** by this function (it might be stdin!). ** ** Returns 0 on error, non-zero on success. @@ -620,19 +636,33 @@ ** TODOs: ** - take EITHER zPageName OR rid. We don't need both. ** - make use of the return value. Add more error checking. ** - give the uuid back to the caller so it can be shown ** in the status output. ("committed version XXXXX of page ...") +** - return some status telling the user if there were no diffs +** (i.e. no commit). How can we find this out? */ -int wiki_cmd_commit( char const * zPageName, int rid, FILE * in ) +int wiki_cmd_commit( char const * zPageName, FILE * in ) { Blob wiki; /* Wiki page content */ Blob content; /* read-in content */ Blob cksum; /* wiki checksum */ + int rid; /* rid of existing entry. */ int nrid; /* not really sure */ char * zDate; /* timestamp */ char * zUuid; /* uuid for rid */ + + rid = db_int(0, "SELECT x.rid FROM tag t, tagxref x" + " WHERE x.tagid=t.tagid AND t.tagname='wiki-%q'" + " ORDER BY x.mtime DESC LIMIT 1", + zPageName + ); + if( ! rid ){ + fossil_fatal("wiki commit NewEntry not yet implemented."); + } + + blob_read_from_channel( &content, in, -1 ); // ^^^ Reminder: we should allow empty (zero-byte) entries, so don't exit // if read returns 0. blob_zero(&wiki); zDate = db_text(0, "SELECT datetime('now')"); @@ -753,25 +783,16 @@ for(i=strlen(zBody); i>0 && isspace(zBody[i-1]); i--){} printf("%.*s\n", i, zBody); return; }else if( strncmp(g.argv[2],"commit",n)==0 ){ - int rid; char *zPageName; if( g.argc!=4 ){ usage("commit PAGENAME"); } zPageName = g.argv[3]; - rid = db_int(0, "SELECT x.rid FROM tag t, tagxref x" - " WHERE x.tagid=t.tagid AND t.tagname='wiki-%q'" - " ORDER BY x.mtime DESC LIMIT 1", - zPageName - ); - if( ! rid ){ - fossil_fatal("wiki commit NewEntry not yet implemented."); - } - wiki_cmd_commit( zPageName, rid, stdin ); + wiki_cmd_commit( zPageName, stdin ); printf("Committed wiki page %s.\n", zPageName); }else if( strncmp(g.argv[2],"delete",n)==0 ){ if( g.argc!=5 ){ usage("delete PAGENAME"); @@ -794,7 +815,7 @@ goto wiki_cmd_usage; } return; wiki_cmd_usage: - usage("delete|export|commit|list ..."); + usage("export|commit|list ..."); }
Deleted www/build.html version [9c506f508f]
Added www/build.wiki version [d16ea958f5]
@@ -1,1 +1,73 @@ +<h1>Installing Fossil</h1> + +<p>This page describes how to build and install Fossil. The +whole process is designed to be very easy.</p> + +<h2>0.0 Using A Precompiled Binary</h2> + +<p>You can skip steps 1.0 and 2.0 below by downloading +a <a href="http://www.fossil-scm.org/download.html">FIXME precompiled binary</a> +appropriate for your platform. If you use a precompiled binary +jump immediate to step 3.0.</p> + +<h2>1.0 Obtaining The Source Code</h2> + +<p>Fossil is self-hosting, so you can obtain a ZIP archive containing +a snapshot of the latest version directly from fossil's own fossil +repository. Follow these steps:</p> + +<ol> +<li><p>Pointer your webbrowser at +<a href="http://www.fossil-scm.org/fossil/login"> +http://www.fossil-scm.org/fossil/login</a>.</p></li> + +<li><p>Log in as anonymous. The password is shown on screen. +The reason for requiring this login is to prevent spiders from +walking the entire website, downloading ZIP archives +of every historical version, and thereby soaking up all our bandwidth.</p></li> + +<li><p>Click on the +<a href="http://www.fossil-scm.org/fossil/timeline">Timeline</a> or +<a href="http://www.fossil-scm.org/fossil/leaves">Leaves</a> link at +the top of the page.</p></li> + +<li><p>Select a version of of fossil you want to download. Click on its +link. Note that you must successfully log in as "anonymous" in step 1 +above in order to see the link to the detailed version information.</p></li> + +<li><p>On the version information page, click on the +"Zip Archive" link. This link will build a ZIP archive of the +complete source code and download it to your browser.</p></li> +</ol> + +<h2>2.0 Compiling</h2> + +<p>Follow these steps to compile:</p> + +<ol> +<li value="6"> +<p>Create a directory to hold the source code. Then unzip the +ZIP archive you downloaded into that directory. You should be +in the top-level folder of that directory</p></li> + +<li><p><b>(Optional:)</b> +Edit the Makefile to set it up like you want. You probably do not +need to do anything. Do not be intimidated: There are only 5 +variables in the makefile that can be changed. The whole Makefile +is only a few dozen lines long and most of those lines are comments.</p> + +<li><p>Type "<b>make</b>" +</ol> + +<h2>3.0 Installing</h2> + +<ol> +<li value="9"> +<p>The finished binary is named "fossil". Put this binary in a +directory that is somewhere on your PATH environment variable. +It does not matter where.</p> +<li> +<p><b>(Optional:)</b> +To uninstall, just delete the binary.</p> +</ol>
Deleted www/concepts.html version [d4951ef603]
Added www/concepts.wiki version [a61e6b72f9]
@@ -1,1 +1,371 @@ +<h1 align="center"> +Fossil Concepts +</h1> + +<h2>1.0 Introduction</h2> +<p> +<a href="index.html">Fossil</a> is a +<a href="http://en.wikipedia.org/wiki/Software_configuration_management"> +software configuration management</a> system. +Fossil is software that is designed to control and track the +development of a software project and to record the history +of the project. +There are many such systems in use today. Fossil strives to +distinguish itself from the others by being extremely simple +to setup and operate.</p> + +<p>This document is intended as a quick introduction to the concepts +behind fossil.</p> + +<h2>2.0 Composition Of A Project</h2> +<img src="concept1.gif" align="right" hspace="10"> + +<p>A software project normally consists of a "source tree". +A source tree is a hierarchy of files that are used to generate +the end product. The source tree changes over time as the +software grows and expands and as features are added and bugs +are fixed. A snapshot of the source tree at any point in time +is called a "version" or "revision" or a "baseline" of the product. +In fossil, we use the name "baseline".</p> + +<p>A "repository" is a database that contains copies of all historical +versions or baselines for a project. Baselines are normally stored in the +repository in a highly space-efficient compressed format (delta encoding). +But that is an implementation detail that you the user need not worry over. +Think of the repository as a safe place where all your old baselines are +securely stored away and available for retrieval whenever you need +them.</p> + +<p>A repository in fossil is a single file on your disk. This file +might be rather large (dozens or hundreds of megabytes for a large +or long running project) but it is nevertheless just a file. You +can move it around, rename it, write it out to a memory stick, or +do anything else you normally do with files.</p> + +<p>Each source tree that is controlled by fossil is associated with +a single repository on the local disk drive. You can tie two or more +source trees to a single repository if you want (though one +tree per repository is the most common configuration.) So a +single repository can be associated with many source trees, but +each source tree is associated with only one repository.</p> + +<p>Fossil source trees may not overlap. A fossil source tree is identified +by a file named "_FOSSIL_" in the root directory of the source tree. Every +file that is a sibling of _FOSSIL_ and every file in every subfolder is +considered potentially a part of the source tree. The _FOSSIL_ file +contains (among other things) the pathname of the repository with which +the source tree is associated. On the other hand, the repository has +no record of its source trees. So you are free to delete a source tree +or move it around without consequence. But if you move or rename or +delete a repository, then any source trees associated with that repository +will no longer be able to locate their repository and will stop working.</p> + +<p>When multiple developers are working on the same project, each +developer typically has his or her own local repository and an associated +source tree in which to work. Developers share their work by +"syncing" the content of their local repositories either directly +or through a central server. Changes can "push" from the local +repository into a remote repository. Or changes can "pull" from a +remote repository into a local repository. Or one can do a "sync" +which is a shortcut for doing both a push and a pull at the same time. +Fossil also has the concept of "cloning". A "clone" is like a "pull", +except that instead of beginning with an existing local repository, +a clone begins with nothing and creates a new local repository that +is a duplicate of a remote repository.</p> + +<p>Communication between repositories is via HTTP. Remote +repositories are identified by URL. You can also point a webbrowser +at a repository and get human-readable status, history, and tracking +information about the project.</p> + +<h3>2.1 Identification Of Artifacts</h3> + +<p>A particular version of a particular file is called an "artifact". +Each artifact has a universally unique name which is the +<a href="http://en.wikipedia.org/wiki/SHA">SHA1</a> hash of the content +of that file expressed as 40 characters of lower-case hexadecimal. Such +a hash is referred to as the Universally Unique Identifier or UUID +for the artifact. The SHA1 algorithm is created with the purpose of +providing a highly forgery-resistent identifier for a file. Given any +file it is simple to find the UUID for that file. But given a +UUID it is computationally intractable to generate a file that will +have that UUID.</p> + + +<p>UUIDs look something like this:</p> + +<blockquote><b> +6089f0b563a9db0a6d90682fe47fd7161ff867c8<br> +59712614a1b3ccfd84078a37fa5b606e28434326<br> +19dbf73078be9779edd6a0156195e610f81c94f9<br> +b4104959a67175f02d6b415480be22a239f1f077<br> +997c9d6ae03ad114b2b57f04e9eeef17dcb82788 +</b></blockquote> + +<p>When referring to an artifact using fossil, you can use a unique +prefix of the UUID that is four characters or longer. This saves +a lot of typing. When displaying UUIDs, fossil will usually only +show the first 10 digits since that is normally enough to uniquely +identify a file.</p> + +<p>Changing (or adding or removing) a single byte in a file results +in a completely different UUID. And since the UUID is the name of +the artifact, making any change to a file results in a new artifact. +In this way, artifacts are immutable.</p> + +<p>A repository is really just an unordered collection of +artifacts. New artifacts can be added to the repository, but +existing artifacts can never be removed. Fossil is designed in +such a way that it can be handed a set of artifacts in any +order and it can figure out the relationship between those +artifacts and reconstruct the complete development history of +a software project.</p> + +<h3>2.2 Manifests</h3> + +<p>At the root of a source tree is a special file called the +"manifest". The manifest is a listing of all other files in +that source tree. The manifest contains the (complete) UUID +of the file and the name of the file as it appears on disk, +and thus serves as a mapping from UUID to disk name. The UUID +of the manifest is the UUID that identifies a baseline. When +you look at a "timeline" of changes in fossil, the UUID associated +with each check-in or commit is really just the UUID of the +manifest for that baseline.</p> + +<p>Fossil automatically generates a manifest whenever you "commit" +a new baseline. So this is not something that you, the developer, +need to worry with. The format of a manifest is intentionally +designed to be simple to parse, so that if +you want to read and interpret a manifest, either by hand or +with a script, that is easy to do. But you will probably never +need to do so.</p> + +<p>In addition to identifying all files in the baseline, a +manifest also contains a check-in comment, the date and time +when the baseline was established, who created the baseline, +and links to other baselines from which the current baseline +is derived. There is also a couple of checksums used to verify +the integrity of the baseline. And the whole manifest might +be PGP clearsigned.</p> + +<h3>2.3 Key concepts</h3> + +<ul> +<li>A <b>baseline</b> is a set of files arranged + in a hierarchy.</li> +<li>A <b>repository</b> keeps a record of historical baselines.</li> +<li>Repositories share their changes using <b>push</b>, <b>pull</b>, + <b>sync</b>, and <b>clone</b>.</li> +<li>A particular version of a particular file is an <b>artifact</b> + that is identified by a <b>UUID</b>.</li> +<li>Artifacts tracked by fossil are inherently immutable.</li> +<li>Fossil automatically generates a <b>manifest</b> file that identifies + every artifact in a baseline.</li> +<li>The UUID of the manifest is the UUID of the baseline.</li> +</ul> + +<h2>3.0 Fossil - The Program</h2> + +<p>Fossil is software. The implementation of fossil is in the form +of a single executable named "fossil". To install fossil on your system, +all you have to do is obtain a copy of this one executable file (either +by downloading a precompiled version or compiling it yourself) and then +putting that file somewhere on your PATH.</p> + +<p>Fossil is completely self-contained. It is not necessary to +install any other software in order to use fossil. You do <u>not</u> need +CVS, gzip, diff, rsync, Python, Perl, Tcl, Java, apache, PostgreSQL, MySQL, +SQLite, patch, or any similar software on your system in order to use +fossil effectively. You will want to have some kind of text editor +for entering check-in comments. Fossil will use whatever text editor +is identified by your VISUAL environment variable. Fossil will also +use GPG to clearsign your manifests if you happen to have it installed, +but fossil will skip that step if GPG missing from your system. +You can optionally set up fossil to use external "diff" programs, +though a perfectly functional "diff" algorithm is built it and works +find for most people.</p> + +<p>To uninstall fossil, simply delete the executable.</p> + +<p>To upgrade an older version of fossil to a newer version, just +replace the old executable with the new one. You might need to +run a one-time command to restructure your repositories after +an upgrade. Check the instructions that come with the upgrade +for details.</p> + +<p>To use fossil, simply type the name of executable in your +shell, followed by one of the various built-in commands and +arguments appropriate for that command. For example:</p> + +<blockquote><b> +fossil help +</b></blockquote> + +<p>In the next section, when we say things like "use the <b>help</b> +command" we mean to use the command name "help" as the first +token after the name of the fossil executable, as shown above.</p> + +<h2>4.0 Workflow</h2> + +<img src="concept2.gif" align="right" hspace="10"> +<ol> +<li><p> +Establish a local repository using either the <b>new</b> command +to start a new project, or the <b>clone</b> command to make a clone +of a repository for an existing project. +</p></li> + +<li><p> +Establish one or more source trees by changing your working directory +to where you want the root of the source tree to be, then issuing +the <b>open</b> command with the name of the repository file as its +argument. +</p></li> + +<li><p> +Use the <b>update</b> command followed by a UUID to cause your +source tree to change to the baseline identified by that UUID. +The <b>timeline</b> or <b>leaves</b> commands might help you to +identify an appropriate baseline. +</p></li> + +<li><p> +Edit the code. Add new files to the source tree using the <b>add</b> +command. Omit files from future baselines using the <b>rm</b> command. +(Even when you remove files from future baselines, those files continue +to exist in historical baselines.) Test your changes. +</p></li> + +<li><p> +Create a new baseline using the <b>commit</b> command. You will be prompted +for a check-in comment and also for your GPG key if you have GPG installed. +The commit copies the edits you have made in your local source +tree into your local repository. +</p></li> + +<li><p> +Share your changes with others using the <b>push</b> command. +Push causes the edits you committed into your local repository to be +pushed out into other repositories. +</p></li> + +<li><p> +When your coworkers make their own changes, you can pull those changes +into your local repository using the <b>pull</b> command. Note that +the pull command only pulls the changes into your local repository, +not into your local source tree. +</p></li> + +<li><p> +After the changes of others are in your local repository, you +can move them into your local source tree using <b>update</b>. If +you have made parallel +changes, you can merge your changes together with your coworkers changes +by do an <b>update</b> to your latest baseline, then doing a +<b>merge</b> with your coworkers latest baseline. After your +verify that the merged code is still functional, you can <b>commit</b> +a new baseline that contains both yours and your coworkers changes +and then push the new baseline back to your coworker. +</p></li> + +<li><p> +Repeat all of the above until you have generated great software. +</p></li> +</ol> + +<h3>4.1 Variations</h3> + +<p>The <b>settings</b> lets you view and modify various operating +properties of fossil. Among the available settings is "autosync" +mode. When autosync is enabled, the push and pull of content from +your local server is largely automated. Whenever you use the <b>update</b> +command, fossil first does a <b>pull</b> to see if other users have +perhaps added new baselines to the central repository. When you +<b>commit</b>, fossil also does a <b>pull</b> and issues a warning +if your check-in would cause a fork. After a <b>commit</b>, fossil +automatically does a <b>push</b> to send your changes up to the +central server.</p> + +<p>With autosync enabled, fossil works like +<a href="http://www.nongnu.org/cvs/">CVS</a> or +<a href="http://subversion.tigris.org/">Subversion</a>. +When autosync disabled, fossil works more like +<a href="http://monotone.ca/">Monotone</a>, +<a href="http://git.or.cz">GIT</a>, or +<a href="http://www.selenic.com/mercurial/wiki/">Mercurial</a>. +The fun thing about fossil is that it will work either +way, depending on your needs of the moment. You can freely switch +between these operating modes using commands like:</p> + +<blockquote> +<b>fossil setting autosync off<br /> +fossil setting autosync on</b> +</blockquote> + +<p>For additional information about autosync and other settings +using the <b>help</b> command.</p> + +<h2>5.0 Setting Up A Fossil Server</h2> + +<p>With other configuration management software, setting up a server is +a lot of work and normally takes time, patience, and a lot of system +knowledge. Fossil is designed to avoid this frustration. Setting up +a server with fossil is ridiculously easy. You have three options:</p> + +<ol> +<li><p><b>Setting up a stand-alone server</b></p> + +<p>From within your source tree just use the <b>server</b> command and +fossil will start listening for incoming requests on TCP port 8080. +You can point your webbrowser at <a href="http://localhost:8080/"> +http://localhost:8080/</a> and begin exploring. Or your coworkers +can do pushes or pulls against your server. Use the <b>--port</b> +option to the server command to specify a different TCP port. If +you do not have a local source tree, use the <b>-R</b> command-line +option to specify the repository file.</p> + +<p>A stand-alone server is a great way to set of transient connections +between coworkers for doing quick pushes or pulls. But you can also +set up a permanent stand-alone server if you prefer. Just make +arrangements for fossil to be launched with appropriate arguments +after every reboot.</p> +</li> + +<li><p><b>Setting up a CGI server</b></p> + +<p>If you have a webserver running on your machine already, you can +set up fossil to be run from CGI. Simply create an executable script +that looks something like this:</p> + +<blockquote><pre> +#!/usr/local/bin/fossil +repository: /home/me/bigproject.fossil +</pre></blockquote> + +<p>Edit this script to use whatever pathnames are appropriate for +your project. Then point your webbrowser at the script and off you +go.</p></li> + +<li><p><b>Setting up an inetd server</b></p> + +<p>If you have inetd or xinetd running on your system, you can set +those services up to launch fossil to deal with inbound TCP/IP connections +on whatever port you want. Set up inetd or xinetd to launch fossil +like this:</p> + +<blockquote><pre> +/usr/local/bin/fossil http /home/me/bigproject.fossil +</pre></blockquote> +<p>As before, change the filenames to whatever is appropriate for +your system. You can have fossil run as any user that has write +permission on the repository and on the directory that contains the +repository. But it is safer to run fossil as root. When fossil +sees that it is running as root, it automatically puts itself into +a <a href="http://en.wikipedia.org/wiki/Chroot">chroot jail</a> and +drops all privileges prior to reading any information from the client. +Since fossil is a stand-alone program, you do not need to put anything +in the chroot jail with fossil in order for it to do its job.</p> +</li> +</ol>
Deleted www/delta_encoder_algorithm.html version [2fdda57151]
Added www/delta_encoder_algorithm.wiki version [83801262d2]
@@ -1,1 +1,226 @@ +<h1 align="center"> +Fossil Delta Encoding Algorithm +</h1> + +<p>A key component for the efficient storage of multiple revisions of +a file in fossil repositories is the use of delta-compression, i.e. to +store only the changes between revisions instead of the whole +file.</p> + +<p>This document describes the encoding algorithm used by Fossil to +generate deltas. It is targeted at developers working on either +<a href="index.html">fossil</a> itself, or on tools compatible with +it. The exact format of the generated byte-sequences, while in general +not necessary to understand encoder operation, can be found in the +companion specification titled "<a href="delta_format.html">Fossil +Delta Format</a>". +</p> + +<p>The entire algorithm is inspired +by <a href="http://samba.anu.edu.au/rsync/">rsync</a>.</p> + +<a name="argresparam"><h2>1.0 Arguments, Results, and Parameters</h2> + +<p>The encoder takes two byte-sequences as input, the "original", and +the "target", and returns a single byte-sequence containing the +"delta" which transforms the original into the target upon its +application.</p> + +<p>Note that the data of a "byte-sequence" includes its length, +i.e. the number of bytes contained in the sequence.</p> + +<p>The algorithm has one parameter named "NHASH", the size of the +"sliding window" for the "rolling hash", in bytes. These two terms are +explained in the next section. The value of this parameter has to be a +power of two for the algorithm to work. For Fossil the value of this +parameter is set to "16".</p> + +<a name="operation"><h2>2.0 Operation</h2> + +<p>The algorithm is split into three phases which generate +the <a href="delta_format.html#header">header</a>, +<a href="delta_format.html#slist">segment list</a>, +and <a href="delta_format.html#trailer">trailer</a> of the delta, per +its general <a href="delta_format.html#structure">structure</a>.</p> + +<p>The two phases generating header and trailer are not covered here +as their implementation trivially follows directly from the +specification of the <a href="delta_format.html">delta format</a>.</p> + +<p>This leaves the segment-list. Its generation is done in two phases, +a pre-processing step operating on the "original" byte-sequence, +followed by the processing of the "target" byte-sequence using the +information gathered by the first step.</p> + +<a name="preprocessing"><h3>2.1 Preprocessing the original</h3> + +<p>A major part of the processing of the "target" is to find a range +in the "original" which contains the same content as found at the +current location in the "target".</p> + +<p>A naive approach to this would be to search the whole "original" +for such content. This however is very inefficient as it would search +the same parts of the "original" over and over. What is done instead +is to sample the "original" at regular intervals, compute signatures +for the sampled locations and store them in a hash table keyed by +these signatures.</p> + +<p>That is what happens in this step. The following processing step +can then the compute signature for its current location and then has +to search only a narrow set of locations in the "original" for +possible matches, namely those which have the same signature.</p> + +<p>In detail:</p> + +<ol> +<li>The "original" is split into chunks of NHASH bytes. Note that a +partial chunk of less than NHASH bytes at the end of "original" is +ignored. +</li> +<li>The <a href="#rollhash">rolling hash</a> of each chunk is +computed. +</li> +<li>A hashtable is filled, mapping from the hashes of the chunks to +the list of chunk locations having this hash. +</li> +</ol> + +<a name="processing"><h3>2.1 Processing the target</h3> + +<p>This, the main phase of the encoder, processes the target in a loop +from beginning to end. The state of the encoder is captured by two +locations, the "base" and the "slide". "base" points to the first byte +of the target for which no delta output has been generated yet, and +"slide" is the location of the window used to look in the "origin" for +commonalities. This window is NHASH bytes long.</p> + +<p>Initially both "base" and "slide" point to the beginning of the +"target". In each iteration of the loop the encoder decides whether to +<ul> +<li>emit a single instruction +to <a href="delta_format.html#copyrange">copy a range</a>, or +</li> +<li>emit two instructions, first +to <a href="delta_format.html#insertlit">insert a literal</a>, then +to <a href="delta_format.html#copyrange">copy a range</a>, or +</li> +<li>move the window forward one byte. +</li> +</ul> +</p> + +<img src="encode10.gif" align="right" hspace="10"> +<p>To make this decision the encoder first computes the hash value for +the NHASH bytes in the window and then looks at all the locations in +the "origin" which have the same signature. This part uses the hash +table created by the pre-processing step to effiently find these +locations.</p> + +<p>For each of the possible candidates the encoder finds the maximal +range of bytes common to both "origin" and "target", going forward and +backward from "slide" in the "target", and the candidate location in +the "origin". This search is constrained on the side of the "target" +by the "base" (backward search), and the end of the "target" (forward +search), and on the side of the "origin" by the beginning and end of +the "origin", respectively.</p> + +<p>There are input files for which the hash chains generated by the +pre-processing step can become very long, leading to long search times +and affecting the performance of the delta generator. To limit the +effect such long chains can have the actual search for candidates is +bounded, looking at most N candidates. Currently N is set to 250.</p> + +<p>From the ranges for all the candidates the best (= largest) common +range is taken and it is determined how many bytes are needed to +encode the bytes between the "base" and the end of that range. If the +range extended back to the "base" then this can be done in a single +copy instruction. Otherwise, i.e if there is a gap between the "base" +and the beginning of the range then two instructions are needed, one +to insert the bytes in the gap as a literal, and a copy instruction +for the range itself. The general situation at this point can be seen +in the picture to the right.</p> + +<p>If the number of bytes needed to encode both gap (if present), and +range is less than the number of bytes we are encoding the encoder +will emit the necessary instructions as described above, set "base" +and "slide" to the end of the encoded range and start the next +iteration at that point.</p> + +<p>If, on the other hand, the encoder either did not find candidate +locations in the origin, or the best range coming out of the search +needed more bytes to encode the range than there were bytes in the +range, then no instructions are emitted and the window is moved one +byte forward. The "base" is left unchanged in that case.</p> + +<p>The processing loop stops at one of two conditions: +<ol> +<li>The encoder decided to move the window forward, but the end of the +window reached the end of the "target". +</li> +<li>After the emission of instructions the new "base" location is +within NHASH bytes of end of the "target", i.e. there are no more than +at most NHASH bytes left. +</li> +</ol> +</p> + +<p>If the processing loop left bytes unencoded, i.e. "base" not +exactly at the end of the "target", as is possible for both end +conditions, then one last insert instruction is emitted to put these +bytes into the delta.<p> + +<a name="exceptions"><h2>3.0 Exceptions</h2> + +<p>If the "original" is at most NHASH bytes long no compression of +changes is possible, and the segment-list of the delta consists of a +single literal which contains the entire "target".</p> + +<p>This is actually equivalent to the second end condition of the +processing loop described in the previous section, just checked before +actually entering the loop.</p> + +<a name="rollhash"><h2>4.0 The rolling hash</h2> + +<p>The rolling hash described below and used to compute content +signatures was chosen not only for good hashing properties, but also +to enable the easy (incremental) recalculation of its value for a +sliding window, i.e. where the oldest byte is removed from the window +and a new byte is shifted in.<p> + +<a name="rhdef"><h3>4.1 Definition</h3> + +<p>Assuming an array Z of NHASH bytes (indexing starting at 0) the +hash V is computed via</p> + +<p align=center><table><tr><td> +<p><img src="encode1.gif" align="center"></p> +<p><img src="encode2.gif" align="center"></p> +<p><img src="encode3.gif" align="center"></p> +</td></tr></table></p> + +where A and B are unsigned 16-bit integers (hence the <u>mod</u>), and +V is a 32-bit unsigned integer with B as MSB, A as LSB. + +<a name="rhincr"><h3>4.2 Incremental recalculation</h3> + +<p>Assuming an array Z of NHASH bytes (indexing starting at 0) with +hash V (and components A and B), the dropped +byte <img src="encode4.gif" align="center">, and the new byte +<img src="encode5.gif" align="center"> , the new hash can +be computed incrementally via: </p> + +<p align=center><table><tr><td> +<p><img src="encode6.gif" align="center"></p> +<p><img src="encode7.gif" align="center"></p> +<p><img src="encode8.gif" align="center"></p> +</td></tr></table></p> + +<p>For A, the regular sum, it can be seen easily that this the correct +way recomputing that component.</p> +<p>For B, the weighted sum, note first that <img src="encode4.gif" +align="center"> has the weight NHASH in the sum, so that is what has +to be removed. Then adding in <img src="encode9.gif" align="center"> +adds one weight factor to all the other values of Z, and at last adds +in <img src="encode5.gif" align="center"> with weight 1, also +generating the correct new sum</p>
Deleted www/delta_format.html version [2d08a3a146]
Added www/delta_format.wiki version [03e4eaca4d]
@@ -1,1 +1,213 @@ +<h1 align="center"> +Fossil Delta Format +</h1> + +<p>A key component for the efficient storage of multiple revisions of +a file in fossil repositories is the use of delta-compression, i.e. to +store only the changes between revisions instead of the whole +file.</p> + +<p>This document describes the format used to encode such changes, +also known as "delta". It is targeted at developers working on either +<a href="index.html">fossil</a> itself, or on tools compatible with +it.</p> + +<a name="structure"><h2>1.0 Structure</h2> +<img src="delta1.gif" align="left" hspace="10"> + +<p>A delta consists of three parts, a "header", a "trailer", and a +"segment-list" between them.</p> + +<p>Both header and trailer provide information about the target +helping the decoder, and the segment-list describes how the target can +be constructed from the original.</p> + +<a name="header"><h3>1.1 Header</h3> +<img src="delta6.gif" align="left" hspace="10"> + +<p>The header consists of a single number followed by a newline +character (ASCII 0x0a). The number is the length of the target in +bytes.</p> + +<p>This means that, given a delta, the decoder can compute the size of +the target (and allocate any necessary memory based on that) by simply +reading the first line of the delta and decoding the number found +there. In other words, before it has to decode everything else.</p> + +<a name="trailer"><h3>1.2 Trailer</h3> +<img src="delta5.gif" align="left" hspace="10"> + +<p>The trailer consists of a single number followed by a semicolon (ASCII +0x3b). This number is a checksum of the target and can be used by a +decoder to verify that the delta applied correctly, reconstructing the +target from the original.</p> + +<p>The checksum is computed by treating the target as a series of +32-bit integer numbers (MSB first), and summing these up, modulo +2^32-1. A target whose length is not a multiple of 4 is padded with +0-bytes (ASCII 0x00) at the end.</p> + +<p>By putting this information at the end of the delta a decoder has +it available immediately after the target has been reconstructed +fully.</p> + +<a name="slist"><h3>1.3 Segment-List</h3> +<img src="delta2.gif" align="left" hspace="10"> + +<p>The segment-list of a delta describes how to create the target from +the original by a combination of inserting literal byte-sequences and +copying ranges of bytes from the original. This is there the +compression takes place, by encoding the large common parts of +original and target in small copy instructions.</p> + +<p>The target is constructed from beginning to end, with the data +generated by each instruction appended after the data of all previous +instructions, with no gaps.</p> + +<a name="insertlit"><h4>1.3.1 Insert Literal</h4> + +<p>A literal is specified by two elements, the size of the literal in +bytes, and the bytes of the literal itself.</p> + +<img src="delta4.gif" align="left" hspace="10"> +<p>The length is written first, followed by a colon character (ASCII +0x3a), followed by the bytes of the literal.</p> + +<a name="copyrange"><h4>1.3.2 Copy Range</h4> + +<p>A range to copy is specified by two numbers, the offset of the +first byte in the original to copy, and the size of the range, in +bytes. The size zero is special, its usage indicates that the range +extends to the end of the original.</p> + +<img src="delta3.gif" align="left" hspace="10"> +<p>The length is written first, followed by an "at" character (ASCII +0x40), then the offset, followed by a comma (ASCII 0x2c).</p> + +<a name="intcoding"><h2>2.0 Encoding of integers</h2> + +<p> +The format currently handles only 32 bit integer numbers. They are +written base-64 encoded, MSB first, and without leading +"0"-characters, except if they are significant (i.e. 0 => "0"). +</p> + +<p> +The base-64 coding is described in +<a href="http://www.ietf.org/rfc/rfc3548.txt">RFC 3548</a>. +</p> + +<a name="examples"><h2>3.0 Examples</h2> + +<a name="examplesint"><h3>3.1 Integer encoding</h3> + +<table border=1> +<tr> +<th>Value</th> +<th>Encoding</th> +</tr> +<tr> +<td>0</td> +<td>0</td> +</tr> +<tr> +<td>6246</td> +<td>1Xb</td> +</tr> +<tr> +<td>-1101438770</td> +<td>2zMM3E</td> +</tr> +</table> + +<a name="examplesdelta"><h3>3.2 Delta encoding</h3> + +<p>An example of a delta using the specified encoding is:</p> + +<table border=1><tr><td><pre> +1Xb +4E@0,2:thFN@4C,6:scenda1B@Jd,6:scenda5x@Kt,6:pieces79@Qt,F: Example: eskil~E@Y0,2zMM3E;</pre> +</td></tr></table> + +<p>This can be taken apart into the following parts:</p> + +<table border=1> +<tr><th>What </th> <th>Encoding </th><th>Meaning </th><th>Details</th></tr> +<tr><td>Header</td> <td>1Xb </td><td>Size </td><td> 6246 </td></tr> +<tr><td>S-List</td> <td>4E@0, </td><td>Copy </td><td> 270 @ 0 </td></tr> +<tr><td> </td> <td>2:th </td><td>Literal </td><td> 2 'th' </td></tr> +<tr><td> </td> <td>FN@4C, </td><td>Copy </td><td> 983 @ 268 </td></tr> +<tr><td> </td> <td>6:scenda </td><td>Literal </td><td> 6 'scenda' </td></tr> +<tr><td> </td> <td>1B@Jd, </td><td>Copy </td><td> 75 @ 1256 </td></tr> +<tr><td> </td> <td>6:scenda </td><td>Literal </td><td> 6 'scenda' </td></tr> +<tr><td> </td> <td>5x@Kt, </td><td>Copy </td><td> 380 @ 1336 </td></tr> +<tr><td> </td> <td>6:pieces </td><td>Literal </td><td> 6 'pieces' </td></tr> +<tr><td> </td> <td>79@Qt, </td><td>Copy </td><td> 457 @ 1720 </td></tr> +<tr><td> </td> <td>F: Example: eskil</td><td>Literal </td><td> 15 ' Example: eskil'</td></tr> +<tr><td> </td> <td>~E@Y0, </td><td>Copy </td><td> 4046 @ 2176 </td></tr> +<tr><td>Trailer</td><td>2zMM3E </td><td>Ckecksum</td><td> -1101438770 </td></tr> +</table> + +<p>The unified diff behind the above delta is</p> + +<table border=1><tr><td><pre> +bluepeak:(761) ~/Projects/Tcl/Fossil/Devel/devel > diff -u ../DELTA/old ../DELTA/new +--- ../DELTA/old 2007-08-23 21:14:40.000000000 -0700 ++++ ../DELTA/new 2007-08-23 21:14:33.000000000 -0700 +@@ -5,7 +5,7 @@ + + * If the server does not have write permission on the database + file, or on the directory containing the database file (and +- it is thus unable to update database because it cannot create ++ it is thus unable to update the database because it cannot create + a rollback journal) then it currently fails silently on a push. + It needs to return a helpful error. + +@@ -27,8 +27,8 @@ + * Additional information displayed for the "vinfo" page: + + + All leaves of this version that are not included in the +- decendant list. With date, user, comment, and hyperlink. +- Leaves in the decendant table should be marked as such. ++ descendant list. With date, user, comment, and hyperlink. ++ Leaves in the descendant table should be marked as such. + See the compute_leaves() function to see how to find all + leaves. + + Add file diff links to the file change list. +@@ -37,7 +37,7 @@ + + * The /xfer handler (for push, pull, and clone) does not do + delta compression. This results in excess bandwidth usage. +- There are some code in xfer.c that are sketches of ideas on ++ There are some pieces in xfer.c that are sketches of ideas on + how to do delta compression, but nothing has been implemented. + + * Enhancements to the diff and tkdiff commands in the cli. +@@ -45,7 +45,7 @@ + single file. Allow diffs against any two arbitrary versions, + not just diffs against the current check-out. Allow + configuration options to replace tkdiff with some other +- visual differ of the users choice. ++ visual differ of the users choice. Example: eskil. + + * Ticketing interface (expand this bullet) + +</pre></td></tr></table> + + + +<a name="notes"><h2>Notes</h2> +<ul> +<li>Pure text files generate a pure text delta. +</li> +<li>Binary files generate a delta that may contain some binary data. +</li> +<li>Instead of putting special instructions for general compression +into the delta-format itself, specifically the segment-list, like +run-length encoding of literals, etc. it was considered to be much +more sensible to keep the various concern separate and use a general +compression library, like <a href="http://www.zlib.net">zlib</a>, to +compress the full delta after its generation. +</li> +</ul>
Deleted www/fileformat.html version [677a2a9b19]
Added www/fileformat.wiki version [346d6f3293]
@@ -1,1 +1,334 @@ +<h1 align="center"> +Fossil File Formats +</h1> + +<p> +The global state of a fossil repository is determined by an unordered +set of artifacts. +An artifact might be a source code file, the text of a wiki page, +part of a trouble ticket, or one of several special control artifacts +used to show the relationships between other artifacts within the +project. Artifacts can be text or binary. +</p> + +<p> +Each artifact in the repository is named by its SHA1 hash. +No prefixes or meta information is added to a artifact before +its hash is computed. The name of a artifact in the repository +is exactly the same SHA1 hash that is computed by sha1sum +on the file as it exists in your source tree.</p> + +<p> +Some artifacts have a particular format which gives them special +meaning to fossil. Fossil recognizes:</p> + +<ul> +<li> Manifests </li> +<li> Clusters </li> +<li> Control Artifacts </li> +<li> Wiki Pages </li> +<li> Ticket Changes </li> +</ul> + +<p>These five artifact types are described in the sequel.</p> + +<h2>1.0 The Manifest</h2> + +<p>A manifest defines a baseline or version of the project +source tree. The manifest contains a list of artifacts for +each file in the project and the corresponding filenames, as +well as information such as parent baselines, the name of the +programmer who created the baseline, the date and time when +the baseline was created, and any check-in comments associated +with the baseline.</p> + +<p> +Any artifact in the repository that follows the syntactic rules +of a manifest is a manifest. Note that a manifest can +be both a real manifest and also a content file, though this +is rare. +</p> + +<p> +A manifest is a text file. Newline characters +(ASCII 0x0a) separate the file into "cards". +Each card begins with a single +character "card type". Zero or more arguments may follow +the card type. All arguments are separated from each other +and from the card-type character by a single space +character. There is no surplus white space between arguments +and no leading or trailing whitespace except for the newline +character that acts as the card separator. +</p> + +<p> +All cards of the manifest occur in strict sorted lexicographical order. +No card may be duplicated. +The entire manifest may be PGP clear-signed, but otherwise it +may contain no additional text or data beyond what is described here. +</p> + +<p> +Allowed cards in the manifest are as follows: +</p> + +<blockquote> +<b>C</b> <i>checkin-comment</i><br> +<b>D</b> <i>time-and-date-stamp</i><br> +<b>F</b> <i>filename</i> <i>SHA1-hash</i><br> +<b>P</b> <i>SHA1-hash</i>+<br> +<b>R</b> <i>repository-checksum</i><br> +<b>U</b> <i>user-login</i><br> +<b>Z</b> <i>manifest-checksum</i> +</blockquote> + +<p> +A manifest must have exactly one C-card. The sole argument to +the C-card is a check-in comment that describes the check-in that +the manifest defines. The check-in comment is text. The following +escape sequences are applied to the text: +A space (ASCII 0x20) is represented as "\s" (ASCII 0x5C, 0x73). A +newline (ASCII 0x0a) is "\n" (ASCII 0x6C, x6E). A backslash +(ASCII 0x5C) is represented as two backslashes "\\". Apart from +space and newline, no other whitespace characters are allowed in +the check-in comment. Nor are any unprintable characters allowed +in the comment. +</p> + +<p> +A manifest must have exactly one D-card. The sole argument to +the D-card is a date-time stamp in the ISO8601 format. The +date and time should be in coordinated universal time (UTC). +The format is: +</p> + +<blockquote> +<i>YYYY</i><b>-</b><i>MM</i><b>-</b><i>DD</i><b>T</b><i>HH</i><b>:</b><i>MM</i><b>:</b><i>SS</i> +</blockquote> + +<p> +A manifest has zero or more F-cards. Each F-card defines a file +(other than the manifest itself) which is part of the baseline that +the manifest defines. There are two arguments. The first argment +is the pathname of the file in the baseline relative to the root +of the project file hierarchy. No ".." or "." directories are allowed +within the filename. Space characters are escaped as in C-card +comment text. Backslash characters and newlines are not allowed +within filenames. The directory separator character is a forward +slash (ASCII 0x2F). The second argument to the F-card is the +full 40-character lower-case hexadecimal SHA1 hash of the content +artifact. +</p> + +<p> +A manifest has zero or one P-cards. Most manifests have one P-card. +The P-card has a varying number of arguments that +defines other manifests from which the current manifest +is derived. Each argument is an 40-character lowercase +hexadecimal SHA1 of the predecessor manifest. All arguments +to the P-card must be unique to that line. +The first predecessor is the direct ancestor of the manifest. +Other arguments define manifests with which the first was +merged to yield the current manifest. Most manifests have +a P-card with a single argument. The first manifest in the +project has no ancestors and thus has no P-card. +</p> + +<p> +A manifest may optionally have a single R-card. The R-card has +a single argument which is the MD5 checksum of all files in +the baseline except the manifest itself. The checksum is expressed +as 32-characters of lowercase hexadecimal. The checksum is +computed as follows: For each file in the baseline (except for +the manifest itself) in strict sorted lexicographical order, +take the pathname of the file relative to the root of the +repository, append a single space (ASCII 0x20), the +size of the file in ASCII decimal, a single newline +character (ASCII 0x0A), and the complete text of the file. +Compute the MD5 checksum of the the result. +</p> + +<p> +Each manifest has a single U-card. The argument to the U-card is +the login of the user who created the manifest. The login name +is encoded using the same character escapes as is used for the +check-in comment argument to the C-card. +</p> + +<p> +A manifest has an option Z-card as its last line. The argument +to the Z-card is a 32-character lowercase hexadecimal MD5 hash +of all prior lines of the manifest up to and including the newline +character that immediately preceeds the "Z". The Z-card is just +a sanity check to prove that the manifest is well-formed and +consistent. +</p> + +<h2>2.0 Clusters</h2> + +<p> +A cluster is a artifact that declares the existance of other artifacts. +Clusters are used during repository synchronization to help +reduce network traffic. +</p> + +<p> +Clusters follow a syntax that is very similar to manifests. +A Cluster is a line-oriented text file. Newline characters +(ASCII 0x0a) separate the artifact into cards. Each card begins with a single +character "card type". Zero or more arguments may follow +the card type. All arguments are separated from each other +and from the card-type character by a single space +character. There is no surplus white space between arguments +and no leading or trailing whitespace except for the newline +character that acts as the card separator. +All cards of a cluter occur in strict sorted lexicographical order. +No card may be duplicated. +The cluster may not contain additional text or data beyond +what is described here. +Unlike manifests, clusters are never PGP signed. +</p> + +<p> +Allowed cards in the cluster are as follows: +</p> + +<blockquote> +<b>M</b> <i>uuid</i><br /> +<b>Z</b> <i>checksum</i> +</blockquote> + +<p> +A cluster contains one or more "M" cards followed by a single "Z" +line. Each M card has a single argument which is the UUID of +another artifact in the repository. The Z card work exactly like +the Z card of a manifest. The argument to the Z card is the +lower-case hexadecimal representation of the MD5 checksum of all +prior cards in the cluster. Note that the Z card is required +on a cluster. +</p> + + +<h2>3.0 Control Artifacts</h2> + +<p> +Control artifacts are used to assign properties to other artifacts +within the repository. The basic format of a control artifact is +the same as a manifest or cluster. A control artifact is a text +files divided into cards by newline characters. Each card has a +single-character card type followed by arguments. Spaces separate +the card type and the arguments. No surplus whitespace is allowed. +All cards must occur in strict lexigraphical order. +</p> + +<p> +Allowed cards in a control artifact are as follows: +</p> + +<blockquote> +<b>D</b> <i>time-and-date-stamp</i><br /> +<b>T</b> (<b>+</b>|<b>-</b>|<b>*</b>)<i>tag-name uuid ?value?</i><br /> +<b>Z</b> <i>checksum</i><br /> +</blockquote> + +<p> +A control artifact must have one D card and one Z card and +one or more or more T cards. No other cards or other text is +allowed in a control artifact. Control artifacts might be PGP +clearsigned.</p> + +<p>The D card and the Z card of a control artifact are the same +as in a manifest.</p> + +<p>The T card represents a "tag" or property that is applied to +some other artifact. The T card has two or three values. The +second argument is the 40 character lowercase UUID of the artifact +to which the tag is to be applied. The +first value is the tag name. The first character of the tag +is either "+", "-", or "*". A "+" means the tag should be added +to the artifact. The "-" means the tag should be removed. +The "*" character means the tag should be added to the artifact +and all direct decendants (but not branches) of the artifact down +to but not including the first decendant that contains a +more recent "-" tag with the same name. +The optional third argument is the value of the tag. A tag +without a value is a boolean.</p> + +<p>When two or more tags with the same name are applied to the +same artifact, the tag with the latest (most recent) date is +used.</p> + +<p>Some tags have special meaning. The "comment" tag when applied +to a baseline will override the check-in comment of that baseline +for display purposes.</p> + +<h2>4.0 Wiki Pages</h2> + +<p>A wiki page is an artifact with a format similar to manifests, +clusters, and control artifacts. The artifact is divided into +cards by newline characters. The format of each card is as in +manifests, clusters, and control artifacts. Wiki artifacts accept +the following card types:</p> + +<blockquote> +<b>D</b> <i>time-and-date-stamp</i><br /> +<b>L</b> <i>wiki-title</i><br /> +<b>P</b> <i>parent-uuid</i>+<br /> +<b>U</b> <i>user-name</i><br /> +<b>W</b> <i>size</i> <b>\n</b> <i>text</i> <b>\n</b><br /> +<b>Z</b> <i>checksum</i> +</blockquote> + +<p>The D card is the date and time when the wiki page was edited. +The P card specifies the parent wiki pages, if any. The L card +gives the name of the wiki page. The U card specifies the login +of the user who made this edit to the wiki page. The Z card is +the usual checksum over the either artifact.</p> + +<p>The W card is used to specify the text of the wiki page. The +argument to the W card is an integer which is the number of bytes +of text in the wiki page. That text follows the newline character +that terminates the W card. The wiki text is always followed by one +extra newline.</p> + +<h2>5.0 Ticket Changes</h2> + +<p>A ticket-change artifact represents a change to a trouble ticket. +The following cards are allowed on a ticket change artifact:</p> + +<blockquote> +<b>D</b> <i>time-and-date-stamp</i><br /> +<b>J</b> ?<b>+</b>?<i>name value</i><br /> +<b>K</b> <i>ticket-uuid</i><br /> +<b>U</b> <i>user-name</i><br /> +<b>Z</b> <i>checksum</i> +</blockquote> + +<p> +The D card is the usual date and time stamp and represents the point +in time when the change was entered. The U card is the login of the +programmer who entered this change. The Z card is the checksum over +the entire artifact.</p> + +<p> +Every ticket has a UUID. The ticket to which this change is applied +is specified by the K card. A ticket exists if it contains one or +more changes. The first "change" to a ticket is what brings the +ticket into existance.</p> + +<p> +J cards specify changes to "fields" of the ticket. Each fossil +server has a ticket configuration which specifies the fields its +understands. This is not a limit on the fields that can appear +on the J cards, however. If a J card specifies a field that a +particular fossil server does not recognize, then that J card +is simply ignored.</p> +<p> +The first argument of the J card is the field name. The second +value is the field value. If the field name begins with "+" then +the value is appended to the prior value. Otherwise, the value +on the J card replaces any previous value of the field. +The field name and value are both encoded using the character +escapes defined for the C card of a manifest. +</p>
Deleted www/index.html version [263b2dabf4]
Modified www/index.wiki from [314475c431] to [c3f0bf922c].
@@ -1,27 +1,32 @@ -<title>Fossil SCM Homepage</title> +<h1>Fossil - A Software Configuration Management System</h1> +<p> This is a preliminary homepage for a new software configuration management system called "Fossil". The system is -<a href="http://fossil-scm.hwaci.com/fossil/timeline">self-hosting</a> on +<a href="http://www.fossil-scm.org/fossil/timeline">self-hosting</a> on <a href="http://www.hwaci.com/cgi-bin/fossil/timeline">two separate servers</a>. You can download the lastest sources compile it yourself using the instructions below. +Or you can grab +<a href="http://www.fossil-scm.org/download.html">precompiled binaries</a>. +</p> -<h2>Design Goals For Fossil:</h1> +<p>Design Goals For Fossil:</p> <ul> <li>Supports disconnected, distributed development (like <a href="http://kerneltrap.org/node/4982">git</a>, -<a href="http://www.venge.net/monotone/">monotone</a>, +<a href="http://www.monotone.ca/">monotone</a>, <a href="http://www.selenic.com/mercurial/wiki/index.cgi">mercurial</a>, or <a href="http://www.bitkeeper.com/">bitkeeper</a>) or client/server operation (like <a href="http://www.nongnu.org/cvs/">CVS</a> or -<a href="http://subversion.tigris.org/">subversion</a>) -or both at the same time</li> +<a href="http://subversion.tigris.org/">subversion</a>), +or operations on local repositories, +or all three at the same time</li> <li>Integrated bug tracking and wiki, along the lines of <a href="http://www.cvstrac.org/">CVSTrac</a> and <a href="http://www.edgewall.com/trac/">Trac</a>.</li> <li>Built-in web interface that supports deep archaeological digs through historical source code.</li> @@ -41,23 +46,23 @@ <a href="http://en.wikipedia.org/wiki/SHA-1">SHA1</a> signature.</a> Any unique prefix is sufficient to identify a file or version - usually the first 4 or 5 characters suffice.</li> <li>The file format is trival and requires nothing more complex than a text editor and the "sha1sum" command-line utility to decode.</li> -<li>Automatic <a href="selfcheck.html">self-check</a> +<li>Automatic <a href="selfcheck.wiki">self-check</a> on repository changes makes it exceedingly unlikely that data will ever be lost because of a software bug.</li> </ul> -<h2>Objectives Of Fossil:</h2> +<p>Objectives Of Fossil:</p> <ul> <li>Fossil should be ridiculously easy to -<a href="build.html">install</a> and -<a href="quickstart.html">operate</a>.</li> +<a href="build.wiki">install</a> and +<a href="quickstart.wiki">operate</a>.</li> <li>With fossil, it should be possible (and -<a href="quickstart.html#serversetup">easy</a>) to set up a project +<a href="quickstart.wiki#serversetup">easy</a>) to set up a project on an inexpensive shared-hosting ISP (example: <a href="http://www.he.net/hosting.html">Hurricane Electric</a>) that provides nothing more than web space and CGI capability. Here is <a href="http://www.hwaci.com/cgi-bin/fossil/timeline">a demo</a>.</li> <li>Fossil should provide in-depth historical and status information about the @@ -71,18 +76,38 @@ </ul> <p>Other Links:</p> <ul> -<li>The <a href="concepts.html">concepts</b> behind fossil</li> -<li><a href="build.html">Building And Installing</a></li> -<li><a href="quickstart.html">Quick Start</a> guide to using fossil -<li><a href="pop.html">Principals Of Operation</a></li> -<li>The <a href="selfcheck.html">automatic self-check</a> mechanism +<li>The <a href="concepts.wiki">concepts</b> behind fossil</li> +<li><a href="build.wiki">Building And Installing</a></li> +<li><a href="quickstart.wiki">Quick Start</a> guide to using fossil +<li><a href="pop.wiki">Principles Of Operation</a></li> +<li>The <a href="selfcheck.wiki">automatic self-check</a> mechanism helps insure project integrity.</li> -<li>The <a href="fileformat.html">file format</a> used by every content +<li>The <a href="fileformat.wiki">file format</a> used by every content file stored in the repository.</li> -<li>The <a href="delta_format.html">format of deltas</a> used to +<li>The <a href="delta_format.wiki">format of deltas</a> used to efficiently store changes between file revisions.</li> -<li>The <a href="delta_encoder_algorithm.html">encoder algorithm</a> used to +<li>The <a href="delta_encoder_algorithm.wiki">encoder algorithm</a> used to efficiently generate deltas.</li> +<li>The <a href="sync.wiki">synchronization protocol</a>.</li> +<li>There is a <a href="http://lists.fossil-scm.org:8080/cgi-bin/mailman/listinfo/fossil-users"> + mailing list</a> available for discussing fossil issues.</li> +<li>The <a href="http://www.fossil-scm.org/fossil/wiki">self-hosting + fossil wiki</a>, capable of hosting static pages and community-editable wiki + content.</li> +</ul> + +<p>Competing Projects:</p> + +<ul> +<li><a href="http://www.ditrack.org/">DITrace</a> + - A Distributed Issue Tracker</li> +<li><a href="http://www.distract.wellquite.org/">DisTract</a> + - Another distributed issue tracker based on + <a href="http://www.monotone.ca/">monotone</a>.</li> +<li><a href="http://www.monotone.ca/">Monotone</a> - distributed + SCM in a single-file executable with a single-file SQLite + database repository.</li> </ul> +
Deleted www/pop.html version [f5f93db90a]
Added www/pop.wiki version [aaab800d38]
@@ -1,1 +1,74 @@ +<h1>Principles Of Operation</h1> + +<p> +This page attempts to define the foundational principals upon +which Fossil is built. +</p> + +<ul> +<li><p>A project consists of source files, wiki pages, and +trouble tickets, and control files (collectively "artifacts"). +All historical copies of all artifacts +are saved. The project maintains an audit +trail.</p></li> + +<li><p>A project resides in one or more repositories. Each +repository is administered and operates independently +of the others.</p></li> + +<li><p>Each repository has both global and local state. The +global state is common to all repositories (or at least +has the potential to be shared in common when the +repositories are fully synchronized). The local state +for each repository is private to that repository. +The global state represents the content of the project. +The local state identifies the authorized users and +access policies for a particular repository.</p></li> + +<li><p>The global state of a repository is an unordered +collection of artifacts. Each artifact is named by +its SHA1 hash encoded in lowercase hexadecimal. +In many contexts, the name can be +abbreviated to a unique prefix. A five- or six-character +prefix usually suffices to uniquely identify a file.</p></li> + +<li><p>Because artifacts are named by their SHA1 hash, all artifacts +are immutable. Any change to the content of a artifact also +changes the hash that forms the artifacts name, thus +creating a new artifact. Both the old original version of the +artifact and the new change are preserved under different names.</p></li> + +<li><p>It is theoretically possible for two artifacts with different +content to share the same hash. But finding two such +artifacts is so incredibly difficult and unlikely that we +consider it to be an impossibility.</p></li> + +<li><p>The signature of an artifact is the SHA1 hash of the +artifact itself, exactly as it would appear in a disk file. No prefix +or meta-information about the artifact is added before computing +the hash. So you can +always find the SHA1 signature of a file by using the +"sha1sum" command-line utility.</p></li> + +<li><p>The artifacts that comprise the global state of a repository +are the complete global state of that repository. The SQLite +database that holds the repository contains additional information +about linkages between artifacts, but all of that added information +can be discarded and reconstructed by rescanning the content +artifacts.</p></li> + +<li><p>Two repositories for the same project can synchronize +their global states simply by sharing artifacts. The local +state of repositories is not normally synchronized or +shared.</p></li> + +<li><p>Every baseline has a special file at the top-level +named "manifest" which is an index of all other files in +that baseline. The manifest is automatically created and +maintained by the system.</p></li> +<li><p>The <a href="fileformat.html">file formats</a> +used by Fossil are all very simple so that with access +to the original content files, one can easily reconstruct +the content of a baseline without the need for any +special tools or software.</p></li>
Deleted www/quickstart.html version [ce3d352b11]
Added www/quickstart.wiki version [4381e8a48d]
@@ -1,1 +1,275 @@ +<h1 align="center">Fossil Quick Start</h1> + +<p>This is a guide to get you started using fossil quickly +and painlessly.</p> + +<h2>Installing</h2><blockquote> + + <p>Fossil is a single self-contained C program. You need to + either download a <a href="download.wiki">FIXME precompiled binary</a> + or <a href="build.wiki">build it yourself</a> from sources. + Install fossil by putting the fossil binary + someplace on your PATH environment variable.</p> + + </blockquote> + <h2>Cloning A Existing Repository</h2> + <blockquote> + + <p>Use this command:</p> + + <blockquote> + <b>fossil clone</b> <i>URL repository-filename</i> + </blockquote> + + <p>The <i>URL</i> above is the http URL for the fossil repository + you want to clone. You can call the new repository anything you + want - there are no naming restrictions. As an example, you can + clone the fossil repository this way:</p> + + <blockquote> + <b>fossil clone http://www.fossil-scm.org/fossil myclone.fsl</b> + </blockquote> + + <p>Note: If you are behind a restrictive firewall, you might need + to <a href="#proxy">specify an HTTP proxy</a> to use.</p> + +</blockquote><h2>Starting A New Project</h2><blockquote> + + <p>To start a new project with fossil, create a new empty repository + this way:</p> + + <blockquote> + <b>fossil new </b><i> repository-filename</i> + </blockquote> + +</blockquote><h2>Configuring Your Local Repository</h2><blockquote> + + <p>When you create a new repository, either by cloning an existing + project or create a new project of your own, you usually want to do some + local configuration. This is accomplished using a webbrowser. First + start a fossil webserver like this:</p> + + <blockquote> + <b>fossil server </b><i> repository-filename</i> + </blockquote> + + <p>This creates a mini-webserver listening on port 8080. You can + specify a different port using the <b>-port</b> option on the command-line. + After the server is running, point your webbrowser at + http://localhost:8080/ and start configuring.</p> + + <p>By default, fossil does not require a login for HTTP connections + coming in from the IP loopback address 127.0.0.1. You can, and perhaps + should, change this after you create a few users.</p> + + <p>When you are finished configuring, just press Control-C or use + the <b>kill</b> command to shut down the mini-server.</p> + +</blockquote><h2>Checking Out A Local Tree</h2><blockquote> + + <p>To work on a project in fossil, you need to check out a local + copy of the source tree. Create the directory you want to be + the root of your tree and cd into that directory. Then + to this:</p> + + <blockquote> + <b>fossil open </b><i> repository-filename</i> + </blockquote> + + <p>This leaves you with the original (empty) version of the tree + checked out. To get to the latest version, also do this:</p> + + <blockquote> + <b>fossil update</b> + </blockquote> + + <p>From anywhere underneath the root of your local tree, you + can type commands like the following to find out the status of + your local tree:</p> + + <blockquote> + <b>fossil info</b><br> + <b>fossil status</b><br> + <b>fossil changes</b><br> + <b>fossil timeline</b><br> + <b>fossil leaves</b><br> + <b>fossil ls</b><br> + <b>fossil branches</b><br> + </blockquote> + +</blockquote><h2>Making Changes</h2><blockquote> + + <p>To add new files to your project, or remove old files, use these + commands:</p> + + <blockquote> + <b>fossil add</b> <i>file...</i><br> + <b>fossil rm</b> <i>file...</i> + </blockquote> + + <p>You can also edit files freely. Once you are ready to commit + your changes, type:</p> + + <blockquote> + <b>fossil commit</b> + </blockquote> + + <p>You will be prompted for check-in comments using whatever editor + is specified by your VISUAL or EDITOR environment variable. If you + have GPG installed, you may be prompted for your GPG passphrase so + that the check-in can be signed with your GPG signature. After + this your changes will be checked in.</p> + +</blockquote><h2>Sharing Changes</h2><blockquote> + + <p>The changes you <b>commit</b> are only on your local repository. + To share those changes with other repositories, do:</p> + + <blockquote> + <b>fossil push</b> <i>URL</i> + </blockquote> + + <p>Where <i>URL</i> is the http: URL of the server repository you + want to share your changes with. If you omit the <i>URL</i> argument, + fossil will use whatever server you most recently synced with.</p> + + <p>The <b>push</b> command only sends your changes to others. To + Receive changes from others, use <b>pull</b>. Or go both ways at + once using <b>sync</b>:</p> + + <blockquote> + <b>fossil pull</b> <i>URL</i><br> + <b>fossil sync</b> <i>URL</i> + </blockquote> + + <p>When you pull in changes from others, they go into your repository, + not into your checked-out local tree. To get the changes into your + local tree, use <b>update</b>:</p> + + <blockquote> + <b>fossil update</b> <i>UUID</i> + </blockquote> + + <p>The <i>UUID</i> is some unique abbreviation to the 40-character + version ID. If you omit the <i>UUID</i> fossil moves you to the + leaf version of the branch your are currently on. If your branch + has multiple leaves, you get an error - you'll have to specify the + leaf you want using a <i>UUID</i> argument.</p> + +</blockquote><h2>Branching And Merging</h2><blockquote> + + <p>You can create branches by doing multiple commits off of the + same base version. To merge to branches back together, first + <b>update</b> to the leaf of one branch. Then do a <b>merge</b> + of the leaf of the other branch:</p> + + <blockquote> + <b>fossil merge</b> <i>UUID</i> + </blockquote> + + <p>Test to make sure your merge didn't mess up the code, then + <b>commit</b> and possibly also <b>push</b> your changes. Remember + that nobody else can see your changes until you <b>commit</b> and + if other are using a different repository you will also need to + <b>push</b>.</p> + +<a name="serversetup"> +</blockquote><h2>Setting Up A Server</h2><blockquote> + + <p>The easiest way to set up a server is:</p> + + <blockquote> + <b>fossil server</b> <i>repository-filename</i> + </blockquote> + + <p>You can omit the <i>repository-filename</i> if you are within + a checked-out local tree. This server uses port 8080 by default + but you can specify a different port using the <b>-port</b> command.</p> + + <p>Command-line servers like this are useful when two people want + to share a repository on temporary or ad-hoc basis. For a more + permanent installation, you should use either the CGI server or the + inetd server. To use the CGI server, create a CGI script that + looks something like this:</p> + + <blockquote><b> + #!/usr/local/bin/fossil<br> + repository: /home/proj1/repos1.fsl + </b></blockquote> + + <p>Adjust the paths in this CGI script to match your installation. + Now point clients at the CGI script. That's all there is to it!</p> + + <p>You can also run fossil off of inetd or xinetd. For an inetd + installation, make an entry in /etc/inetd.conf that looks something + like this:</p> + + <blockquote><b> + 80 stream tcp nowait.1000 root /usr/bin/fossil \<br> + /usr/bin/fossil http /home/proj1/repos1.fsl + </b></blockquote> + + <p>Adjust the paths to suit your installation, of course. Notice that + fossil runs as root. This is not required - you can run it as an + unprivileged user. But it is more secure to run fossil as root. + When you do run fossil as root, it automatically puts itself in a + chroot jail in the same directory as the repository, then drops + root privileges prior to reading any information from the request.</p> + +</blockquote><a name="proxy"></a><h2>HTTP Proxies</h2><blockquote> + + <p>If you are behind a restrictive firewall that requires you to use + an HTTP proxy to reach the internet, then you can configure the proxy + in three different ways. You can tell fossil about your proxy using + a command-line option on commands that use the network, + <b>sync</b>, <b>clone</b>, <b>push</b>, and <b>pull</b>.</p> + + <blockquote> + <b>fossil clone </b><i>URL</i> <b>--proxy</b> <i>Proxy-URL</i> + </blockquote> + + <p>It is annoying to have to type in the proxy URL every time you + sync your project, though, so you can make the proxy configuration + persistent using the <b>setting</b> command:</p> + + <blockquote> + <b>fossil setting proxy </b><i>Proxy-URL</i> + </blockquote> + + <p>Or, you can set the "<b>http_proxy</b>" environment variable:</p> + + <blockquote> + <b>export http_proxy=</b><i>Proxy-URL</i> + </blockquote> + + <p>To stop using the proxy, do:</p> + + <blockquote> + <b>fossil setting proxy off</b> + </blockquote> + + <p>Or unset the environment variable. The fossil setting for the + HTTP proxy takes precedence over the environment variable and the + command-line option overrides both. If you have an persistent + proxy setting that you want to override for a one-time sync, that + is easily done on the command-line. For example, to sync with + a co-workers repository on your LAN, you might type:</p> + + <blockquote> + <b>fossil sync http://192.168.1.36:8080/ --proxy off</b> + </blockquote> + +</blockquote><h2>More Hints</h2><blockquote> + + <p>Try these commands:</p> + + <blockquote><b> + fossil help<br> + fossil test-commands + </b></blockquote> + + <p>Explore and have fun!</p> + + +</blockquote>
Deleted www/selfcheck.html version [4de2807375]
Added www/selfcheck.wiki version [e5d3578c4b]
@@ -1,1 +1,92 @@ +<h1 align="center"> +Fossil Repository Integrity Self-Checks +</h1> + +<p> +Even though fossil is a relatively new project and still contains +many bugs, it is designed with features to give it a high level +of integrity so that you can have confidence that you will not +lose your files. This note describes the defensive measures that +fossil uses to help prevent file loss due to bugs. +</p> + +<p><i>Follow-up as of 2007-11-24:</i> +Fossil has been hosting itself and several other projects for +months now. Many bugs have been encountered. But, thanks in large +part to the defensive measures described here, no data has been +lost. The integrity checks are doing their job well.</p> + +<h2>Atomic Check-ins With Rollback</h2> + +<p> +The fossil repository is an +<a href="http://www.sqlite.org/">SQLite version 3</a> database file. +SQLite is very mature and stable and has been in wide-spread use for many +years, so we have little worries that it might cause repository +corruption. SQLite +databases do not corrupt even if a program or system crash or power +failure occurs in the middle of the update. If some kind of crash +does occur in the middle of a change, then all the changes are rolled +back the next time that the database is accessed. +</p> + +<p> +A check-in operation in fossil makes many changes to the repository +database. But all these changes happen within a single transaction. +If something goes wrong in the middle of the commit, then the transaction +is rolled back and the database is unchanged. +</p> + +<h2>Verification Of Delta Encodings Prior To Transaction Commit</h2> + +<p> +The content files that comprise the global state of a fossil respository +are stored in the repository as a tree. The leaves of the tree are +stored as zlib-compressed BLOBs. Interior nodes are deltas from their +decendants. A lot of encoding is going on. There is +zlib-compression which is relatively well-tested but still might +cause corruption if used improperly. And there is the relatively +new delta-encoding mechanism designed expressly for fossil. We want +to make sure that bugs in these encoding mechanisms do not lead to +loss of data. +</p> + +<p> +To increase our confidence that everything in the repository is +recoverable, fossil makes sure it can extract an exact replicate +of every content file that it changes just prior to transaction +commit. So during the course of check-in (or other repository +operation) many different files +in the repository might be modified. Some files are simply +compressed. Other files are delta encoded and then compressed. +While all this is going on, fossil makes a record of every file +that is encoded and the SHA1 hash of the original content of that +file. Then just before transaction commit, fossil re-extracts +the original content of all files that were written, computes +the SHA1 checksum again, and verifies that the checksums match. +If anything does not match up, an error +message is printed and the transaction rolls back. +</p> + +<p> +So, in other words, fossil always checks to make sure it can +re-extract a file before it commits a change to that file. +Hence bugs in fossil are unlikely to corrupt the repository in +a way that prevents us from extracting historical versions of +files. +</p> + +<h2>Checksum Over All Files In A Baseline</h2> +<p> +Manifest artifacts that define a baseline have two fields (the +R-card and Z-card) that record MD5 hashs of the manifest itself +and of all other files in the manifest. Prior to any check-in +commit, these checksums are verified to ensure that the baseline +checked in agrees exactly with what is on disk. Similarly, +the repository checksum is verified after a checkout to make +sure that the entire repository was checked out correctly. +Note that these added checks use a different hash (MD5 instead +of SHA1) in order to avoid common-mode failures in the hash +algorithm implementation. +</p>
Deleted www/sync.html version [4b7a0c1821]
Added www/sync.wiki version [5cff62ccf6]
@@ -1,1 +1,458 @@ +<h1 align="center">The Fossil Sync Protocol</h1> + +<p>Fossil supports commands <b>push</b>, <b>pull</b>, and <b>sync</b> +for transferring information from one repository to another. The +command is run on the client repository. A URL for the server repository +is specified as part of the command. This document describes what happens +behind the scenes in order to synchronize the information on the two +repositories.</p> + +<h2>1.0 Transport</h2> + +<p>All communication between client and server is via HTTP requests. +The server is listening for incoming HTTP requests. The client +issues one or more HTTP requests and receives replies for each +request.</p> + +<p>The server might be running as an independent server +using the <b>server</b> command, or it might be launched from +inetd or xinetd using the <b>http</b> command. Or the server might +be launched from CGI. The details of how the server is configured +to "listen" for incoming HTTP requests is immaterial. The important +point is that the server is listening for requests and the client +is the issuer of the requests.</p> + +<p>A single push, pull, or sync might involve multiple HTTP requests. +The client maintains state between all requests. But on the server +side, each request is independent. The server does not preserve +any information about the client from one request to the next.</p> + +<h3>1.1 Server Identification</h3> + +<p>The server is identified by a URL argument that accompanies the +push, pull, or sync command on the client. (As a convenience to +users, the URL can be omitted on the client command and the same URL +from the most recent push, pull, or sync will be reused. This saves +typing in the common case where the client does multiple syncs to +the same server.)</p> + +<p>The client modifies the URL by appending the method name "<b>/xfer</b>" +to the end. For example, if the URL specified on the client command +line is</p> + +<blockquote> +http://fossil-scm.hwaci.com/fossil +</blockquote> + +<p>Then the URL that is really used to do the synchronization will +be:</p> + +<blockquote> +http://fossil-scm.hwaci.com/fossil/xfer +</blockquote> + +<h3>1.2 HTTP Request Format</h3> + +<p>The client always sends a POST request to the server. The +general format of the POST request is as follows:</p> + +<blockquote><pre> +POST /fossil/xfer HTTP/1.0 +Host: fossil-scm.hwaci.com:80 +Content-Type: application/x-fossil +Content-Length: 4216 + +<i>content...</i> +</pre></blockquote> + +<p>In the example above, the pathname given after the POST keyword +on the first line is a copy of the URL pathname. The Host: parameter +is also taken from the URL. The content type is always either +"application/x-fossil" or "application/x-fossil-debug". The "x-fossil" +content type is the default. The only difference is that "x-fossil" +content is compressed using zlib whereas "x-fossil-debug" is sent +uncompressed.</p> + +<p>A typical reply from the server might look something like this:</p> + +<blockquote><pre> +HTTP/1.0 200 OK +Date: Mon, 10 Sep 2007 12:21:01 GMT +Connection: close +Cache-control: private +Content-Type: application/x-fossil; charset=US-ASCII +Content-Length: 265 + +<i>content...</i> +</pre></blockquote> + +<p>The content type of the reply is always the same as the content type +of the request.</p> + +<h2>2.0 Fossil Synchronization Content</h2> + +<p>A synchronization request between a client and server consists of +one or more HTTP requests as described in the previous section. This +section details the "x-fossil" content type.</p> + +<h3>2.1 Line-oriented Format</h3> + +<p>The x-fossil content type consists of zero or more "cards". Cards +are separate by the newline character ("\n"). Leading and trailing +whitespace on a card is ignored. Blank cards are ignored.</p> + +<p>Each card is divided into zero or more space separated tokens. +The first token on each card is the operator. Subsequent tokens +are arguments. The set of operators understood by servers is slightly +different from the operators understood by clients, though the two +are very similar.</p> + +<h3>2.2 Login Cards</h3> + +<p>Every message from client to server begins with one or more login +cards. Each login card has the following format:</p> + +<blockquote> +<b>login</b> <i>userid nonce signature</i> +</blockquote> + +<p>The userid is the name of the user that is requesting service +from the server. The nonce is the SHA1 hash of the remainder of +the message - all text that follows the newline character that +terminates the login card. The signature is the SHA1 hash of +the concatenation of the nonce and the users password.</p> + +<p>For each login card, the server looks up the user and verifies +that the nonce matches the SHA1 hash of the remainder of the +message. It then checks the signature hash to make sure the +signature matches. If everything +checks out, then the client is granted all privileges of the +specified user.</p> + +<p>Privileges are cumulative. There can be multiple successful +login cards. The session privileges are the bit-wise OR of the +privileges of each individual login.</p> + +<h3>2.3 File Cards</h3> + +<p>Repository content records or files are transferred using +a "file" card. File cards come in two different formats depending +on whether the file is sent directly or as a delta from some +other file.</p> + +<blockquote> +<b>file</b> <i>uuid size</i> <b>\n</b> <i>content</i><br> +<b>file</b> <i>uuid delta-uuid size</i> <b>\n</b> <i>content</i> +</blockquote> + +<p>File cards are different from all other cards in that they +followed by in-line "payload" data. The content of the file +or the file delta consists of the first <i>size</i> bytes of the +x-fossil content that immediately follow the newline that +terminates the file card. No other cards have this characteristic. +</p> + +<p>The first argument of a file card is the UUID of the file that +is being transferred. The UUID is the lower-case hexadecimal +representation of the SHA1 hash of the entire file content. +The last argument of the file card is the number of bytes of +payload that immediately follow the file card. If the file +card has only two arguments, that means the payload is the +complete content of the file. If the file card has three +arguments, then the payload is a delta and second argument is +the UUID of another file that is the source of the delta.</p> + +<p>File cards are sent in both directions: client to server and +server to client. A delta might be sent before the source of +the delta, so both client and server should remember deltas +and be able to apply them when their source arrives.</p> + +<h3>2.4 Push and Pull Cards</h3> + +<p>Among of the first cards in a client-to-server message are +the push and pull cards. The push card tell the server that +the client is pushing content. The pull card tell the server +that the client wants to pull content. In the event of a sync, +both cards are sent. The format is as follows:</p> + +<blockquote> +<b>push</b> <i>servercode projectcode</i><br> +<b>pull</b> <i>servercode projectcode</i> +</blockquote> + +<p>The <i>servercode</i> argument is the repository ID for the +client. The server will only allow the transaction to proceed +if the servercode is different from its own servercode. This +prevents a sync-loop. The <i>projectcode</i> is the identifier +of the software project that the client repository contains. +The projectcode for the client and server must match in order +for the transaction to proceed.</p> + +<p>The server will also send a push card back to the client +during a clone. This is how the client determines what project +code to put in the new repository it is constructing.</p> + +<h3>2.5 Clone Cards</h3> + +<p>A clone card works like a pull card in that it is sent from +client to server in order to tell the server that the client +wants to pull content. But unlike the pull card, the clone +card has no arguments.</p> + +<blockquote> +<b>clone</b> +</blockquote> + +<p>In response to a clone message, the server also sends the client +a push message so that the client can discover the projectcode for +this project.</p> + +<h3>2.6 Igot Cards</h3> + +<p>An igot card can be sent from either client to server or from +server to client in order to indicate that the sender holds a copy +of a particular file. The format is:</p> + +<blockquote> +<b>igot</b> <i>uuid</i> +</blockquote> + +<p>The argument of the igot card is the UUID of the file that +the sender possesses. +The receiver of an igot card will typically check to see if +it also holds the same file and if not it will request the file +using a gimme card in either the reply or in the next message.</p> + +<h3>2.7 Gimme Cards</h3> + +<p>A gimme card is sent from either client to server or from server +to client. The gimme card asks the receiver to send a particular +file back to the sender. The format of a gimme card is this:</p> + +<blockquote> +<b>gimme</b> <i>uuid</i> +</blockquote> + +<p>The argument to the gimme card is the UUID of the file that +the sender wants. The receiver will typically respond to a +gimme card by sending a file card in its reply or in the next +message.</p> + +<h3>2.8 Cookie Cards</h3> + +<p>A cookie card can be used by a server to record a small amount +of state information on a client. The server sends a cookie to the +client. The client sends the same cookie back to the server on +its next request. The cookie card has a single argument which +is its payload.</p> + +<blockquote> +<b>cookie</b> <i>payload</i> +</blockquote> + +<p>The client is not required to return the cookie to the server on +its next request. Or the client might send a cookie from a different +server on the next request. So the server must not depend on the +cookie and the server must structure the cookie payload in such +a way that it can tell if the cookie it sees is its own cookie or +a cookie from another server. (Typically the server will embed +its servercode as part of the cookie.)</p> + +<h3>2.9 Error Cards</h3> + +<p>If the server discovers anything wrong with a request, it generates +an error card in its reply. When the client sees the error card, +it displays an error message to the user and aborts the sync +operation. An error card looks like this:</p> + +<blockquote> +<b>error</b> <i>error-message</i> +</blockquote> + +<p>The error message is English text that is encoded in order to +be a single token. +A space (ASCII 0x20) is represented as "\s" (ASCII 0x5C, 0x73). A +newline (ASCII 0x0a) is "\n" (ASCII 0x6C, x6E). A backslash +(ASCII 0x5C) is represented as two backslashes "\\". Apart from +space and newline, no other whitespace characters nor any +unprintable characters are allowed in +the error message.</p> + +<h3>2.10 Unknown Cards</h3> + +<p>If either the client or the server sees a card that is not +described above, then it generates an error and aborts.</p> + +<h2>3.0 Phantoms And Clusters</h2> + +<p>When a repository knows that a file exists and knows the UUID of +that file, but it does not know the file content, then it stores that +file as a "phantom". A repository will typically create a phantom when +it receives an igot card for a file that it does not hold or when it +receives a file card that references a delta source that it does not +hold. When a server is generating its reply or when a client is +generating a new request, it will usually send gimme cards for every +phantom that it holds.</p> + +<p>A cluster is a special file that tells of the existence of other +files. Any file in the repository that follows the syntactic rules +of a cluster is considered a cluster.</p> + +<p>A cluster is a line oriented file. Each line of a cluster +is a card. The cards are separated by the newline ("\n") character. +Each card consists of a single character card type, a space, and a +single argument. No extra whitespace and no trailing or leading +whitespace is allowed. All cards in the cluster must occur in +strict lexicographical order.</p> + +<p>A cluster consists of one or more "M" cards followed by a single +"Z" card. Each M card holds an argument which is a UUID for a file +in the repository. The Z card has a single argument which is the +lower-case hexadecimal representation of the MD5 checksum of all +preceding M cards up to and included the newline character that +occurred just before the Z that starts the Z card.</p> + +<p>Any file that does not match the specifications of a cluster +exactly is not a cluster. There must be no extra whitespace in +the file. There must be one or more M cards. There must be a +single Z card with a correct MD5 checksum. And all cards must +be in strict lexicographical order.</p> + +<h3>3.1 The Unclustered Table</h3> + +<p>Every repository maintains a table named "<b>unclustered</b>" +which records the identity of every file and phantom it holds that is not +mentioned in a cluster. The entries in the unclustered table can +be thought of as leaves on a tree of files. Some of the unclustered +files will be clusters. Those clusters may contain other clusters, +which might contain still more clusters, and so forth. Beginning +with the files in the unclustered table, one can follow the chain +of clusters to find every file in the repository.</p> + +<h2>4.0 Synchronization Strategies</h2> + +<h3>4.1 Pull</h3> + +<p>A typical pull operation proceeds as shown below. Details +of the actual implementation may very slightly but the gist of +a pull is captured in the following steps:</p> + +<ol> +<li>The client sends login and pull cards. +<li>The client sends a cookie card if it has previously received a cookie. +<li>The client sends gimme cards for every phantom that it holds. +<hr> +<li>The server checks the login password and rejects the session if +the user does not have permission to pull. +<li>If the number entries in the unclustered table on the server is +greater than 100, then the server constructs a new cluster file to +cover all those unclustered entries. +<li>The server sends file cards for every gimme card it received +from the client. +<li>The server sends ihave cards for every file in its unclustered +table that is not a phantom. +<hr> +<li>The client adds the content of file cards to its repository. +<li>The client creates a phantom for every ihave card in the server reply +that mentions a file that the client does not possess. +<li>The client creates a phantom for the delta source of file cards when +the delta source is a file that the client does not possess. +</ol> + +<p>These ten steps represent a single HTTP round-trip request. +The first three steps are the processing that occurs on the client +to generate the request. The middle four steps are processing +that occurs on the server to interpret the request and generate a +reply. And the last three steps are the processing that the +client does to interpret the reply.</p> + +<p>During a pull, the client will keep sending HTTP requests +until it holds all files that exist on the server.</p> + +<p>Note that the server tries +to limit the size of its reply message to something reasonable +(usually about 1MB) so that it might stop sending file cards as +described in step (6) if the reply becomes too large.</p> + +<p>Step (5) is the only way in which new clusters can be created. +By only creating clusters on the server, we hope to minimize the +amount of overlap between clusters in the common configuration where +there is a single server and many clients. The same synchronization +protocol will continue to work even if there are multiple servers +or if servers and clients sometimes change roles. The only negative +effects of these unusual arrangements is that more than the minimum +number of clusters might be generated.</p> + +<h3>4.2 Push</h3> + +<p>A typical push operation proceeds roughly as shown below. As +with a pull, the actual implementation may vary slightly.</p> + +<ol> +<li>The client sends login and push cards. +<li>The client sends file cards for any files that it holds that have +never before been pushed - files that come from local check-ins. +<li>If this is the second or later cycle in a push, then the +client sends file cards for any gimme cards that the server sent +in the previous cycle. +<li>The client sends igot cards for every file in its unclustered table +that is not a phantom. +<hr> +<li>The server checks the login and push cards and issues an error if +anything is amiss. +<li>The server accepts file cards from the client and adds those files +to its repository. +<li>The server creates phantoms for igot cards that mention files it +does not possess or for file cards that mention delta source files that +it does not possess. +<li>The server issues gimme cards for all phantoms. +<hr> +<li>The client remembers the gimme cards from the server so that it +can generate file cards in reply on the next cycle. +</ol> + +<p>As with a pull, the steps of a push operation repeat until the +server knows all files that exist on the client. Also, as with +pull, the client attempts to keep the size of the request from +growing too large by suppressing file cards once the +size of the request reaches 1MB.</p> + +<h3>4.3 Sync</h3> + +<p>A sync is just a pull and a push that happen at the same time. +The first three steps of a pull are combined with the first five steps +of a push. Steps (4) through (7) of a pull are combined with steps +(5) through (8) of a push. And steps (8) through (10) of a pull +are combined with step (9) of a push.</p> + +<h2>5.0 Summary</h2> + +<p>Here are the key points of the synchronization protocol:</p> +<ol> +<li>The client sends one or more PUSH HTTP requests to the server. + The request and reply content type is "application/x-fossil". +<li>HTTP request content is compressed using zlib. +<li>The content of request and reply consists of cards with one + card per line. +<li>Card formats are: + <ul> + <li> <b>login</b> <i>userid nonce signature</i> + <li> <b>push</b> <i>servercode projectcode</i> + <li> <b>pull</b> <i>servercode projectcode</i> + <li> <b>clone</b> + <li> <b>file</b> <i>uuid size</i> <b>\n</b> <i>content</i> + <li> <b>file</b> <i>uuid delta-uuid size</i> <b>\n</b> <i>content</i> + <li> <b>igot</b> <i>uuid</i> + <li> <b>gimme</b> <i>uuid</i> + <li> <b>cookie</b> <i>cookie-text</i> + <li> <b>error</b> <i>error-message</i> + </ul> +<li>Phantoms are files that a repository knows exist but does not possess. +<li>Clusters are files that contain the UUIDs of other files. +<li>Clusters are created automatically on the server during a pull. +<li>Repositories keep track of all files that are not named in any +cluster and send igot messages for those files. +<li>Repositories keep track of all the phantoms they hold and send +gimme messages for those files. +</ol>