Check-in [232d10b736]
Not logged in
Overview

SHA1 Hash:232d10b7365e2d0775972d351b8eb0b1ce93a236
Date: 2009-10-18 18:21:37
User: drh
Comment:Begin adding a search capability; the code is not yet connected up. Also update to the latest version of SQLite.
Timelines: ancestors | descendants | both | trunk
Other Links: files | ZIP archive | manifest

Tags And Properties
Changes
[hide diffs]

Modified src/main.mk from [1c7633a14e] to [5d15c133fb].

@@ -55,10 +55,11 @@
   $(SRCDIR)/rebuild.c \
   $(SRCDIR)/report.c \
   $(SRCDIR)/rss.c \
   $(SRCDIR)/rstats.c \
   $(SRCDIR)/schema.c \
+  $(SRCDIR)/search.c \
   $(SRCDIR)/setup.c \
   $(SRCDIR)/sha1.c \
   $(SRCDIR)/shun.c \
   $(SRCDIR)/stat.c \
   $(SRCDIR)/style.c \
@@ -123,10 +124,11 @@
   rebuild_.c \
   report_.c \
   rss_.c \
   rstats_.c \
   schema_.c \
+  search_.c \
   setup_.c \
   sha1_.c \
   shun_.c \
   stat_.c \
   style_.c \
@@ -191,10 +193,11 @@
   rebuild.o \
   report.o \
   rss.o \
   rstats.o \
   schema.o \
+  search.o \
   setup.o \
   sha1.o \
   shun.o \
   stat.o \
   style.o \
@@ -255,16 +258,16 @@
 	# noop
 
 clean:
 	rm -f *.o *_.c $(APPNAME) VERSION.h
 	rm -f translate makeheaders mkindex page_index.h headers
-	rm -f add.h allrepo.h bag.h blob.h branch.h browse.h captcha.h cgi.h checkin.h checkout.h clearsign.h clone.h comformat.h configure.h construct.h content.h db.h delta.h deltacmd.h descendants.h diff.h diffcmd.h doc.h encode.h file.h http.h http_socket.h http_transport.h info.h login.h main.h manifest.h md5.h merge.h merge3.h name.h pivot.h pqueue.h printf.h rebuild.h report.h rss.h rstats.h schema.h setup.h sha1.h shun.h stat.h style.h sync.h tag.h th_main.h timeline.h tkt.h tktsetup.h undo.h update.h url.h user.h verify.h vfile.h wiki.h wikiformat.h winhttp.h xfer.h zip.h
+	rm -f add.h allrepo.h bag.h blob.h branch.h browse.h captcha.h cgi.h checkin.h checkout.h clearsign.h clone.h comformat.h configure.h construct.h content.h db.h delta.h deltacmd.h descendants.h diff.h diffcmd.h doc.h encode.h file.h http.h http_socket.h http_transport.h info.h login.h main.h manifest.h md5.h merge.h merge3.h name.h pivot.h pqueue.h printf.h rebuild.h report.h rss.h rstats.h schema.h search.h setup.h sha1.h shun.h stat.h style.h sync.h tag.h th_main.h timeline.h tkt.h tktsetup.h undo.h update.h url.h user.h verify.h vfile.h wiki.h wikiformat.h winhttp.h xfer.h zip.h
 
 page_index.h: $(TRANS_SRC) mkindex
 	./mkindex $(TRANS_SRC) >$@
 headers:	page_index.h makeheaders VERSION.h
-	./makeheaders  add_.c:add.h allrepo_.c:allrepo.h bag_.c:bag.h blob_.c:blob.h branch_.c:branch.h browse_.c:browse.h captcha_.c:captcha.h cgi_.c:cgi.h checkin_.c:checkin.h checkout_.c:checkout.h clearsign_.c:clearsign.h clone_.c:clone.h comformat_.c:comformat.h configure_.c:configure.h construct_.c:construct.h content_.c:content.h db_.c:db.h delta_.c:delta.h deltacmd_.c:deltacmd.h descendants_.c:descendants.h diff_.c:diff.h diffcmd_.c:diffcmd.h doc_.c:doc.h encode_.c:encode.h file_.c:file.h http_.c:http.h http_socket_.c:http_socket.h http_transport_.c:http_transport.h info_.c:info.h login_.c:login.h main_.c:main.h manifest_.c:manifest.h md5_.c:md5.h merge_.c:merge.h merge3_.c:merge3.h name_.c:name.h pivot_.c:pivot.h pqueue_.c:pqueue.h printf_.c:printf.h rebuild_.c:rebuild.h report_.c:report.h rss_.c:rss.h rstats_.c:rstats.h schema_.c:schema.h setup_.c:setup.h sha1_.c:sha1.h shun_.c:shun.h stat_.c:stat.h style_.c:style.h sync_.c:sync.h tag_.c:tag.h th_main_.c:th_main.h timeline_.c:timeline.h tkt_.c:tkt.h tktsetup_.c:tktsetup.h undo_.c:undo.h update_.c:update.h url_.c:url.h user_.c:user.h verify_.c:verify.h vfile_.c:vfile.h wiki_.c:wiki.h wikiformat_.c:wikiformat.h winhttp_.c:winhttp.h xfer_.c:xfer.h zip_.c:zip.h $(SRCDIR)/sqlite3.h $(SRCDIR)/th.h VERSION.h
+	./makeheaders  add_.c:add.h allrepo_.c:allrepo.h bag_.c:bag.h blob_.c:blob.h branch_.c:branch.h browse_.c:browse.h captcha_.c:captcha.h cgi_.c:cgi.h checkin_.c:checkin.h checkout_.c:checkout.h clearsign_.c:clearsign.h clone_.c:clone.h comformat_.c:comformat.h configure_.c:configure.h construct_.c:construct.h content_.c:content.h db_.c:db.h delta_.c:delta.h deltacmd_.c:deltacmd.h descendants_.c:descendants.h diff_.c:diff.h diffcmd_.c:diffcmd.h doc_.c:doc.h encode_.c:encode.h file_.c:file.h http_.c:http.h http_socket_.c:http_socket.h http_transport_.c:http_transport.h info_.c:info.h login_.c:login.h main_.c:main.h manifest_.c:manifest.h md5_.c:md5.h merge_.c:merge.h merge3_.c:merge3.h name_.c:name.h pivot_.c:pivot.h pqueue_.c:pqueue.h printf_.c:printf.h rebuild_.c:rebuild.h report_.c:report.h rss_.c:rss.h rstats_.c:rstats.h schema_.c:schema.h search_.c:search.h setup_.c:setup.h sha1_.c:sha1.h shun_.c:shun.h stat_.c:stat.h style_.c:style.h sync_.c:sync.h tag_.c:tag.h th_main_.c:th_main.h timeline_.c:timeline.h tkt_.c:tkt.h tktsetup_.c:tktsetup.h undo_.c:undo.h update_.c:update.h url_.c:url.h user_.c:user.h verify_.c:verify.h vfile_.c:vfile.h wiki_.c:wiki.h wikiformat_.c:wikiformat.h winhttp_.c:winhttp.h xfer_.c:xfer.h zip_.c:zip.h $(SRCDIR)/sqlite3.h $(SRCDIR)/th.h VERSION.h
 	touch headers
 headers: Makefile
 Makefile:
 add_.c:	$(SRCDIR)/add.c translate
 	./translate $(SRCDIR)/add.c >add_.c
@@ -572,10 +575,17 @@
 
 schema.o:	schema_.c schema.h  $(SRCDIR)/config.h
 	$(XTCC) -o schema.o -c schema_.c
 
 schema.h:	headers
+search_.c:	$(SRCDIR)/search.c translate
+	./translate $(SRCDIR)/search.c >search_.c
+
+search.o:	search_.c search.h  $(SRCDIR)/config.h
+	$(XTCC) -o search.o -c search_.c
+
+search.h:	headers
 setup_.c:	$(SRCDIR)/setup.c translate
 	./translate $(SRCDIR)/setup.c >setup_.c
 
 setup.o:	setup_.c setup.h  $(SRCDIR)/config.h
 	$(XTCC) -o setup.o -c setup_.c

Modified src/makemake.tcl from [63b9b997e5] to [be92f7b0d9].

@@ -49,10 +49,11 @@
   rebuild
   report
   rss
   rstats
   schema
+  search
   setup
   sha1
   shun
   stat
   style

Added src/search.c version [b8713502ff]

@@ -1,1 +1,214 @@
+/*
+** Copyright (c) 2009 D. Richard Hipp
+**
+** This program is free software; you can redistribute it and/or
+** modify it under the terms of the GNU General Public
+** License version 2 as published by the Free Software Foundation.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+** General Public License for more details.
+**
+** You should have received a copy of the GNU General Public
+** License along with this library; if not, write to the
+** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+** Boston, MA  02111-1307, USA.
+**
+** Author contact information:
+**   drh@hwaci.com
+**   http://www.hwaci.com/drh/
+**
+*******************************************************************************
+**
+** This file contains code to implement a search capability: compiling
+** a search pattern and scoring text against it.
+*/
+#include "config.h"
+#include "search.h"
+#include <assert.h>
+
+#if INTERFACE
+/*
+** A compiled search pattern.
+**
+** search_init() fills in this structure by splitting the pattern text
+** into terms.  At most 8 terms are recorded; search_init() stops
+** scanning the pattern once a[] is full.
+*/
+struct Search {
+  int nTerm;            /* Number of entries of a[] that are in use */
+  struct srchTerm {
+    char *z;            /* First byte of the term; not NUL-terminated */
+    int n;              /* Number of bytes in the term */
+  } a[8];               /* The individual search terms */
+};
+#endif
+
+/*
+** Compile a search pattern.
+**
+** zPattern is a NUL-terminated string.  It is copied into the same
+** allocation as the returned Search object and then split into terms.
+** A term begins at an ASCII alphanumeric character and extends over
+** every following alphanumeric character or '_'.  Everything between
+** terms is skipped.  Only as many terms as fit in Search.a[] are
+** recorded; the remainder of the pattern is ignored.
+**
+** The terms are stored as pointer/length pairs that point into the
+** private copy of the pattern.  They are not NUL-terminated.
+**
+** The return value is obtained from malloc() and should be released
+** using search_end().  fossil_panic() is invoked if the allocation
+** fails.
+*/
+Search *search_init(const char *zPattern){
+  int nPattern = strlen(zPattern);
+  Search *p;
+  char *z;
+  int i;
+
+  /* One allocation holds the Search header followed by the pattern copy */
+  p = malloc( nPattern + sizeof(*p) + 1);
+  if( p==0 ) fossil_panic("out of memory");
+  z = (char*)&p[1];
+  memcpy(z, zPattern, nPattern+1);
+  memset(p, 0, sizeof(*p));
+  while( *z && p->nTerm<(int)(sizeof(p->a)/sizeof(p->a[0])) ){
+    /* The <ctype.h> functions require an argument that is representable
+    ** as an unsigned char.  Passing a plain char that holds a byte with
+    ** the high bit set is undefined behavior where char is signed. */
+    while( *z && !isalnum((unsigned char)*z) ){ z++; }
+    if( *z==0 ) break;
+    p->a[p->nTerm].z = z;
+    for(i=1; isalnum((unsigned char)z[i]) || z[i]=='_'; i++){}
+    p->a[p->nTerm].n = i;
+    z += i;
+    p->nTerm++;
+  }
+  return p;
+}
+
+
+/*
+** Release the memory held by a Search object.  The argument may be
+** a NULL pointer, in which case nothing happens.
+*/
+void search_end(Search *p){
+  if( p ){
+    free(p);
+  }
+}
+
+/*
+** These characters constitute a word boundary.
+**
+** The table is indexed by byte value (0 through 255).  An entry of 1
+** marks a boundary byte.  Every ASCII byte other than a digit, a
+** letter, or '_' is a boundary, including the NUL terminator.  Bytes
+** 0x80 through 0xff are not boundaries.
+*/
+static const char isBoundary[] = {
+  1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,  /* 0x00..0x0f */
+  1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,  /* 0x10..0x1f */
+  1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,  /* 0x20..0x2f */
+  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 1, 1, 1, 1, 1, 1,  /* 0x30..0x3f: digits are 0 */
+  1, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,  /* 0x40..0x4f: '@', then A-O */
+  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 1, 1, 1, 1, 0,  /* 0x50..0x5f: P-Z and '_' are 0 */
+  1, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,  /* 0x60..0x6f: '`', then a-o */
+  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 1, 1, 1, 1, 1,  /* 0x70..0x7f: p-z are 0 */
+  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,  /* 0x80..0x8f */
+  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,  /* 0x90..0x9f */
+  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,  /* 0xa0..0xaf */
+  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,  /* 0xb0..0xbf */
+  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,  /* 0xc0..0xcf */
+  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,  /* 0xd0..0xdf */
+  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,  /* 0xe0..0xef */
+  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,  /* 0xf0..0xff */
+};
+
+/*
+** Compare a search pattern against an input string and return a score.
+**
+** Scoring:
+**   *  All terms must match at least once or the score is zero
+**   *  The score starts at 10 and gains 1 point for every match of a term
+**   *  10 bonus points if the first occurrence of a term is an exact
+**      match, meaning the matched text is followed by a word boundary
+**   *  Extra points if two consecutive words of the pattern are consecutive
+**      in the document.  The bonus is halved for each word that is
+**      examined, beginning with the matched word itself.
+**
+** Terms are compared without regard to case, and only at the start of
+** a word of zDoc.  A term that is a prefix of a longer word counts as
+** a match but does not earn the exact-match bonus.
+**
+** A NULL zDoc scores zero.
+*/
+int search_score(Search *p, const char *zDoc){
+  int iPrev = 999;        /* Index of the most recently matched term */
+  int score = 10;
+  int iBonus = 0;         /* Bonus available for a consecutive-term match */
+  int i, j;
+  unsigned char seen[8];  /* seen[j] is set once term j has matched */
+
+  if( zDoc==0 ) return 0;
+  memset(seen, 0, sizeof(seen));
+  for(i=0; zDoc[i]; i++){
+    if( isBoundary[zDoc[i]&0xff] ) continue;
+
+    /* zDoc[i] is the first character of a word.  See if any term
+    ** matches at this position. */
+    for(j=0; j<p->nTerm; j++){
+      int n = p->a[j].n;
+      if( sqlite3_strnicmp(p->a[j].z, &zDoc[i], n)==0 ){
+        score += 1;
+        if( !seen[j] ){
+          if( isBoundary[zDoc[i+n]&0xff] ) score += 10;
+          seen[j] = 1;
+        }
+        if( j==iPrev+1 ){
+          score += iBonus;
+        }
+        i += n-1;
+        iPrev = j;
+        iBonus = 50;
+        break;
+      }
+    }
+    iBonus /= 2;
+
+    /* Skip over the rest of the current word */
+    while( !isBoundary[zDoc[i]&0xff] ){ i++; }
+
+    /* The skip above can stop on the NUL terminator.  Exit here so that
+    ** the i++ of the enclosing loop does not step past the end of the
+    ** string. */
+    if( zDoc[i]==0 ) break;
+  }
+
+  /* Every term must be seen or else the score is zero */
+  for(j=0; j<p->nTerm; j++){
+    if( !seen[j] ) return 0;
+  }
+
+  return score;
+}
+
+/*
+** This is an SQLite function that scores its input using
+** a pre-computed pattern.
+**
+** The Search object is the user-data pointer supplied when the
+** function was registered.  argv[0] is the text to be scored.  The
+** result is the integer returned by search_score(), or 0 if the text
+** is not available.
+*/
+static void search_score_sqlfunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  Search *p = (Search*)sqlite3_user_data(context);
+  const char *zText = (const char*)sqlite3_value_text(argv[0]);
+  int score = 0;
+
+  /* sqlite3_value_text() returns NULL for an SQL NULL argument.  Do not
+  ** hand a NULL pointer to search_score(); report no match instead. */
+  if( zText!=0 ){
+    score = search_score(p, zText);
+  }
+  sqlite3_result_int(context, score);
+}
+
+/*
+** Make a one-argument SQL function named "score()" available on the
+** g.db database connection.  The function scores its text argument
+** against the Search object p.  The connection holds on to p after
+** registration, so p must not be deleted afterwards.
+*/
+void search_sql_setup(Search *p){
+  static const char zFuncName[] = "score";
+  const int nFuncArg = 1;
+  void *pUserData = p;
+
+  sqlite3_create_function(g.db, zFuncName, nFuncArg, SQLITE_UTF8,
+     pUserData, search_score_sqlfunc, 0, 0);
+}
+
+/*
+** Testing the search function.
+**
+** COMMAND: test-search
+** %fossil test-search pattern...
+**
+** Search for check-ins matching the pattern.
+**
+** All pattern arguments are joined with single spaces and compiled
+** into one Search object.  Every event comment is scored, and the
+** comments with a positive score are printed, highest score first.
+** Nothing is done if no pattern argument is given.
+*/
+void search_test(void){
+  Search *p;
+  Blob pattern;
+  int i;
+  Stmt q;
+
+  db_must_be_within_tree();
+  /* g.argv[2] is the first word of the pattern, so at least three
+  ** arguments are required before it can be read. */
+  if( g.argc<3 ) return;
+  blob_init(&pattern, g.argv[2], -1);
+  for(i=3; i<g.argc; i++){
+    blob_appendf(&pattern, " %s", g.argv[i]);
+  }
+  p = search_init(blob_str(&pattern));
+  blob_reset(&pattern);
+
+  /* The Search object is registered as the user data of the score()
+  ** SQL function and so it is deliberately not freed here. */
+  search_sql_setup(p);
+
+  db_multi_exec(
+     "CREATE TEMP TABLE srch(x,text);"
+     "INSERT INTO srch(text) SELECT coalesce(ecomment,comment) FROM event;"
+     "UPDATE srch SET x=score(text);"
+  );
+  db_prepare(&q, "SELECT x, text FROM srch WHERE x>0 ORDER BY x DESC");
+  while( db_step(&q)==SQLITE_ROW ){
+    /* Column x already holds the result of score(text), and the query
+    ** only returns rows having x>0, so no rescoring is needed. */
+    int score = db_column_int(&q, 0);
+    const char *z = db_column_text(&q, 1);
+
+    printf("%5d: %s\n", score, z);
+  }
+  db_finalize(&q);
+}

Modified src/sqlite3.c from [e1c4c35b59] to [1222ebdab0].

@@ -15,11 +15,11 @@
 ** of the embedded sqlite3.h header file.) Additional code files may be needed
 ** if you want a wrapper to interface SQLite with your choice of programming
 ** language. The code for the "sqlite3" command-line shell is also in a
 ** separate file. This file contains only code for the core SQLite library.
 **
-** This amalgamation was generated on 2009-10-13 16:22:00 UTC.
+** This amalgamation was generated on 2009-10-14 11:34:12 UTC.
 */
 #define SQLITE_CORE 1
 #define SQLITE_AMALGAMATION 1
 #ifndef SQLITE_PRIVATE
 # define SQLITE_PRIVATE static
@@ -651,11 +651,11 @@
 **
 ** Requirements: [H10011] [H10014]
 */
 #define SQLITE_VERSION        "3.6.19"
 #define SQLITE_VERSION_NUMBER 3006019
-#define SQLITE_SOURCE_ID      "2009-10-13 15:42:49 f894ebf86d6bafcd1461f104f5f677b3b6a3aa1a"
+#define SQLITE_SOURCE_ID      "2009-10-14 11:33:55 c1d499afc50d54b376945b4efb65c56c787a073d"
 
 /*
 ** CAPI3REF: Run-Time Library Version Numbers {H10020} <S60100>
 ** KEYWORDS: sqlite3_version
 **
@@ -8616,11 +8616,11 @@
 ** opened savepoint. Savepoints are added to the list by the vdbe
 ** OP_Savepoint instruction.
 */
 struct Savepoint {
   char *zName;                        /* Savepoint name (nul-terminated) */
-  int nDeferredCons;                  /* Number of deferred fk violations */
+  i64 nDeferredCons;                  /* Number of deferred fk violations */
   Savepoint *pNext;                   /* Parent savepoint (if any) */
 };
 
 /*
 ** The following are used as the second parameter to sqlite3Savepoint(),
@@ -83927,11 +83927,11 @@
     /* Set the P5 operand of the OP_Program instruction to non-zero if
     ** recursive invocation of this trigger program is disallowed. Recursive
     ** invocation is disallowed if (a) the sub-program is really a trigger,
     ** not a foreign key action, and (b) the flag to enable recursive triggers
     ** is clear.  */
-    sqlite3VdbeChangeP5(v, p->zName && !(pParse->db->flags&SQLITE_RecTriggers));
+    sqlite3VdbeChangeP5(v, (u8)(p->zName && !(pParse->db->flags&SQLITE_RecTriggers)));
   }
 }
 
 /*
 ** This is called to code the required FOR EACH ROW triggers for an operation

Modified src/sqlite3.h from [7fa89665f9] to [29f824bc4c].

@@ -119,11 +119,11 @@
 **
 ** Requirements: [H10011] [H10014]
 */
 #define SQLITE_VERSION        "3.6.19"
 #define SQLITE_VERSION_NUMBER 3006019
-#define SQLITE_SOURCE_ID      "2009-10-13 15:42:49 f894ebf86d6bafcd1461f104f5f677b3b6a3aa1a"
+#define SQLITE_SOURCE_ID      "2009-10-14 11:33:55 c1d499afc50d54b376945b4efb65c56c787a073d"
 
 /*
 ** CAPI3REF: Run-Time Library Version Numbers {H10020} <S60100>
 ** KEYWORDS: sqlite3_version
 **