Overview
SHA1 Hash: | 232d10b7365e2d0775972d351b8eb0b1ce93a236 |
---|---|
Date: | 2009-10-18 18:21:37 |
User: | drh |
Comment: | Begin adding a search capability; the code is not yet connected up. Also update to the latest version of SQLite. |
Timelines: | ancestors | descendants | both | trunk |
Other Links: | files | ZIP archive | manifest |
Tags And Properties
- branch=trunk inherited from [a28c83647d]
- sym-trunk inherited from [a28c83647d]
Changes
[hide diffs]
Modified src/main.mk from [1c7633a14e] to [5d15c133fb].
@@ -55,10 +55,11 @@ $(SRCDIR)/rebuild.c \ $(SRCDIR)/report.c \ $(SRCDIR)/rss.c \ $(SRCDIR)/rstats.c \ $(SRCDIR)/schema.c \ + $(SRCDIR)/search.c \ $(SRCDIR)/setup.c \ $(SRCDIR)/sha1.c \ $(SRCDIR)/shun.c \ $(SRCDIR)/stat.c \ $(SRCDIR)/style.c \ @@ -123,10 +124,11 @@ rebuild_.c \ report_.c \ rss_.c \ rstats_.c \ schema_.c \ + search_.c \ setup_.c \ sha1_.c \ shun_.c \ stat_.c \ style_.c \ @@ -191,10 +193,11 @@ rebuild.o \ report.o \ rss.o \ rstats.o \ schema.o \ + search.o \ setup.o \ sha1.o \ shun.o \ stat.o \ style.o \ @@ -255,16 +258,16 @@ # noop clean: rm -f *.o *_.c $(APPNAME) VERSION.h rm -f translate makeheaders mkindex page_index.h headers - rm -f add.h allrepo.h bag.h blob.h branch.h browse.h captcha.h cgi.h checkin.h checkout.h clearsign.h clone.h comformat.h configure.h construct.h content.h db.h delta.h deltacmd.h descendants.h diff.h diffcmd.h doc.h encode.h file.h http.h http_socket.h http_transport.h info.h login.h main.h manifest.h md5.h merge.h merge3.h name.h pivot.h pqueue.h printf.h rebuild.h report.h rss.h rstats.h schema.h setup.h sha1.h shun.h stat.h style.h sync.h tag.h th_main.h timeline.h tkt.h tktsetup.h undo.h update.h url.h user.h verify.h vfile.h wiki.h wikiformat.h winhttp.h xfer.h zip.h + rm -f add.h allrepo.h bag.h blob.h branch.h browse.h captcha.h cgi.h checkin.h checkout.h clearsign.h clone.h comformat.h configure.h construct.h content.h db.h delta.h deltacmd.h descendants.h diff.h diffcmd.h doc.h encode.h file.h http.h http_socket.h http_transport.h info.h login.h main.h manifest.h md5.h merge.h merge3.h name.h pivot.h pqueue.h printf.h rebuild.h report.h rss.h rstats.h schema.h search.h setup.h sha1.h shun.h stat.h style.h sync.h tag.h th_main.h timeline.h tkt.h tktsetup.h undo.h update.h url.h user.h verify.h vfile.h wiki.h wikiformat.h winhttp.h xfer.h zip.h page_index.h: $(TRANS_SRC) mkindex ./mkindex $(TRANS_SRC) >$@ headers: page_index.h makeheaders VERSION.h - ./makeheaders add_.c:add.h allrepo_.c:allrepo.h bag_.c:bag.h 
blob_.c:blob.h branch_.c:branch.h browse_.c:browse.h captcha_.c:captcha.h cgi_.c:cgi.h checkin_.c:checkin.h checkout_.c:checkout.h clearsign_.c:clearsign.h clone_.c:clone.h comformat_.c:comformat.h configure_.c:configure.h construct_.c:construct.h content_.c:content.h db_.c:db.h delta_.c:delta.h deltacmd_.c:deltacmd.h descendants_.c:descendants.h diff_.c:diff.h diffcmd_.c:diffcmd.h doc_.c:doc.h encode_.c:encode.h file_.c:file.h http_.c:http.h http_socket_.c:http_socket.h http_transport_.c:http_transport.h info_.c:info.h login_.c:login.h main_.c:main.h manifest_.c:manifest.h md5_.c:md5.h merge_.c:merge.h merge3_.c:merge3.h name_.c:name.h pivot_.c:pivot.h pqueue_.c:pqueue.h printf_.c:printf.h rebuild_.c:rebuild.h report_.c:report.h rss_.c:rss.h rstats_.c:rstats.h schema_.c:schema.h setup_.c:setup.h sha1_.c:sha1.h shun_.c:shun.h stat_.c:stat.h style_.c:style.h sync_.c:sync.h tag_.c:tag.h th_main_.c:th_main.h timeline_.c:timeline.h tkt_.c:tkt.h tktsetup_.c:tktsetup.h undo_.c:undo.h update_.c:update.h url_.c:url.h user_.c:user.h verify_.c:verify.h vfile_.c:vfile.h wiki_.c:wiki.h wikiformat_.c:wikiformat.h winhttp_.c:winhttp.h xfer_.c:xfer.h zip_.c:zip.h $(SRCDIR)/sqlite3.h $(SRCDIR)/th.h VERSION.h + ./makeheaders add_.c:add.h allrepo_.c:allrepo.h bag_.c:bag.h blob_.c:blob.h branch_.c:branch.h browse_.c:browse.h captcha_.c:captcha.h cgi_.c:cgi.h checkin_.c:checkin.h checkout_.c:checkout.h clearsign_.c:clearsign.h clone_.c:clone.h comformat_.c:comformat.h configure_.c:configure.h construct_.c:construct.h content_.c:content.h db_.c:db.h delta_.c:delta.h deltacmd_.c:deltacmd.h descendants_.c:descendants.h diff_.c:diff.h diffcmd_.c:diffcmd.h doc_.c:doc.h encode_.c:encode.h file_.c:file.h http_.c:http.h http_socket_.c:http_socket.h http_transport_.c:http_transport.h info_.c:info.h login_.c:login.h main_.c:main.h manifest_.c:manifest.h md5_.c:md5.h merge_.c:merge.h merge3_.c:merge3.h name_.c:name.h pivot_.c:pivot.h pqueue_.c:pqueue.h printf_.c:printf.h rebuild_.c:rebuild.h 
report_.c:report.h rss_.c:rss.h rstats_.c:rstats.h schema_.c:schema.h search_.c:search.h setup_.c:setup.h sha1_.c:sha1.h shun_.c:shun.h stat_.c:stat.h style_.c:style.h sync_.c:sync.h tag_.c:tag.h th_main_.c:th_main.h timeline_.c:timeline.h tkt_.c:tkt.h tktsetup_.c:tktsetup.h undo_.c:undo.h update_.c:update.h url_.c:url.h user_.c:user.h verify_.c:verify.h vfile_.c:vfile.h wiki_.c:wiki.h wikiformat_.c:wikiformat.h winhttp_.c:winhttp.h xfer_.c:xfer.h zip_.c:zip.h $(SRCDIR)/sqlite3.h $(SRCDIR)/th.h VERSION.h touch headers headers: Makefile Makefile: add_.c: $(SRCDIR)/add.c translate ./translate $(SRCDIR)/add.c >add_.c @@ -572,10 +575,17 @@ schema.o: schema_.c schema.h $(SRCDIR)/config.h $(XTCC) -o schema.o -c schema_.c schema.h: headers +search_.c: $(SRCDIR)/search.c translate + ./translate $(SRCDIR)/search.c >search_.c + +search.o: search_.c search.h $(SRCDIR)/config.h + $(XTCC) -o search.o -c search_.c + +search.h: headers setup_.c: $(SRCDIR)/setup.c translate ./translate $(SRCDIR)/setup.c >setup_.c setup.o: setup_.c setup.h $(SRCDIR)/config.h $(XTCC) -o setup.o -c setup_.c
Modified src/makemake.tcl from [63b9b997e5] to [be92f7b0d9].
@@ -49,10 +49,11 @@ rebuild report rss rstats schema + search setup sha1 shun stat style
Added src/search.c version [b8713502ff]
@@ -1,1 +1,214 @@ +/* +** Copyright (c) 2009 D. Richard Hipp +** +** This program is free software; you can redistribute it and/or +** modify it under the terms of the GNU General Public +** License version 2 as published by the Free Software Foundation. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +** General Public License for more details. +** +** You should have received a copy of the GNU General Public +** License along with this library; if not, write to the +** Free Software Foundation, Inc., 59 Temple Place - Suite 330, +** Boston, MA 02111-1307, USA. +** +** Author contact information: +** drh@hwaci.com +** http://www.hwaci.com/drh/ +** +******************************************************************************* +** +** This file contains code to implement the "/doc" web page and related +** pages. +*/ +#include "config.h" +#include "search.h" +#include <assert.h> + +#if INTERFACE +/* +** A compiled search patter +*/ +struct Search { + int nTerm; + struct srchTerm { + char *z; + int n; + } a[8]; +}; +#endif + +/* +** Compile a search pattern +*/ +Search *search_init(const char *zPattern){ + int nPattern = strlen(zPattern); + Search *p; + char *z; + int i; + + p = malloc( nPattern + sizeof(*p) + 1); + if( p==0 ) fossil_panic("out of memory"); + z = (char*)&p[1]; + strcpy(z, zPattern); + memset(p, 0, sizeof(*p)); + while( *z && p->nTerm<sizeof(p->a)/sizeof(p->a[0]) ){ + while( !isalnum(*z) && *z ){ z++; } + if( *z==0 ) break; + p->a[p->nTerm].z = z; + for(i=1; isalnum(z[i]) || z[i]=='_'; i++){} + p->a[p->nTerm].n = i; + z += i; + p->nTerm++; + } + return p; +} + + +/* +** Destroy a search context. 
+*/ +void search_end(Search *p){ + free(p); +} + +/* +** Theses characters constitute a word boundary +*/ +static const char isBoundary[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +/* +** Compare a search pattern against an input string and return a score. 
+** +** Scoring: +** * All terms must match at least once or the score is zero +** * 10 bonus points if the first occurrance is an exact match +** * 1 additional point for each subsequent match of the same word +** * Extra points of two consecutive words of the pattern are consecutive +** in the document +*/ +int search_score(Search *p, const char *zDoc){ + int iPrev = 999; + int score = 10; + int iBonus = 0; + int i, j; + unsigned char seen[8]; + + memset(seen, 0, sizeof(seen)); + for(i=0; zDoc[i]; i++){ + char c = zDoc[i]; + if( isBoundary[c&0xff] ) continue; + for(j=0; j<p->nTerm; j++){ + int n = p->a[j].n; + if( sqlite3_strnicmp(p->a[j].z, &zDoc[i], n)==0 ){ + score += 1; + if( !seen[j] ){ + if( isBoundary[zDoc[i+n]&0xff] ) score += 10; + seen[j] = 1; + } + if( j==iPrev+1 ){ + score += iBonus; + } + i += n-1; + iPrev = j; + iBonus = 50; + break; + } + } + iBonus /= 2; + while( !isBoundary[zDoc[i]&0xff] ){ i++; } + } + + /* Every term must be seen or else the score is zero */ + for(j=0; j<p->nTerm; j++){ + if( !seen[j] ) return 0; + } + + return score; +} + +/* +** This is an SQLite function that scores its input using +** a pre-computed pattern. +*/ +static void search_score_sqlfunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + Search *p = (Search*)sqlite3_user_data(context); + int score = search_score(p, (const char*)sqlite3_value_text(argv[0])); + sqlite3_result_int(context, score); +} + +/* +** Register the "score()" SQL function to score its input text +** using the given Search object. Once this function is registered, +** do not delete the Search object. +*/ +void search_sql_setup(Search *p){ + sqlite3_create_function(g.db, "score", 1, SQLITE_UTF8, p, + search_score_sqlfunc, 0, 0); +} + +/* +** Testing the search function. +** +** COMMAND: test-search +** %fossil test-search pattern... +** +** search for check-ins matching the pattern. 
+*/ +void search_test(void){ + Search *p; + Blob pattern; + int i; + Stmt q; + + db_must_be_within_tree(); + if( g.argc<2 ) return; + blob_init(&pattern, g.argv[2], -1); + for(i=3; i<g.argc; i++){ + blob_appendf(&pattern, " %s", g.argv[i]); + } + p = search_init(blob_str(&pattern)); + blob_reset(&pattern); + search_sql_setup(p); + + db_multi_exec( + "CREATE TEMP TABLE srch(x,text);" + "INSERT INTO srch(text) SELECT coalesce(ecomment,comment) FROM event;" + "UPDATE srch SET x=score(text);" + ); + db_prepare(&q, "SELECT x, text FROM srch WHERE x>0 ORDER BY x DESC"); + while( db_step(&q)==SQLITE_ROW ){ + int score = db_column_int(&q, 0); + const char *z = db_column_text(&q, 1); + score = search_score(p, z); + if( score ){ + printf("%5d: %s\n", score, z); + } + } + db_finalize(&q); +}
Modified src/sqlite3.c from [e1c4c35b59] to [1222ebdab0].
@@ -15,11 +15,11 @@ ** of the embedded sqlite3.h header file.) Additional code files may be needed ** if you want a wrapper to interface SQLite with your choice of programming ** language. The code for the "sqlite3" command-line shell is also in a ** separate file. This file contains only code for the core SQLite library. ** -** This amalgamation was generated on 2009-10-13 16:22:00 UTC. +** This amalgamation was generated on 2009-10-14 11:34:12 UTC. */ #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 #ifndef SQLITE_PRIVATE # define SQLITE_PRIVATE static @@ -651,11 +651,11 @@ ** ** Requirements: [H10011] [H10014] */ #define SQLITE_VERSION "3.6.19" #define SQLITE_VERSION_NUMBER 3006019 -#define SQLITE_SOURCE_ID "2009-10-13 15:42:49 f894ebf86d6bafcd1461f104f5f677b3b6a3aa1a" +#define SQLITE_SOURCE_ID "2009-10-14 11:33:55 c1d499afc50d54b376945b4efb65c56c787a073d" /* ** CAPI3REF: Run-Time Library Version Numbers {H10020} <S60100> ** KEYWORDS: sqlite3_version ** @@ -8616,11 +8616,11 @@ ** opened savepoint. Savepoints are added to the list by the vdbe ** OP_Savepoint instruction. */ struct Savepoint { char *zName; /* Savepoint name (nul-terminated) */ - int nDeferredCons; /* Number of deferred fk violations */ + i64 nDeferredCons; /* Number of deferred fk violations */ Savepoint *pNext; /* Parent savepoint (if any) */ }; /* ** The following are used as the second parameter to sqlite3Savepoint(), @@ -83927,11 +83927,11 @@ /* Set the P5 operand of the OP_Program instruction to non-zero if ** recursive invocation of this trigger program is disallowed. Recursive ** invocation is disallowed if (a) the sub-program is really a trigger, ** not a foreign key action, and (b) the flag to enable recursive triggers ** is clear. */ - sqlite3VdbeChangeP5(v, p->zName && !(pParse->db->flags&SQLITE_RecTriggers)); + sqlite3VdbeChangeP5(v, (u8)(p->zName && !(pParse->db->flags&SQLITE_RecTriggers))); } } /* ** This is called to code the required FOR EACH ROW triggers for an operation
Modified src/sqlite3.h from [7fa89665f9] to [29f824bc4c].
@@ -119,11 +119,11 @@ ** ** Requirements: [H10011] [H10014] */ #define SQLITE_VERSION "3.6.19" #define SQLITE_VERSION_NUMBER 3006019 -#define SQLITE_SOURCE_ID "2009-10-13 15:42:49 f894ebf86d6bafcd1461f104f5f677b3b6a3aa1a" +#define SQLITE_SOURCE_ID "2009-10-14 11:33:55 c1d499afc50d54b376945b4efb65c56c787a073d" /* ** CAPI3REF: Run-Time Library Version Numbers {H10020} <S60100> ** KEYWORDS: sqlite3_version **