File Annotation
Not logged in
232d10b736 2009-10-18       drh: /*
232d10b736 2009-10-18       drh: ** Copyright (c) 2009 D. Richard Hipp
232d10b736 2009-10-18       drh: **
232d10b736 2009-10-18       drh: ** This program is free software; you can redistribute it and/or
232d10b736 2009-10-18       drh: ** modify it under the terms of the GNU General Public
232d10b736 2009-10-18       drh: ** License version 2 as published by the Free Software Foundation.
232d10b736 2009-10-18       drh: **
232d10b736 2009-10-18       drh: ** This program is distributed in the hope that it will be useful,
232d10b736 2009-10-18       drh: ** but WITHOUT ANY WARRANTY; without even the implied warranty of
232d10b736 2009-10-18       drh: ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
232d10b736 2009-10-18       drh: ** General Public License for more details.
232d10b736 2009-10-18       drh: **
232d10b736 2009-10-18       drh: ** You should have received a copy of the GNU General Public
232d10b736 2009-10-18       drh: ** License along with this library; if not, write to the
232d10b736 2009-10-18       drh: ** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
232d10b736 2009-10-18       drh: ** Boston, MA  02111-1307, USA.
232d10b736 2009-10-18       drh: **
232d10b736 2009-10-18       drh: ** Author contact information:
232d10b736 2009-10-18       drh: **   drh@hwaci.com
232d10b736 2009-10-18       drh: **   http://www.hwaci.com/drh/
232d10b736 2009-10-18       drh: **
232d10b736 2009-10-18       drh: *******************************************************************************
232d10b736 2009-10-18       drh: **
232d10b736 2009-10-18       drh: ** This file contains code to implement the "/doc" web page and related
232d10b736 2009-10-18       drh: ** pages.
232d10b736 2009-10-18       drh: */
232d10b736 2009-10-18       drh: #include "config.h"
232d10b736 2009-10-18       drh: #include "search.h"
232d10b736 2009-10-18       drh: #include <assert.h>
232d10b736 2009-10-18       drh: 
232d10b736 2009-10-18       drh: #if INTERFACE
232d10b736 2009-10-18       drh: /*
232d10b736 2009-10-18       drh: ** A compiled search patter
232d10b736 2009-10-18       drh: */
232d10b736 2009-10-18       drh: struct Search {
232d10b736 2009-10-18       drh:   int nTerm;
232d10b736 2009-10-18       drh:   struct srchTerm {
232d10b736 2009-10-18       drh:     char *z;
232d10b736 2009-10-18       drh:     int n;
232d10b736 2009-10-18       drh:   } a[8];
232d10b736 2009-10-18       drh: };
232d10b736 2009-10-18       drh: #endif
232d10b736 2009-10-18       drh: 
232d10b736 2009-10-18       drh: /*
232d10b736 2009-10-18       drh: ** Compile a search pattern
232d10b736 2009-10-18       drh: */
232d10b736 2009-10-18       drh: Search *search_init(const char *zPattern){
232d10b736 2009-10-18       drh:   int nPattern = strlen(zPattern);
232d10b736 2009-10-18       drh:   Search *p;
232d10b736 2009-10-18       drh:   char *z;
232d10b736 2009-10-18       drh:   int i;
232d10b736 2009-10-18       drh: 
232d10b736 2009-10-18       drh:   p = malloc( nPattern + sizeof(*p) + 1);
232d10b736 2009-10-18       drh:   if( p==0 ) fossil_panic("out of memory");
232d10b736 2009-10-18       drh:   z = (char*)&p[1];
232d10b736 2009-10-18       drh:   strcpy(z, zPattern);
232d10b736 2009-10-18       drh:   memset(p, 0, sizeof(*p));
232d10b736 2009-10-18       drh:   while( *z && p->nTerm<sizeof(p->a)/sizeof(p->a[0]) ){
232d10b736 2009-10-18       drh:     while( !isalnum(*z) && *z ){ z++; }
232d10b736 2009-10-18       drh:     if( *z==0 ) break;
232d10b736 2009-10-18       drh:     p->a[p->nTerm].z = z;
232d10b736 2009-10-18       drh:     for(i=1; isalnum(z[i]) || z[i]=='_'; i++){}
232d10b736 2009-10-18       drh:     p->a[p->nTerm].n = i;
232d10b736 2009-10-18       drh:     z += i;
232d10b736 2009-10-18       drh:     p->nTerm++;
232d10b736 2009-10-18       drh:   }
232d10b736 2009-10-18       drh:   return p;
232d10b736 2009-10-18       drh: }
232d10b736 2009-10-18       drh: 
232d10b736 2009-10-18       drh: 
232d10b736 2009-10-18       drh: /*
232d10b736 2009-10-18       drh: ** Destroy a search context.
232d10b736 2009-10-18       drh: */
232d10b736 2009-10-18       drh: void search_end(Search *p){
232d10b736 2009-10-18       drh:   free(p);
232d10b736 2009-10-18       drh: }
232d10b736 2009-10-18       drh: 
232d10b736 2009-10-18       drh: /*
232d10b736 2009-10-18       drh: ** Theses characters constitute a word boundary
232d10b736 2009-10-18       drh: */
232d10b736 2009-10-18       drh: static const char isBoundary[] = {
232d10b736 2009-10-18       drh:   1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,
232d10b736 2009-10-18       drh:   1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,
232d10b736 2009-10-18       drh:   1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,
232d10b736 2009-10-18       drh:   0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 1, 1, 1, 1, 1, 1,
232d10b736 2009-10-18       drh:   1, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
232d10b736 2009-10-18       drh:   0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 1, 1, 1, 1, 0,
232d10b736 2009-10-18       drh:   1, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
232d10b736 2009-10-18       drh:   0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 1, 1, 1, 1, 1,
232d10b736 2009-10-18       drh:   0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
232d10b736 2009-10-18       drh:   0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
232d10b736 2009-10-18       drh:   0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
232d10b736 2009-10-18       drh:   0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
232d10b736 2009-10-18       drh:   0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
232d10b736 2009-10-18       drh:   0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
232d10b736 2009-10-18       drh:   0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
232d10b736 2009-10-18       drh:   0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
232d10b736 2009-10-18       drh: };
232d10b736 2009-10-18       drh: 
232d10b736 2009-10-18       drh: /*
232d10b736 2009-10-18       drh: ** Compare a search pattern against an input string and return a score.
232d10b736 2009-10-18       drh: **
232d10b736 2009-10-18       drh: ** Scoring:
232d10b736 2009-10-18       drh: **   *  All terms must match at least once or the score is zero
232d10b736 2009-10-18       drh: **   *  10 bonus points if the first occurrance is an exact match
232d10b736 2009-10-18       drh: **   *  1 additional point for each subsequent match of the same word
232d10b736 2009-10-18       drh: **   *  Extra points of two consecutive words of the pattern are consecutive
232d10b736 2009-10-18       drh: **      in the document
232d10b736 2009-10-18       drh: */
232d10b736 2009-10-18       drh: int search_score(Search *p, const char *zDoc){
232d10b736 2009-10-18       drh:   int iPrev = 999;
232d10b736 2009-10-18       drh:   int score = 10;
232d10b736 2009-10-18       drh:   int iBonus = 0;
232d10b736 2009-10-18       drh:   int i, j;
232d10b736 2009-10-18       drh:   unsigned char seen[8];
232d10b736 2009-10-18       drh: 
232d10b736 2009-10-18       drh:   memset(seen, 0, sizeof(seen));
232d10b736 2009-10-18       drh:   for(i=0; zDoc[i]; i++){
232d10b736 2009-10-18       drh:     char c = zDoc[i];
232d10b736 2009-10-18       drh:     if( isBoundary[c&0xff] ) continue;
232d10b736 2009-10-18       drh:     for(j=0; j<p->nTerm; j++){
232d10b736 2009-10-18       drh:       int n = p->a[j].n;
232d10b736 2009-10-18       drh:       if( sqlite3_strnicmp(p->a[j].z, &zDoc[i], n)==0 ){
232d10b736 2009-10-18       drh:         score += 1;
232d10b736 2009-10-18       drh:         if( !seen[j] ){
232d10b736 2009-10-18       drh:           if( isBoundary[zDoc[i+n]&0xff] ) score += 10;
232d10b736 2009-10-18       drh:           seen[j] = 1;
232d10b736 2009-10-18       drh:         }
232d10b736 2009-10-18       drh:         if( j==iPrev+1 ){
232d10b736 2009-10-18       drh:           score += iBonus;
232d10b736 2009-10-18       drh:         }
232d10b736 2009-10-18       drh:         i += n-1;
232d10b736 2009-10-18       drh:         iPrev = j;
232d10b736 2009-10-18       drh:         iBonus = 50;
232d10b736 2009-10-18       drh:         break;
232d10b736 2009-10-18       drh:       }
232d10b736 2009-10-18       drh:     }
232d10b736 2009-10-18       drh:     iBonus /= 2;
232d10b736 2009-10-18       drh:     while( !isBoundary[zDoc[i]&0xff] ){ i++; }
232d10b736 2009-10-18       drh:   }
232d10b736 2009-10-18       drh: 
232d10b736 2009-10-18       drh:   /* Every term must be seen or else the score is zero */
232d10b736 2009-10-18       drh:   for(j=0; j<p->nTerm; j++){
232d10b736 2009-10-18       drh:     if( !seen[j] ) return 0;
232d10b736 2009-10-18       drh:   }
232d10b736 2009-10-18       drh: 
232d10b736 2009-10-18       drh:   return score;
232d10b736 2009-10-18       drh: }
232d10b736 2009-10-18       drh: 
232d10b736 2009-10-18       drh: /*
232d10b736 2009-10-18       drh: ** This is an SQLite function that scores its input using
232d10b736 2009-10-18       drh: ** a pre-computed pattern.
232d10b736 2009-10-18       drh: */
232d10b736 2009-10-18       drh: static void search_score_sqlfunc(
232d10b736 2009-10-18       drh:   sqlite3_context *context,
232d10b736 2009-10-18       drh:   int argc,
232d10b736 2009-10-18       drh:   sqlite3_value **argv
232d10b736 2009-10-18       drh: ){
232d10b736 2009-10-18       drh:   Search *p = (Search*)sqlite3_user_data(context);
232d10b736 2009-10-18       drh:   int score = search_score(p, (const char*)sqlite3_value_text(argv[0]));
232d10b736 2009-10-18       drh:   sqlite3_result_int(context, score);
232d10b736 2009-10-18       drh: }
232d10b736 2009-10-18       drh: 
232d10b736 2009-10-18       drh: /*
232d10b736 2009-10-18       drh: ** Register the "score()" SQL function to score its input text
232d10b736 2009-10-18       drh: ** using the given Search object.  Once this function is registered,
232d10b736 2009-10-18       drh: ** do not delete the Search object.
232d10b736 2009-10-18       drh: */
232d10b736 2009-10-18       drh: void search_sql_setup(Search *p){
232d10b736 2009-10-18       drh:   sqlite3_create_function(g.db, "score", 1, SQLITE_UTF8, p,
232d10b736 2009-10-18       drh:      search_score_sqlfunc, 0, 0);
232d10b736 2009-10-18       drh: }
232d10b736 2009-10-18       drh: 
232d10b736 2009-10-18       drh: /*
232d10b736 2009-10-18       drh: ** Testing the search function.
232d10b736 2009-10-18       drh: **
20600107f1 2009-11-08       drh: ** COMMAND: search
20600107f1 2009-11-08       drh: ** %fossil search pattern...
232d10b736 2009-10-18       drh: **
20600107f1 2009-11-08       drh: ** Search for timeline entrys matching the pattern.
232d10b736 2009-10-18       drh: */
20600107f1 2009-11-08       drh: void search_cmd(void){
232d10b736 2009-10-18       drh:   Search *p;
232d10b736 2009-10-18       drh:   Blob pattern;
232d10b736 2009-10-18       drh:   int i;
232d10b736 2009-10-18       drh:   Stmt q;
20600107f1 2009-11-08       drh:   int iBest;
232d10b736 2009-10-18       drh: 
232d10b736 2009-10-18       drh:   db_must_be_within_tree();
232d10b736 2009-10-18       drh:   if( g.argc<2 ) return;
232d10b736 2009-10-18       drh:   blob_init(&pattern, g.argv[2], -1);
232d10b736 2009-10-18       drh:   for(i=3; i<g.argc; i++){
232d10b736 2009-10-18       drh:     blob_appendf(&pattern, " %s", g.argv[i]);
232d10b736 2009-10-18       drh:   }
232d10b736 2009-10-18       drh:   p = search_init(blob_str(&pattern));
232d10b736 2009-10-18       drh:   blob_reset(&pattern);
232d10b736 2009-10-18       drh:   search_sql_setup(p);
232d10b736 2009-10-18       drh: 
232d10b736 2009-10-18       drh:   db_multi_exec(
20600107f1 2009-11-08       drh:      "CREATE TEMP TABLE srch(rid,uuid,date,comment,x);"
20600107f1 2009-11-08       drh:      "CREATE INDEX srch_idx1 ON srch(x);"
20600107f1 2009-11-08       drh:      "INSERT INTO srch(rid,uuid,date,comment,x)"
20600107f1 2009-11-08       drh:      "   SELECT blob.rid, uuid, datetime(event.mtime, 'localtime'),"
20600107f1 2009-11-08       drh:      "          coalesce(ecomment,comment),"
20600107f1 2009-11-08       drh:      "          score(coalesce(ecomment,comment)) AS y"
20600107f1 2009-11-08       drh:      "     FROM event, blob"
20600107f1 2009-11-08       drh:      "    WHERE blob.rid=event.objid AND y>0;"
20600107f1 2009-11-08       drh:   );
20600107f1 2009-11-08       drh:   iBest = db_int(0, "SELECT max(x) FROM srch");
20600107f1 2009-11-08       drh:   db_prepare(&q,
20600107f1 2009-11-08       drh:     "SELECT rid, uuid, date, comment, 0, 0 FROM srch"
20600107f1 2009-11-08       drh:     " WHERE x>%d ORDER BY x DESC, date DESC",
20600107f1 2009-11-08       drh:     iBest/3
232d10b736 2009-10-18       drh:   );
20600107f1 2009-11-08       drh:   print_timeline(&q, 1000);
232d10b736 2009-10-18       drh:   db_finalize(&q);
232d10b736 2009-10-18       drh: }