232d10b736 2009-10-18 drh: /* 232d10b736 2009-10-18 drh: ** Copyright (c) 2009 D. Richard Hipp 232d10b736 2009-10-18 drh: ** 232d10b736 2009-10-18 drh: ** This program is free software; you can redistribute it and/or 232d10b736 2009-10-18 drh: ** modify it under the terms of the GNU General Public 232d10b736 2009-10-18 drh: ** License version 2 as published by the Free Software Foundation. 232d10b736 2009-10-18 drh: ** 232d10b736 2009-10-18 drh: ** This program is distributed in the hope that it will be useful, 232d10b736 2009-10-18 drh: ** but WITHOUT ANY WARRANTY; without even the implied warranty of 232d10b736 2009-10-18 drh: ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 232d10b736 2009-10-18 drh: ** General Public License for more details. 232d10b736 2009-10-18 drh: ** 232d10b736 2009-10-18 drh: ** You should have received a copy of the GNU General Public 232d10b736 2009-10-18 drh: ** License along with this library; if not, write to the 232d10b736 2009-10-18 drh: ** Free Software Foundation, Inc., 59 Temple Place - Suite 330, 232d10b736 2009-10-18 drh: ** Boston, MA 02111-1307, USA. 232d10b736 2009-10-18 drh: ** 232d10b736 2009-10-18 drh: ** Author contact information: 232d10b736 2009-10-18 drh: ** drh@hwaci.com 232d10b736 2009-10-18 drh: ** http://www.hwaci.com/drh/ 232d10b736 2009-10-18 drh: ** 232d10b736 2009-10-18 drh: ******************************************************************************* 232d10b736 2009-10-18 drh: ** 232d10b736 2009-10-18 drh: ** This file contains code to implement the "/doc" web page and related 232d10b736 2009-10-18 drh: ** pages. 232d10b736 2009-10-18 drh: */ 232d10b736 2009-10-18 drh: #include "config.h" 232d10b736 2009-10-18 drh: #include "search.h" 232d10b736 2009-10-18 drh: #include <assert.h> 232d10b736 2009-10-18 drh: 232d10b736 2009-10-18 drh: #if INTERFACE 232d10b736 2009-10-18 drh: /* 232d10b736 2009-10-18 drh: ** A compiled search patter 232d10b736 2009-10-18 drh: */ 232d10b736 2009-10-18 drh: struct Search { 232d10b736 2009-10-18 drh: int nTerm; 232d10b736 2009-10-18 drh: struct srchTerm { 232d10b736 2009-10-18 drh: char *z; 232d10b736 2009-10-18 drh: int n; 232d10b736 2009-10-18 drh: } a[8]; 232d10b736 2009-10-18 drh: }; 232d10b736 2009-10-18 drh: #endif 232d10b736 2009-10-18 drh: 232d10b736 2009-10-18 drh: /* 232d10b736 2009-10-18 drh: ** Compile a search pattern 232d10b736 2009-10-18 drh: */ 232d10b736 2009-10-18 drh: Search *search_init(const char *zPattern){ 232d10b736 2009-10-18 drh: int nPattern = strlen(zPattern); 232d10b736 2009-10-18 drh: Search *p; 232d10b736 2009-10-18 drh: char *z; 232d10b736 2009-10-18 drh: int i; 232d10b736 2009-10-18 drh: 232d10b736 2009-10-18 drh: p = malloc( nPattern + sizeof(*p) + 1); 232d10b736 2009-10-18 drh: if( p==0 ) fossil_panic("out of memory"); 232d10b736 2009-10-18 drh: z = (char*)&p[1]; 232d10b736 2009-10-18 drh: strcpy(z, zPattern); 232d10b736 2009-10-18 drh: memset(p, 0, sizeof(*p)); 232d10b736 2009-10-18 drh: while( *z && p->nTerm<sizeof(p->a)/sizeof(p->a[0]) ){ 232d10b736 2009-10-18 drh: while( !isalnum(*z) && *z ){ z++; } 232d10b736 2009-10-18 drh: if( *z==0 ) break; 232d10b736 2009-10-18 drh: p->a[p->nTerm].z = z; 232d10b736 2009-10-18 drh: for(i=1; isalnum(z[i]) || z[i]=='_'; i++){} 232d10b736 2009-10-18 drh: p->a[p->nTerm].n = i; 232d10b736 2009-10-18 drh: z += i; 232d10b736 2009-10-18 drh: p->nTerm++; 232d10b736 2009-10-18 drh: } 232d10b736 2009-10-18 drh: return p; 232d10b736 2009-10-18 drh: } 232d10b736 2009-10-18 drh: 232d10b736 2009-10-18 drh: 232d10b736 2009-10-18 drh: /* 232d10b736 2009-10-18 drh: ** Destroy a search context. 232d10b736 2009-10-18 drh: */ 232d10b736 2009-10-18 drh: void search_end(Search *p){ 232d10b736 2009-10-18 drh: free(p); 232d10b736 2009-10-18 drh: } 232d10b736 2009-10-18 drh: 232d10b736 2009-10-18 drh: /* 232d10b736 2009-10-18 drh: ** Theses characters constitute a word boundary 232d10b736 2009-10-18 drh: */ 232d10b736 2009-10-18 drh: static const char isBoundary[] = { 232d10b736 2009-10-18 drh: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 232d10b736 2009-10-18 drh: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 232d10b736 2009-10-18 drh: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 232d10b736 2009-10-18 drh: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 232d10b736 2009-10-18 drh: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 232d10b736 2009-10-18 drh: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 232d10b736 2009-10-18 drh: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 232d10b736 2009-10-18 drh: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 232d10b736 2009-10-18 drh: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 232d10b736 2009-10-18 drh: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 232d10b736 2009-10-18 drh: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 232d10b736 2009-10-18 drh: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 232d10b736 2009-10-18 drh: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 232d10b736 2009-10-18 drh: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 232d10b736 2009-10-18 drh: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 232d10b736 2009-10-18 drh: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 232d10b736 2009-10-18 drh: }; 232d10b736 2009-10-18 drh: 232d10b736 2009-10-18 drh: /* 232d10b736 2009-10-18 drh: ** Compare a search pattern against an input string and return a score. 232d10b736 2009-10-18 drh: ** 232d10b736 2009-10-18 drh: ** Scoring: 232d10b736 2009-10-18 drh: ** * All terms must match at least once or the score is zero 232d10b736 2009-10-18 drh: ** * 10 bonus points if the first occurrance is an exact match 232d10b736 2009-10-18 drh: ** * 1 additional point for each subsequent match of the same word 232d10b736 2009-10-18 drh: ** * Extra points of two consecutive words of the pattern are consecutive 232d10b736 2009-10-18 drh: ** in the document 232d10b736 2009-10-18 drh: */ 232d10b736 2009-10-18 drh: int search_score(Search *p, const char *zDoc){ 232d10b736 2009-10-18 drh: int iPrev = 999; 232d10b736 2009-10-18 drh: int score = 10; 232d10b736 2009-10-18 drh: int iBonus = 0; 232d10b736 2009-10-18 drh: int i, j; 232d10b736 2009-10-18 drh: unsigned char seen[8]; 232d10b736 2009-10-18 drh: 232d10b736 2009-10-18 drh: memset(seen, 0, sizeof(seen)); 232d10b736 2009-10-18 drh: for(i=0; zDoc[i]; i++){ 232d10b736 2009-10-18 drh: char c = zDoc[i]; 232d10b736 2009-10-18 drh: if( isBoundary[c&0xff] ) continue; 232d10b736 2009-10-18 drh: for(j=0; j<p->nTerm; j++){ 232d10b736 2009-10-18 drh: int n = p->a[j].n; 232d10b736 2009-10-18 drh: if( sqlite3_strnicmp(p->a[j].z, &zDoc[i], n)==0 ){ 232d10b736 2009-10-18 drh: score += 1; 232d10b736 2009-10-18 drh: if( !seen[j] ){ 232d10b736 2009-10-18 drh: if( isBoundary[zDoc[i+n]&0xff] ) score += 10; 232d10b736 2009-10-18 drh: seen[j] = 1; 232d10b736 2009-10-18 drh: } 232d10b736 2009-10-18 drh: if( j==iPrev+1 ){ 232d10b736 2009-10-18 drh: score += iBonus; 232d10b736 2009-10-18 drh: } 232d10b736 2009-10-18 drh: i += n-1; 232d10b736 2009-10-18 drh: iPrev = j; 232d10b736 2009-10-18 drh: iBonus = 50; 232d10b736 2009-10-18 drh: break; 232d10b736 2009-10-18 drh: } 232d10b736 2009-10-18 drh: } 232d10b736 2009-10-18 drh: iBonus /= 2; 232d10b736 2009-10-18 drh: while( !isBoundary[zDoc[i]&0xff] ){ i++; } 232d10b736 2009-10-18 drh: } 232d10b736 2009-10-18 drh: 232d10b736 2009-10-18 drh: /* Every term must be seen or else the score is zero */ 232d10b736 2009-10-18 drh: for(j=0; j<p->nTerm; j++){ 232d10b736 2009-10-18 drh: if( !seen[j] ) return 0; 232d10b736 2009-10-18 drh: } 232d10b736 2009-10-18 drh: 232d10b736 2009-10-18 drh: return score; 232d10b736 2009-10-18 drh: } 232d10b736 2009-10-18 drh: 232d10b736 2009-10-18 drh: /* 232d10b736 2009-10-18 drh: ** This is an SQLite function that scores its input using 232d10b736 2009-10-18 drh: ** a pre-computed pattern. 232d10b736 2009-10-18 drh: */ 232d10b736 2009-10-18 drh: static void search_score_sqlfunc( 232d10b736 2009-10-18 drh: sqlite3_context *context, 232d10b736 2009-10-18 drh: int argc, 232d10b736 2009-10-18 drh: sqlite3_value **argv 232d10b736 2009-10-18 drh: ){ 232d10b736 2009-10-18 drh: Search *p = (Search*)sqlite3_user_data(context); 232d10b736 2009-10-18 drh: int score = search_score(p, (const char*)sqlite3_value_text(argv[0])); 232d10b736 2009-10-18 drh: sqlite3_result_int(context, score); 232d10b736 2009-10-18 drh: } 232d10b736 2009-10-18 drh: 232d10b736 2009-10-18 drh: /* 232d10b736 2009-10-18 drh: ** Register the "score()" SQL function to score its input text 232d10b736 2009-10-18 drh: ** using the given Search object. Once this function is registered, 232d10b736 2009-10-18 drh: ** do not delete the Search object. 232d10b736 2009-10-18 drh: */ 232d10b736 2009-10-18 drh: void search_sql_setup(Search *p){ 232d10b736 2009-10-18 drh: sqlite3_create_function(g.db, "score", 1, SQLITE_UTF8, p, 232d10b736 2009-10-18 drh: search_score_sqlfunc, 0, 0); 232d10b736 2009-10-18 drh: } 232d10b736 2009-10-18 drh: 232d10b736 2009-10-18 drh: /* 232d10b736 2009-10-18 drh: ** Testing the search function. 232d10b736 2009-10-18 drh: ** 20600107f1 2009-11-08 drh: ** COMMAND: search 20600107f1 2009-11-08 drh: ** %fossil search pattern... 232d10b736 2009-10-18 drh: ** 20600107f1 2009-11-08 drh: ** Search for timeline entrys matching the pattern. 232d10b736 2009-10-18 drh: */ 20600107f1 2009-11-08 drh: void search_cmd(void){ 232d10b736 2009-10-18 drh: Search *p; 232d10b736 2009-10-18 drh: Blob pattern; 232d10b736 2009-10-18 drh: int i; 232d10b736 2009-10-18 drh: Stmt q; 20600107f1 2009-11-08 drh: int iBest; 232d10b736 2009-10-18 drh: 232d10b736 2009-10-18 drh: db_must_be_within_tree(); 232d10b736 2009-10-18 drh: if( g.argc<2 ) return; 232d10b736 2009-10-18 drh: blob_init(&pattern, g.argv[2], -1); 232d10b736 2009-10-18 drh: for(i=3; i<g.argc; i++){ 232d10b736 2009-10-18 drh: blob_appendf(&pattern, " %s", g.argv[i]); 232d10b736 2009-10-18 drh: } 232d10b736 2009-10-18 drh: p = search_init(blob_str(&pattern)); 232d10b736 2009-10-18 drh: blob_reset(&pattern); 232d10b736 2009-10-18 drh: search_sql_setup(p); 232d10b736 2009-10-18 drh: 232d10b736 2009-10-18 drh: db_multi_exec( 20600107f1 2009-11-08 drh: "CREATE TEMP TABLE srch(rid,uuid,date,comment,x);" 20600107f1 2009-11-08 drh: "CREATE INDEX srch_idx1 ON srch(x);" 20600107f1 2009-11-08 drh: "INSERT INTO srch(rid,uuid,date,comment,x)" 20600107f1 2009-11-08 drh: " SELECT blob.rid, uuid, datetime(event.mtime, 'localtime')," 20600107f1 2009-11-08 drh: " coalesce(ecomment,comment)," 20600107f1 2009-11-08 drh: " score(coalesce(ecomment,comment)) AS y" 20600107f1 2009-11-08 drh: " FROM event, blob" 20600107f1 2009-11-08 drh: " WHERE blob.rid=event.objid AND y>0;" 20600107f1 2009-11-08 drh: ); 20600107f1 2009-11-08 drh: iBest = db_int(0, "SELECT max(x) FROM srch"); 20600107f1 2009-11-08 drh: db_prepare(&q, 20600107f1 2009-11-08 drh: "SELECT rid, uuid, date, comment, 0, 0 FROM srch" 20600107f1 2009-11-08 drh: " WHERE x>%d ORDER BY x DESC, date DESC", 20600107f1 2009-11-08 drh: iBest/3 232d10b736 2009-10-18 drh: ); 20600107f1 2009-11-08 drh: print_timeline(&q, 1000); 232d10b736 2009-10-18 drh: db_finalize(&q); 232d10b736 2009-10-18 drh: }