dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** Copyright (c) 2007 D. Richard Hipp dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** This program is free software; you can redistribute it and/or dbda8d6ce9 2007-07-21 drh: ** modify it under the terms of the GNU General Public dbda8d6ce9 2007-07-21 drh: ** License version 2 as published by the Free Software Foundation. dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** This program is distributed in the hope that it will be useful, dbda8d6ce9 2007-07-21 drh: ** but WITHOUT ANY WARRANTY; without even the implied warranty of dbda8d6ce9 2007-07-21 drh: ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU dbda8d6ce9 2007-07-21 drh: ** General Public License for more details. dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** You should have received a copy of the GNU General Public dbda8d6ce9 2007-07-21 drh: ** License along with this library; if not, write to the dbda8d6ce9 2007-07-21 drh: ** Free Software Foundation, Inc., 59 Temple Place - Suite 330, dbda8d6ce9 2007-07-21 drh: ** Boston, MA 02111-1307, USA. dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** Author contact information: dbda8d6ce9 2007-07-21 drh: ** drh@hwaci.com dbda8d6ce9 2007-07-21 drh: ** http://www.hwaci.com/drh/ dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ******************************************************************************* dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** This file contains code for parsing URLs that appear on the command-line dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: #include "config.h" dbda8d6ce9 2007-07-21 drh: #include "url.h" dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** Parse the given URL. Populate variables in the global "g" structure. dbda8d6ce9 2007-07-21 drh: ** 9e274a2e7b 2009-09-12 drh: ** g.urlIsFile True if FILE: 9e274a2e7b 2009-09-12 drh: ** g.urlIsHttps True if HTTPS: 9e274a2e7b 2009-09-12 drh: ** g.urlProtocol "http" or "https" or "file" 9e274a2e7b 2009-09-12 drh: ** g.urlName Hostname for HTTP: or HTTPS:. Filename for FILE: 9e274a2e7b 2009-09-12 drh: ** g.urlPort TCP port number for HTTP or HTTPS. 9e274a2e7b 2009-09-12 drh: ** g.urlDfltPort Default TCP port number (80 or 443). 9e274a2e7b 2009-09-12 drh: ** g.urlPath Path name for HTTP or HTTPS. e621b6dbe3 2007-07-30 drh: ** g.urlUser Userid. e621b6dbe3 2007-07-30 drh: ** g.urlPasswd Password. 9e274a2e7b 2009-09-12 drh: ** g.urlHostname HOST:PORT or just HOST if port is the default. 9e274a2e7b 2009-09-12 drh: ** g.urlCanonical The URL in canonical form, omitting userid/password 3dcaed8d86 2007-07-28 dan: ** e621b6dbe3 2007-07-30 drh: ** HTTP url format is: 3dcaed8d86 2007-07-28 dan: ** e621b6dbe3 2007-07-30 drh: ** http://userid:password@host:port/path?query#fragment dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: void url_parse(const char *zUrl){ dbda8d6ce9 2007-07-21 drh: int i, j, c; e621b6dbe3 2007-07-30 drh: char *zFile = 0; 797d680ef5 2009-01-13 drh: if( strncmp(zUrl, "http://", 7)==0 || strncmp(zUrl, "https://", 8)==0 ){ 797d680ef5 2009-01-13 drh: int iStart; dbda8d6ce9 2007-07-21 drh: g.urlIsFile = 0; 797d680ef5 2009-01-13 drh: if( zUrl[4]=='s' ){ 797d680ef5 2009-01-13 drh: g.urlIsHttps = 1; 797d680ef5 2009-01-13 drh: g.urlProtocol = "https"; 797d680ef5 2009-01-13 drh: g.urlDfltPort = 443; 797d680ef5 2009-01-13 drh: iStart = 8; 797d680ef5 2009-01-13 drh: }else{ 797d680ef5 2009-01-13 drh: g.urlIsHttps = 0; 797d680ef5 2009-01-13 drh: g.urlProtocol = "http"; 797d680ef5 2009-01-13 drh: g.urlDfltPort = 80; 797d680ef5 2009-01-13 drh: iStart = 7; 797d680ef5 2009-01-13 drh: } 797d680ef5 2009-01-13 drh: for(i=iStart; (c=zUrl[i])!=0 && c!='/' && c!='@'; i++){} e621b6dbe3 2007-07-30 drh: if( c=='@' ){ 797d680ef5 2009-01-13 drh: for(j=iStart; j<i && zUrl[j]!=':'; j++){} 797d680ef5 2009-01-13 drh: g.urlUser = mprintf("%.*s", j-iStart, &zUrl[iStart]); e621b6dbe3 2007-07-30 drh: if( j<i ){ e621b6dbe3 2007-07-30 drh: g.urlPasswd = mprintf("%.*s", i-j-1, &zUrl[j+1]); e621b6dbe3 2007-07-30 drh: } e621b6dbe3 2007-07-30 drh: for(j=i+1; (c=zUrl[j])!=0 && c!='/' && c!=':'; j++){} e621b6dbe3 2007-07-30 drh: g.urlName = mprintf("%.*s", j-i-1, &zUrl[i+1]); e621b6dbe3 2007-07-30 drh: i = j; e621b6dbe3 2007-07-30 drh: }else{ 797d680ef5 2009-01-13 drh: for(i=iStart; (c=zUrl[i])!=0 && c!='/' && c!=':'; i++){} 797d680ef5 2009-01-13 drh: g.urlName = mprintf("%.*s", i-iStart, &zUrl[iStart]); e621b6dbe3 2007-07-30 drh: } dbda8d6ce9 2007-07-21 drh: for(j=0; g.urlName[j]; j++){ g.urlName[j] = tolower(g.urlName[j]); } dbda8d6ce9 2007-07-21 drh: if( c==':' ){ dbda8d6ce9 2007-07-21 drh: g.urlPort = 0; dbda8d6ce9 2007-07-21 drh: i++; dbda8d6ce9 2007-07-21 drh: while( (c = zUrl[i])!=0 && isdigit(c) ){ dbda8d6ce9 2007-07-21 drh: g.urlPort = g.urlPort*10 + c - '0'; dbda8d6ce9 2007-07-21 drh: i++; dbda8d6ce9 2007-07-21 drh: } 1dbf332352 2008-05-05 drh: g.urlHostname = mprintf("%s:%d", g.urlName, g.urlPort); dbda8d6ce9 2007-07-21 drh: }else{ 797d680ef5 2009-01-13 drh: g.urlPort = g.urlDfltPort; 1dbf332352 2008-05-05 drh: g.urlHostname = g.urlName; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: g.urlPath = mprintf(&zUrl[i]); dbda8d6ce9 2007-07-21 drh: dehttpize(g.urlName); dbda8d6ce9 2007-07-21 drh: dehttpize(g.urlPath); 47d8fc4944 2009-08-01 drh: if( g.urlDfltPort==g.urlPort ){ 47d8fc4944 2009-08-01 drh: g.urlCanonical = mprintf("%s://%T%T", 47d8fc4944 2009-08-01 drh: g.urlProtocol, g.urlName, g.urlPath); 47d8fc4944 2009-08-01 drh: }else{ 47d8fc4944 2009-08-01 drh: g.urlCanonical = mprintf("%s://%T:%d%T", 47d8fc4944 2009-08-01 drh: g.urlProtocol, g.urlName, g.urlPort, g.urlPath); 47d8fc4944 2009-08-01 drh: } dbda8d6ce9 2007-07-21 drh: }else if( strncmp(zUrl, "file:", 5)==0 ){ dbda8d6ce9 2007-07-21 drh: g.urlIsFile = 1; dbda8d6ce9 2007-07-21 drh: if( zUrl[5]=='/' && zUrl[6]=='/' ){ dbda8d6ce9 2007-07-21 drh: i = 7; dbda8d6ce9 2007-07-21 drh: }else{ dbda8d6ce9 2007-07-21 drh: i = 5; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: zFile = mprintf("%s", &zUrl[i]); dbda8d6ce9 2007-07-21 drh: }else if( file_isfile(zUrl) ){ dbda8d6ce9 2007-07-21 drh: g.urlIsFile = 1; dbda8d6ce9 2007-07-21 drh: zFile = mprintf("%s", zUrl); dbda8d6ce9 2007-07-21 drh: }else if( file_isdir(zUrl)==1 ){ dbda8d6ce9 2007-07-21 drh: zFile = mprintf("%s/FOSSIL", zUrl); dbda8d6ce9 2007-07-21 drh: if( file_isfile(zFile) ){ dbda8d6ce9 2007-07-21 drh: g.urlIsFile = 1; dbda8d6ce9 2007-07-21 drh: }else{ dbda8d6ce9 2007-07-21 drh: free(zFile); dbda8d6ce9 2007-07-21 drh: fossil_panic("unknown repository: %s", zUrl); dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: }else{ dbda8d6ce9 2007-07-21 drh: fossil_panic("unknown repository: %s", zUrl); dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: if( g.urlIsFile ){ dbda8d6ce9 2007-07-21 drh: Blob cfile; dbda8d6ce9 2007-07-21 drh: dehttpize(zFile); dbda8d6ce9 2007-07-21 drh: file_canonical_name(zFile, &cfile); dbda8d6ce9 2007-07-21 drh: free(zFile); 945ecd1a8b 2009-04-11 drh: g.urlProtocol = "file"; 945ecd1a8b 2009-04-11 drh: g.urlPath = ""; dbda8d6ce9 2007-07-21 drh: g.urlName = mprintf("%b", &cfile); dbda8d6ce9 2007-07-21 drh: g.urlCanonical = mprintf("file://%T", g.urlName); dbda8d6ce9 2007-07-21 drh: blob_reset(&cfile); dbda8d6ce9 2007-07-21 drh: } e621b6dbe3 2007-07-30 drh: } e621b6dbe3 2007-07-30 drh: e621b6dbe3 2007-07-30 drh: /* e621b6dbe3 2007-07-30 drh: ** COMMAND: test-urlparser e621b6dbe3 2007-07-30 drh: */ e621b6dbe3 2007-07-30 drh: void cmd_test_urlparser(void){ 1dbf332352 2008-05-05 drh: int i; f652599003 2008-05-06 drh: url_proxy_options(); 676fdd088a 2008-05-01 drh: if( g.argc!=3 && g.argc!=4 ){ e621b6dbe3 2007-07-30 drh: usage("URL"); e621b6dbe3 2007-07-30 drh: } e621b6dbe3 2007-07-30 drh: url_parse(g.argv[2]); 1dbf332352 2008-05-05 drh: for(i=0; i<2; i++){ 1dbf332352 2008-05-05 drh: printf("g.urlIsFile = %d\n", g.urlIsFile); 797d680ef5 2009-01-13 drh: printf("g.urlIsHttps = %d\n", g.urlIsHttps); 797d680ef5 2009-01-13 drh: printf("g.urlProtocol = %s\n", g.urlProtocol); 1dbf332352 2008-05-05 drh: printf("g.urlName = %s\n", g.urlName); 1dbf332352 2008-05-05 drh: printf("g.urlPort = %d\n", g.urlPort); 797d680ef5 2009-01-13 drh: printf("g.urlDfltPort = %d\n", g.urlDfltPort); 1dbf332352 2008-05-05 drh: printf("g.urlHostname = %s\n", g.urlHostname); 1dbf332352 2008-05-05 drh: printf("g.urlPath = %s\n", g.urlPath); 1dbf332352 2008-05-05 drh: printf("g.urlUser = %s\n", g.urlUser); 1dbf332352 2008-05-05 drh: printf("g.urlPasswd = %s\n", g.urlPasswd); 1dbf332352 2008-05-05 drh: printf("g.urlCanonical = %s\n", g.urlCanonical); 1dbf332352 2008-05-05 drh: if( i==0 ){ 1dbf332352 2008-05-05 drh: printf("********\n"); 1dbf332352 2008-05-05 drh: url_enable_proxy("Using proxy: "); 1dbf332352 2008-05-05 drh: } 1dbf332352 2008-05-05 drh: } 1dbf332352 2008-05-05 drh: } 1dbf332352 2008-05-05 drh: 1dbf332352 2008-05-05 drh: /* 9e274a2e7b 2009-09-12 drh: ** Proxy specified on the command-line using the --proxy option. 9e274a2e7b 2009-09-12 drh: ** If there is no --proxy option on the command-line then this 9e274a2e7b 2009-09-12 drh: ** variable holds a NULL pointer. f652599003 2008-05-06 drh: */ f652599003 2008-05-06 drh: static const char *zProxyOpt = 0; f652599003 2008-05-06 drh: f652599003 2008-05-06 drh: /* 9e274a2e7b 2009-09-12 drh: ** Extract any proxy options from the command-line. f652599003 2008-05-06 drh: ** f652599003 2008-05-06 drh: ** --proxy URL|off f652599003 2008-05-06 drh: ** 9e274a2e7b 2009-09-12 drh: ** This also happens to be a convenient function to use to look for 9e274a2e7b 2009-09-12 drh: ** the --nosync option that will temporarily disable the "autosync" 9e274a2e7b 2009-09-12 drh: ** feature. f652599003 2008-05-06 drh: */ f652599003 2008-05-06 drh: void url_proxy_options(void){ f652599003 2008-05-06 drh: zProxyOpt = find_option("proxy", 0, 1); ec82a32b80 2008-05-10 drh: if( find_option("nosync",0,0) ) g.fNoSync = 1; 676fdd088a 2008-05-01 drh: } 676fdd088a 2008-05-01 drh: 676fdd088a 2008-05-01 drh: /* 9e274a2e7b 2009-09-12 drh: ** If the "proxy" setting is defined, then change the URL settings 9e274a2e7b 2009-09-12 drh: ** (initialized by a prior call to url_parse()) so that the HTTP 9e274a2e7b 2009-09-12 drh: ** header will be appropriate for the proxy and so that the TCP/IP 9e274a2e7b 2009-09-12 drh: ** connection will be opened to the proxy rather than to the server. 797d680ef5 2009-01-13 drh: ** 9e274a2e7b 2009-09-12 drh: ** If zMsg is not NULL and a proxy is used, then print zMsg followed 9e274a2e7b 2009-09-12 drh: ** by the canonical name of the proxy (with userid and password suppressed). 676fdd088a 2008-05-01 drh: */ 676fdd088a 2008-05-01 drh: void url_enable_proxy(const char *zMsg){ f652599003 2008-05-06 drh: const char *zProxy; f652599003 2008-05-06 drh: zProxy = zProxyOpt; f652599003 2008-05-06 drh: if( zProxy==0 ){ f652599003 2008-05-06 drh: zProxy = db_get("proxy", 0); d65d619d94 2008-10-25 a0756885: if( zProxy==0 || zProxy[0]==0 || is_truth(zProxy) ){ f652599003 2008-05-06 drh: zProxy = getenv("http_proxy"); f652599003 2008-05-06 drh: } 387cbeda3f 2008-05-05 drh: } 676fdd088a 2008-05-01 drh: if( zProxy && zProxy[0] && !is_false(zProxy) ){ 676fdd088a 2008-05-01 drh: char *zOriginalUrl = g.urlCanonical; 1dbf332352 2008-05-05 drh: char *zOriginalHost = g.urlHostname; 9e274a2e7b 2009-09-12 drh: char *zOriginalUser = g.urlUser; 9e274a2e7b 2009-09-12 drh: char *zOriginalPasswd = g.urlPasswd; 9e274a2e7b 2009-09-12 drh: g.urlUser = 0; 9e274a2e7b 2009-09-12 drh: g.urlPasswd = ""; 676fdd088a 2008-05-01 drh: url_parse(zProxy); 9e274a2e7b 2009-09-12 drh: if( zMsg ) printf("%s%s\n", zMsg, g.urlCanonical); 676fdd088a 2008-05-01 drh: g.urlPath = zOriginalUrl; 1dbf332352 2008-05-05 drh: g.urlHostname = zOriginalHost; 9e274a2e7b 2009-09-12 drh: if( g.urlUser ){ 9e274a2e7b 2009-09-12 drh: char *zCredentials1 = mprintf("%s:%s", g.urlUser, g.urlPasswd); 9e274a2e7b 2009-09-12 drh: char *zCredentials2 = encode64(zCredentials1, -1); 9e274a2e7b 2009-09-12 drh: g.urlProxyAuth = mprintf("Basic %z", zCredentials2); 9e274a2e7b 2009-09-12 drh: free(zCredentials1); 9e274a2e7b 2009-09-12 drh: } 9e274a2e7b 2009-09-12 drh: g.urlUser = zOriginalUser; 9e274a2e7b 2009-09-12 drh: g.urlPasswd = zOriginalPasswd; c9cd128c2c 2008-11-02 drh: } c9cd128c2c 2008-11-02 drh: } c9cd128c2c 2008-11-02 drh: c9cd128c2c 2008-11-02 drh: #if INTERFACE c9cd128c2c 2008-11-02 drh: /* c9cd128c2c 2008-11-02 drh: ** An instance of this object is used to build a URL with query parameters. c9cd128c2c 2008-11-02 drh: */ c9cd128c2c 2008-11-02 drh: struct HQuery { c9cd128c2c 2008-11-02 drh: Blob url; /* The URL */ c9cd128c2c 2008-11-02 drh: const char *zBase; /* The base URL */ c9cd128c2c 2008-11-02 drh: int nParam; /* Number of parameters. Max 10 */ c9cd128c2c 2008-11-02 drh: const char *azName[10]; /* Parameter names */ c9cd128c2c 2008-11-02 drh: const char *azValue[10]; /* Parameter values */ c9cd128c2c 2008-11-02 drh: }; c9cd128c2c 2008-11-02 drh: #endif c9cd128c2c 2008-11-02 drh: c9cd128c2c 2008-11-02 drh: /* c9cd128c2c 2008-11-02 drh: ** Initialize the URL object. c9cd128c2c 2008-11-02 drh: */ c9cd128c2c 2008-11-02 drh: void url_initialize(HQuery *p, const char *zBase){ c9cd128c2c 2008-11-02 drh: blob_zero(&p->url); c9cd128c2c 2008-11-02 drh: p->zBase = zBase; c9cd128c2c 2008-11-02 drh: p->nParam = 0; c9cd128c2c 2008-11-02 drh: } c9cd128c2c 2008-11-02 drh: c9cd128c2c 2008-11-02 drh: /* c9cd128c2c 2008-11-02 drh: ** Add a fixed parameter to an HQuery. c9cd128c2c 2008-11-02 drh: */ c9cd128c2c 2008-11-02 drh: void url_add_parameter(HQuery *p, const char *zName, const char *zValue){ c9cd128c2c 2008-11-02 drh: assert( p->nParam < count(p->azName) ); c9cd128c2c 2008-11-02 drh: assert( p->nParam < count(p->azValue) ); c9cd128c2c 2008-11-02 drh: p->azName[p->nParam] = zName; c9cd128c2c 2008-11-02 drh: p->azValue[p->nParam] = zValue; c9cd128c2c 2008-11-02 drh: p->nParam++; c9cd128c2c 2008-11-02 drh: } c9cd128c2c 2008-11-02 drh: c9cd128c2c 2008-11-02 drh: /* c9cd128c2c 2008-11-02 drh: ** Render the URL with a parameter override. c9cd128c2c 2008-11-02 drh: */ c9cd128c2c 2008-11-02 drh: char *url_render( c9cd128c2c 2008-11-02 drh: HQuery *p, /* Base URL */ c9cd128c2c 2008-11-02 drh: const char *zName1, /* First override */ c9cd128c2c 2008-11-02 drh: const char *zValue1, /* First override value */ c9cd128c2c 2008-11-02 drh: const char *zName2, /* Second override */ c9cd128c2c 2008-11-02 drh: const char *zValue2 /* Second override value */ c9cd128c2c 2008-11-02 drh: ){ c9cd128c2c 2008-11-02 drh: const char *zSep = "?"; c9cd128c2c 2008-11-02 drh: int i; c9cd128c2c 2008-11-02 drh: c9cd128c2c 2008-11-02 drh: blob_reset(&p->url); c9cd128c2c 2008-11-02 drh: blob_appendf(&p->url, "%s/%s", g.zBaseURL, p->zBase); c9cd128c2c 2008-11-02 drh: for(i=0; i<p->nParam; i++){ c9cd128c2c 2008-11-02 drh: const char *z = p->azValue[i]; c9cd128c2c 2008-11-02 drh: if( zName1 && strcmp(zName1,p->azName[i])==0 ){ c9cd128c2c 2008-11-02 drh: zName1 = 0; c9cd128c2c 2008-11-02 drh: z = zValue1; c9cd128c2c 2008-11-02 drh: if( z==0 ) continue; c9cd128c2c 2008-11-02 drh: } c9cd128c2c 2008-11-02 drh: if( zName2 && strcmp(zName2,p->azName[i])==0 ){ c9cd128c2c 2008-11-02 drh: zName2 = 0; c9cd128c2c 2008-11-02 drh: z = zValue2; c9cd128c2c 2008-11-02 drh: if( z==0 ) continue; c9cd128c2c 2008-11-02 drh: } c9cd128c2c 2008-11-02 drh: blob_appendf(&p->url, "%s%s=%T", zSep, p->azName[i], z); c9cd128c2c 2008-11-02 drh: zSep = "&"; c9cd128c2c 2008-11-02 drh: } c9cd128c2c 2008-11-02 drh: if( zName1 && zValue1 ){ c9cd128c2c 2008-11-02 drh: blob_appendf(&p->url, "%s%s=%T", zSep, zName1, zValue1); c9cd128c2c 2008-11-02 drh: } c9cd128c2c 2008-11-02 drh: if( zName2 && zValue2 ){ c9cd128c2c 2008-11-02 drh: blob_appendf(&p->url, "%s%s=%T", zSep, zName2, zValue2); 676fdd088a 2008-05-01 drh: } c9cd128c2c 2008-11-02 drh: return blob_str(&p->url); dbda8d6ce9 2007-07-21 drh: }