dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** Copyright (c) 2006 D. Richard Hipp dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** This program is free software; you can redistribute it and/or dbda8d6ce9 2007-07-21 drh: ** modify it under the terms of the GNU General Public dbda8d6ce9 2007-07-21 drh: ** License version 2 as published by the Free Software Foundation. dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** This program is distributed in the hope that it will be useful, dbda8d6ce9 2007-07-21 drh: ** but WITHOUT ANY WARRANTY; without even the implied warranty of dbda8d6ce9 2007-07-21 drh: ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU dbda8d6ce9 2007-07-21 drh: ** General Public License for more details. dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** You should have received a copy of the GNU General Public dbda8d6ce9 2007-07-21 drh: ** License along with this library; if not, write to the dbda8d6ce9 2007-07-21 drh: ** Free Software Foundation, Inc., 59 Temple Place - Suite 330, dbda8d6ce9 2007-07-21 drh: ** Boston, MA 02111-1307, USA. dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** Author contact information: dbda8d6ce9 2007-07-21 drh: ** drh@hwaci.com dbda8d6ce9 2007-07-21 drh: ** http://www.hwaci.com/drh/ dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ******************************************************************************* dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** Routines for encoding and decoding text. dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: #include "config.h" dbda8d6ce9 2007-07-21 drh: #include "encode.h" dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** Make the given string safe for HTML by converting every "<" into "<", dbda8d6ce9 2007-07-21 drh: ** every ">" into ">" and every "&" into "&". Return a pointer dbda8d6ce9 2007-07-21 drh: ** to a new string obtained from malloc(). dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** We also encode " as " so that it can appear as an argument dbda8d6ce9 2007-07-21 drh: ** to markup. dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: char *htmlize(const char *zIn, int n){ dbda8d6ce9 2007-07-21 drh: int c; dbda8d6ce9 2007-07-21 drh: int i = 0; dbda8d6ce9 2007-07-21 drh: int count = 0; dbda8d6ce9 2007-07-21 drh: char *zOut; dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: if( n<0 ) n = strlen(zIn); dbda8d6ce9 2007-07-21 drh: while( i<n && (c = zIn[i])!=0 ){ dbda8d6ce9 2007-07-21 drh: switch( c ){ dbda8d6ce9 2007-07-21 drh: case '<': count += 4; break; dbda8d6ce9 2007-07-21 drh: case '>': count += 4; break; dbda8d6ce9 2007-07-21 drh: case '&': count += 5; break; dbda8d6ce9 2007-07-21 drh: case '"': count += 6; break; dbda8d6ce9 2007-07-21 drh: default: count++; break; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: i++; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: i = 0; dbda8d6ce9 2007-07-21 drh: zOut = malloc( count+1 ); dbda8d6ce9 2007-07-21 drh: if( zOut==0 ) return 0; dbda8d6ce9 2007-07-21 drh: while( n-->0 && (c = *zIn)!=0 ){ dbda8d6ce9 2007-07-21 drh: switch( c ){ dbda8d6ce9 2007-07-21 drh: case '<': dbda8d6ce9 2007-07-21 drh: zOut[i++] = '&'; dbda8d6ce9 2007-07-21 drh: zOut[i++] = 'l'; dbda8d6ce9 2007-07-21 drh: zOut[i++] = 't'; dbda8d6ce9 2007-07-21 drh: zOut[i++] = ';'; dbda8d6ce9 2007-07-21 drh: break; dbda8d6ce9 2007-07-21 drh: case '>': dbda8d6ce9 2007-07-21 drh: zOut[i++] = '&'; dbda8d6ce9 2007-07-21 drh: zOut[i++] = 'g'; dbda8d6ce9 2007-07-21 drh: zOut[i++] = 't'; dbda8d6ce9 2007-07-21 drh: zOut[i++] = ';'; dbda8d6ce9 2007-07-21 drh: break; dbda8d6ce9 2007-07-21 drh: case '&': dbda8d6ce9 2007-07-21 drh: zOut[i++] = '&'; dbda8d6ce9 2007-07-21 drh: zOut[i++] = 'a'; dbda8d6ce9 2007-07-21 drh: zOut[i++] = 'm'; dbda8d6ce9 2007-07-21 drh: zOut[i++] = 'p'; dbda8d6ce9 2007-07-21 drh: zOut[i++] = ';'; dbda8d6ce9 2007-07-21 drh: break; dbda8d6ce9 2007-07-21 drh: case '"': dbda8d6ce9 2007-07-21 drh: zOut[i++] = '&'; dbda8d6ce9 2007-07-21 drh: zOut[i++] = 'q'; dbda8d6ce9 2007-07-21 drh: zOut[i++] = 'u'; dbda8d6ce9 2007-07-21 drh: zOut[i++] = 'o'; dbda8d6ce9 2007-07-21 drh: zOut[i++] = 't'; dbda8d6ce9 2007-07-21 drh: zOut[i++] = ';'; dbda8d6ce9 2007-07-21 drh: break; dbda8d6ce9 2007-07-21 drh: default: dbda8d6ce9 2007-07-21 drh: zOut[i++] = c; dbda8d6ce9 2007-07-21 drh: break; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: zIn++; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: zOut[i] = 0; dbda8d6ce9 2007-07-21 drh: return zOut; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** Encode a string for HTTP. This means converting lots of dbda8d6ce9 2007-07-21 drh: ** characters into the "%HH" where H is a hex digit. It also dbda8d6ce9 2007-07-21 drh: ** means converting spaces to "+". dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** This is the opposite of DeHttpizeString below. dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: static char *EncodeHttp(const char *zIn, int n, int encodeSlash){ dbda8d6ce9 2007-07-21 drh: int c; dbda8d6ce9 2007-07-21 drh: int i = 0; dbda8d6ce9 2007-07-21 drh: int count = 0; dbda8d6ce9 2007-07-21 drh: char *zOut; dbda8d6ce9 2007-07-21 drh: int other; dbda8d6ce9 2007-07-21 drh: # define IsSafeChar(X) \ dbda8d6ce9 2007-07-21 drh: (isalnum(X) || (X)=='.' || (X)=='$' || (X)=='-' || (X)=='_' || (X)==other) dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: if( zIn==0 ) return 0; dbda8d6ce9 2007-07-21 drh: if( n<0 ) n = strlen(zIn); dbda8d6ce9 2007-07-21 drh: other = encodeSlash ? 'a' : '/'; dbda8d6ce9 2007-07-21 drh: while( i<n && (c = zIn[i])!=0 ){ dbda8d6ce9 2007-07-21 drh: if( IsSafeChar(c) || c==' ' ){ dbda8d6ce9 2007-07-21 drh: count++; dbda8d6ce9 2007-07-21 drh: }else{ dbda8d6ce9 2007-07-21 drh: count += 3; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: i++; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: i = 0; dbda8d6ce9 2007-07-21 drh: zOut = malloc( count+1 ); dbda8d6ce9 2007-07-21 drh: if( zOut==0 ) return 0; dbda8d6ce9 2007-07-21 drh: while( n-->0 && (c = *zIn)!=0 ){ dbda8d6ce9 2007-07-21 drh: if( IsSafeChar(c) ){ dbda8d6ce9 2007-07-21 drh: zOut[i++] = c; dbda8d6ce9 2007-07-21 drh: }else if( c==' ' ){ dbda8d6ce9 2007-07-21 drh: zOut[i++] = '+'; dbda8d6ce9 2007-07-21 drh: }else{ dbda8d6ce9 2007-07-21 drh: zOut[i++] = '%'; dbda8d6ce9 2007-07-21 drh: zOut[i++] = "0123456789ABCDEF"[(c>>4)&0xf]; dbda8d6ce9 2007-07-21 drh: zOut[i++] = "0123456789ABCDEF"[c&0xf]; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: zIn++; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: zOut[i] = 0; dbda8d6ce9 2007-07-21 drh: return zOut; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** Convert the input string into a form that is suitable for use as dbda8d6ce9 2007-07-21 drh: ** a token in the HTTP protocol. Spaces are encoded as '+' and special dbda8d6ce9 2007-07-21 drh: ** characters are encoded as "%HH" where HH is a two-digit hexidecimal dbda8d6ce9 2007-07-21 drh: ** representation of the character. The "/" character is encoded dbda8d6ce9 2007-07-21 drh: ** as "%2F". dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: char *httpize(const char *z, int n){ dbda8d6ce9 2007-07-21 drh: return EncodeHttp(z, n, 1); dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** Convert the input string into a form that is suitable for use as dbda8d6ce9 2007-07-21 drh: ** a token in the HTTP protocol. Spaces are encoded as '+' and special dbda8d6ce9 2007-07-21 drh: ** characters are encoded as "%HH" where HH is a two-digit hexidecimal dbda8d6ce9 2007-07-21 drh: ** representation of the character. The "/" character is not encoded dbda8d6ce9 2007-07-21 drh: ** by this routine. dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: char *urlize(const char *z, int n){ dbda8d6ce9 2007-07-21 drh: return EncodeHttp(z, n, 0); dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** Convert a single HEX digit to an integer dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: static int AsciiToHex(int c){ dbda8d6ce9 2007-07-21 drh: if( c>='a' && c<='f' ){ dbda8d6ce9 2007-07-21 drh: c += 10 - 'a'; dbda8d6ce9 2007-07-21 drh: }else if( c>='A' && c<='F' ){ dbda8d6ce9 2007-07-21 drh: c += 10 - 'A'; dbda8d6ce9 2007-07-21 drh: }else if( c>='0' && c<='9' ){ dbda8d6ce9 2007-07-21 drh: c -= '0'; dbda8d6ce9 2007-07-21 drh: }else{ dbda8d6ce9 2007-07-21 drh: c = 0; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: return c; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** Remove the HTTP encodings from a string. The conversion is done dbda8d6ce9 2007-07-21 drh: ** in-place. Return the length of the string after conversion. dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: int dehttpize(char *z){ dbda8d6ce9 2007-07-21 drh: int i, j; 3dcaed8d86 2007-07-28 dan: 3dcaed8d86 2007-07-28 dan: /* Treat a null pointer as a zero-length string. */ 3dcaed8d86 2007-07-28 dan: if( !z ) return 0; 3dcaed8d86 2007-07-28 dan: dbda8d6ce9 2007-07-21 drh: i = j = 0; dbda8d6ce9 2007-07-21 drh: while( z[i] ){ dbda8d6ce9 2007-07-21 drh: switch( z[i] ){ dbda8d6ce9 2007-07-21 drh: case '%': dbda8d6ce9 2007-07-21 drh: if( z[i+1] && z[i+2] ){ dbda8d6ce9 2007-07-21 drh: z[j] = AsciiToHex(z[i+1]) << 4; dbda8d6ce9 2007-07-21 drh: z[j] |= AsciiToHex(z[i+2]); dbda8d6ce9 2007-07-21 drh: i += 2; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: break; dbda8d6ce9 2007-07-21 drh: case '+': dbda8d6ce9 2007-07-21 drh: z[j] = ' '; dbda8d6ce9 2007-07-21 drh: break; dbda8d6ce9 2007-07-21 drh: default: dbda8d6ce9 2007-07-21 drh: z[j] = z[i]; dbda8d6ce9 2007-07-21 drh: break; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: i++; dbda8d6ce9 2007-07-21 drh: j++; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: z[j] = 0; dbda8d6ce9 2007-07-21 drh: return j; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** The "fossilize" encoding is used in the headers of records dbda8d6ce9 2007-07-21 drh: ** (aka "content files") to escape special characters. The dbda8d6ce9 2007-07-21 drh: ** fossilize encoding passes most characters through unchanged. dbda8d6ce9 2007-07-21 drh: ** The changes are these: dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** space -> \s dbda8d6ce9 2007-07-21 drh: ** tab -> \t dbda8d6ce9 2007-07-21 drh: ** newline -> \n dbda8d6ce9 2007-07-21 drh: ** cr -> \r dbda8d6ce9 2007-07-21 drh: ** formfeed -> \f dbda8d6ce9 2007-07-21 drh: ** vtab -> \v dbda8d6ce9 2007-07-21 drh: ** nul -> \0 dbda8d6ce9 2007-07-21 drh: ** \ -> \\ dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** The fossilize() routine does an encoding of its input and dbda8d6ce9 2007-07-21 drh: ** returns a pointer to the encoding in space obtained from dbda8d6ce9 2007-07-21 drh: ** malloc. dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: char *fossilize(const char *zIn, int nIn){ dbda8d6ce9 2007-07-21 drh: int n, i, j, c; dbda8d6ce9 2007-07-21 drh: char *zOut; dbda8d6ce9 2007-07-21 drh: if( nIn<0 ) nIn = strlen(zIn); dbda8d6ce9 2007-07-21 drh: for(i=n=0; i<nIn; i++){ dbda8d6ce9 2007-07-21 drh: c = zIn[i]; dbda8d6ce9 2007-07-21 drh: if( c==0 || isspace(c) || c=='\\' ) n++; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: n += nIn; dbda8d6ce9 2007-07-21 drh: zOut = malloc( n+1 ); dbda8d6ce9 2007-07-21 drh: if( zOut ){ dbda8d6ce9 2007-07-21 drh: for(i=j=0; i<nIn; i++){ dbda8d6ce9 2007-07-21 drh: int c = zIn[i]; dbda8d6ce9 2007-07-21 drh: if( c==0 ){ dbda8d6ce9 2007-07-21 drh: zOut[j++] = '\\'; dbda8d6ce9 2007-07-21 drh: zOut[j++] = '0'; dbda8d6ce9 2007-07-21 drh: }else if( c=='\\' ){ dbda8d6ce9 2007-07-21 drh: zOut[j++] = '\\'; dbda8d6ce9 2007-07-21 drh: zOut[j++] = '\\'; dbda8d6ce9 2007-07-21 drh: }else if( isspace(c) ){ dbda8d6ce9 2007-07-21 drh: zOut[j++] = '\\'; dbda8d6ce9 2007-07-21 drh: switch( c ){ dbda8d6ce9 2007-07-21 drh: case '\n': c = 'n'; break; dbda8d6ce9 2007-07-21 drh: case ' ': c = 's'; break; dbda8d6ce9 2007-07-21 drh: case '\t': c = 't'; break; dbda8d6ce9 2007-07-21 drh: case '\r': c = 'r'; break; dbda8d6ce9 2007-07-21 drh: case '\v': c = 'v'; break; dbda8d6ce9 2007-07-21 drh: case '\f': c = 'f'; break; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: zOut[j++] = c; dbda8d6ce9 2007-07-21 drh: }else{ dbda8d6ce9 2007-07-21 drh: zOut[j++] = c; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: zOut[j] = 0; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: return zOut; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** Decode a fossilized string in-place. dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: void defossilize(char *z){ dbda8d6ce9 2007-07-21 drh: int i, j, c; dbda8d6ce9 2007-07-21 drh: for(i=j=0; z[i]; i++){ dbda8d6ce9 2007-07-21 drh: c = z[i]; dbda8d6ce9 2007-07-21 drh: if( c=='\\' && z[i+1] ){ dbda8d6ce9 2007-07-21 drh: i++; dbda8d6ce9 2007-07-21 drh: switch( z[i] ){ dbda8d6ce9 2007-07-21 drh: case 'n': c = '\n'; break; dbda8d6ce9 2007-07-21 drh: case 's': c = ' '; break; dbda8d6ce9 2007-07-21 drh: case 't': c = '\t'; break; dbda8d6ce9 2007-07-21 drh: case 'r': c = '\r'; break; dbda8d6ce9 2007-07-21 drh: case 'v': c = '\v'; break; dbda8d6ce9 2007-07-21 drh: case 'f': c = '\f'; break; dbda8d6ce9 2007-07-21 drh: case '0': c = 0; break; dbda8d6ce9 2007-07-21 drh: case '\\': c = '\\'; break; dbda8d6ce9 2007-07-21 drh: default: c = z[i]; break; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: z[j++] = c; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: z[j] = 0; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** The characters used for HTTP base64 encoding. dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: static unsigned char zBase[] = dbda8d6ce9 2007-07-21 drh: "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~"; dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** Encode a string using a base-64 encoding. dbda8d6ce9 2007-07-21 drh: ** The encoding can be reversed using the <b>decode64</b> function. dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** Space to hold the result comes from malloc(). dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: char *encode64(const char *zData, int nData){ dbda8d6ce9 2007-07-21 drh: char *z64; dbda8d6ce9 2007-07-21 drh: int i, n; dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: if( nData<=0 ){ dbda8d6ce9 2007-07-21 drh: nData = strlen(zData); dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: z64 = malloc( (nData*4)/3 + 6 ); dbda8d6ce9 2007-07-21 drh: for(i=n=0; i+2<nData; i+=3){ dbda8d6ce9 2007-07-21 drh: z64[n++] = zBase[ (zData[i]>>2) & 0x3f ]; dbda8d6ce9 2007-07-21 drh: z64[n++] = zBase[ ((zData[i]<<4) & 0x30) | ((zData[i+1]>>4) & 0x0f) ]; dbda8d6ce9 2007-07-21 drh: z64[n++] = zBase[ ((zData[i+1]<<2) & 0x3c) | ((zData[i+2]>>6) & 0x03) ]; dbda8d6ce9 2007-07-21 drh: z64[n++] = zBase[ zData[i+2] & 0x3f ]; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: if( i+1<nData ){ dbda8d6ce9 2007-07-21 drh: z64[n++] = zBase[ (zData[i]>>2) & 0x3f ]; dbda8d6ce9 2007-07-21 drh: z64[n++] = zBase[ ((zData[i]<<4) & 0x30) | ((zData[i+1]>>4) & 0x0f) ]; dbda8d6ce9 2007-07-21 drh: z64[n++] = zBase[ ((zData[i+1]<<2) & 0x3c) ]; dbda8d6ce9 2007-07-21 drh: }else if( i<nData ){ dbda8d6ce9 2007-07-21 drh: z64[n++] = zBase[ (zData[i]>>2) & 0x3f ]; dbda8d6ce9 2007-07-21 drh: z64[n++] = zBase[ ((zData[i]<<4) & 0x30) ]; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: z64[n] = 0; dbda8d6ce9 2007-07-21 drh: return z64; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** This function treats its input as a base-64 string and returns the dbda8d6ce9 2007-07-21 drh: ** decoded value of that string. Characters of input that are not dbda8d6ce9 2007-07-21 drh: ** valid base-64 characters (such as spaces and newlines) are ignored. dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** Space to hold the decoded string is obtained from malloc(). dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** The number of bytes decoded is returned in *pnByte dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: char *decode64(const char *z64, int *pnByte){ dbda8d6ce9 2007-07-21 drh: char *zData; dbda8d6ce9 2007-07-21 drh: int n64; dbda8d6ce9 2007-07-21 drh: int i, j; dbda8d6ce9 2007-07-21 drh: int a, b, c, d; dbda8d6ce9 2007-07-21 drh: static int isInit = 0; dbda8d6ce9 2007-07-21 drh: static int trans[128]; dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: if( !isInit ){ dbda8d6ce9 2007-07-21 drh: for(i=0; i<128; i++){ trans[i] = 0; } dbda8d6ce9 2007-07-21 drh: for(i=0; zBase[i]; i++){ trans[zBase[i] & 0x7f] = i; } dbda8d6ce9 2007-07-21 drh: isInit = 1; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: n64 = strlen(z64); dbda8d6ce9 2007-07-21 drh: while( n64>0 && z64[n64-1]=='=' ) n64--; dbda8d6ce9 2007-07-21 drh: zData = malloc( (n64*3)/4 + 4 ); dbda8d6ce9 2007-07-21 drh: for(i=j=0; i+3<n64; i+=4){ dbda8d6ce9 2007-07-21 drh: a = trans[z64[i] & 0x7f]; dbda8d6ce9 2007-07-21 drh: b = trans[z64[i+1] & 0x7f]; dbda8d6ce9 2007-07-21 drh: c = trans[z64[i+2] & 0x7f]; dbda8d6ce9 2007-07-21 drh: d = trans[z64[i+3] & 0x7f]; dbda8d6ce9 2007-07-21 drh: zData[j++] = ((a<<2) & 0xfc) | ((b>>4) & 0x03); dbda8d6ce9 2007-07-21 drh: zData[j++] = ((b<<4) & 0xf0) | ((c>>2) & 0x0f); dbda8d6ce9 2007-07-21 drh: zData[j++] = ((c<<6) & 0xc0) | (d & 0x3f); dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: if( i+2<n64 ){ dbda8d6ce9 2007-07-21 drh: a = trans[z64[i] & 0x7f]; dbda8d6ce9 2007-07-21 drh: b = trans[z64[i+1] & 0x7f]; dbda8d6ce9 2007-07-21 drh: c = trans[z64[i+2] & 0x7f]; dbda8d6ce9 2007-07-21 drh: zData[j++] = ((a<<2) & 0xfc) | ((b>>4) & 0x03); dbda8d6ce9 2007-07-21 drh: zData[j++] = ((b<<4) & 0xf0) | ((c>>2) & 0x0f); dbda8d6ce9 2007-07-21 drh: }else if( i+1<n64 ){ dbda8d6ce9 2007-07-21 drh: a = trans[z64[i] & 0x7f]; dbda8d6ce9 2007-07-21 drh: b = trans[z64[i+1] & 0x7f]; dbda8d6ce9 2007-07-21 drh: zData[j++] = ((a<<2) & 0xfc) | ((b>>4) & 0x03); dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: zData[j] = 0; dbda8d6ce9 2007-07-21 drh: *pnByte = j; dbda8d6ce9 2007-07-21 drh: return zData; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** The base-16 encoding using the following characters: dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: ** 0123456789abcdef dbda8d6ce9 2007-07-21 drh: ** dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** The array used for encoding dbda8d6ce9 2007-07-21 drh: */ /* 123456789 12345 */ dbda8d6ce9 2007-07-21 drh: static const char zEncode[] = "0123456789abcdef"; dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** Encode a N-digit base-256 in base-16. Return zero on success dbda8d6ce9 2007-07-21 drh: ** and non-zero if there is an error. dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: int encode16(const unsigned char *pIn, unsigned char *zOut, int N){ dbda8d6ce9 2007-07-21 drh: int i; dbda8d6ce9 2007-07-21 drh: for(i=0; i<N; i++){ d2b4469934 2007-11-21 drh: *(zOut++) = zEncode[pIn[i]>>4]; d2b4469934 2007-11-21 drh: *(zOut++) = zEncode[pIn[i]&0xf]; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: *zOut = 0; dbda8d6ce9 2007-07-21 drh: return 0; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** An array for translating single base-16 characters into a value. dbda8d6ce9 2007-07-21 drh: ** Disallowed input characters have a value of 64. Upper and lower dbda8d6ce9 2007-07-21 drh: ** case is the same. dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: static const char zDecode[] = { dbda8d6ce9 2007-07-21 drh: 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, dbda8d6ce9 2007-07-21 drh: 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, dbda8d6ce9 2007-07-21 drh: 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, dbda8d6ce9 2007-07-21 drh: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 64, 64, 64, 64, 64, 64, dbda8d6ce9 2007-07-21 drh: 64, 10, 11, 12, 13, 14, 15, 64, 64, 1, 64, 64, 1, 64, 64, 0, dbda8d6ce9 2007-07-21 drh: 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, dbda8d6ce9 2007-07-21 drh: 64, 10, 11, 12, 13, 14, 15, 64, 64, 1, 64, 64, 1, 64, 64, 0, dbda8d6ce9 2007-07-21 drh: 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, dbda8d6ce9 2007-07-21 drh: }; dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** Decode a N-character base-16 number into base-256. N must be a dbda8d6ce9 2007-07-21 drh: ** multiple of 2. The output buffer must be at least N/2 characters dbda8d6ce9 2007-07-21 drh: ** in length dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: int decode16(const unsigned char *zIn, unsigned char *pOut, int N){ dbda8d6ce9 2007-07-21 drh: int i, j; dbda8d6ce9 2007-07-21 drh: if( (N&1)!=0 ) return 1; dbda8d6ce9 2007-07-21 drh: for(i=j=0; i<N; i += 2, j++){ dbda8d6ce9 2007-07-21 drh: int v1, v2, a; dbda8d6ce9 2007-07-21 drh: a = zIn[i]; dbda8d6ce9 2007-07-21 drh: if( (a & 0x80)!=0 || (v1 = zDecode[a])==64 ) return 1; dbda8d6ce9 2007-07-21 drh: a = zIn[i+1]; dbda8d6ce9 2007-07-21 drh: if( (a & 0x80)!=0 || (v2 = zDecode[a])==64 ) return 1; dbda8d6ce9 2007-07-21 drh: pOut[j] = (v1<<4) + v2; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: return 0; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** Return true if the input string contains only valid base-16 digits. dbda8d6ce9 2007-07-21 drh: ** If any invalid characters appear in the string, return false. dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: int validate16(const char *zIn, int nIn){ dbda8d6ce9 2007-07-21 drh: int c, i; dbda8d6ce9 2007-07-21 drh: for(i=0; i<nIn && (c = zIn[i])!=0; i++){ dbda8d6ce9 2007-07-21 drh: if( c & ~0x7f ) return 0; dbda8d6ce9 2007-07-21 drh: if( zDecode[c]>63 ) return 0; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: return 1; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: dbda8d6ce9 2007-07-21 drh: /* dbda8d6ce9 2007-07-21 drh: ** The input string is a base16 value. Convert it into its canonical dbda8d6ce9 2007-07-21 drh: ** form. This means that digits are all lower case and that conversions dbda8d6ce9 2007-07-21 drh: ** like "l"->"1" and "O"->"0" occur. dbda8d6ce9 2007-07-21 drh: */ dbda8d6ce9 2007-07-21 drh: void canonical16(char *z, int n){ dbda8d6ce9 2007-07-21 drh: while( *z && n-- ){ dbda8d6ce9 2007-07-21 drh: *z = zEncode[zDecode[(*z)&0x7f]&0x1f]; dbda8d6ce9 2007-07-21 drh: z++; dbda8d6ce9 2007-07-21 drh: } dbda8d6ce9 2007-07-21 drh: }