Artifact Content
Not logged in

Artifact 98f347f72aeacc308173cdf50cee77571389b374

File src/blob.c part of check-in [dbda8d6ce9] - Initial check-in of m1 sources. by drh on 2007-07-21 14:10:57.

/*
** Copyright (c) 2006 D. Richard Hipp
**
** This program is free software; you can redistribute it and/or
** modify it under the terms of the GNU General Public
** License version 2 as published by the Free Software Foundation.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
** General Public License for more details.
** 
** You should have received a copy of the GNU General Public
** License along with this library; if not, write to the
** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
** Boston, MA  02111-1307, USA.
**
** Author contact information:
**   drh@hwaci.com
**   http://www.hwaci.com/drh/
**
*******************************************************************************
**
** A Blob is a variable-length container for arbitrary string
** or binary data.
*/
#include "config.h"
#include <zlib.h>
#include "blob.h"

#if INTERFACE
/*
** A Blob can hold a string or a binary object of arbitrary size.  The
** buffer is resized as necessary through the xRealloc callback, which
** is invoked with the blob and the desired buffer size.  A requested
** size of zero releases the content (see blob_reset()).
*/
struct Blob {
  unsigned int nUsed;            /* Number of bytes used in aData[] */
  unsigned int nAlloc;           /* Number of bytes allocated for aData[] */
  unsigned int iCursor;          /* Offset in aData[] of the next byte to parse */
  char *aData;                   /* Where the information is stored */
  void (*xRealloc)(Blob*, unsigned int); /* Function to reallocate the buffer */
};

/*
** The current size of a Blob: the number of content bytes in use
** (nUsed), not the size of the underlying allocation.  X is a pointer
** to a Blob.
*/
#define blob_size(X)  ((X)->nUsed)

/*
** The buffer holding the blob data.  X is a pointer to a Blob.  This is
** the raw aData pointer; use blob_str() or blob_terminate() when a
** nul-terminated string is required.
*/
#define blob_buffer(X)  ((X)->aData)

#endif /* INTERFACE */

/*
** Make sure a blob is initialized.  A blob counts as initialized when
** its xRealloc pointer is one of the two reallocation routines defined
** in this file.  The check is an assert().
*/
#define blob_is_init(x) \
  assert((x)->xRealloc==blobReallocMalloc || (x)->xRealloc==blobReallocStatic)

/*
** Make sure a blob does not contain malloced memory.  A blob whose
** reallocator is blobReallocMalloc passes only if nAlloc is zero.
**
** The check is compiled out by default, in which case the macro expands
** to nothing.  Change the "#if 0" below to "#if 1" to enable the
** assert() while debugging.
*/
#if 0  /* Enable for debugging only */
#define blob_is_reset(x) \
  assert((x)->xRealloc!=blobReallocMalloc || (x)->nAlloc==0)
#else
#define blob_is_reset(x)
#endif

/*
** Out-of-memory handler for blob operations.  A fixed message is
** written to file descriptor 2 and the process terminates with exit
** status 1.
*/
static void blob_panic(void){
  static const char zOom[] = "out of memory\n";
  /* sizeof() counts the nul terminator, which is not written. */
  write(2, zOom, sizeof(zOom)-1);
  exit(1);
}

/*
** A reallocation function that assumes that aData came from malloc().
** This function attempts to resize the buffer of the blob to hold
** newSize bytes.
**
**   *  newSize==0 frees the buffer and returns the blob to the empty
**      state (no data, zero sizes, cursor at 0).
**
**   *  The buffer is reallocated when it must grow, or when it can
**      shrink by more than 4000 bytes.  Smaller shrink requests leave
**      the existing allocation in place.
**
**   *  If the buffer ends up smaller than nUsed, nUsed is clamped to
**      the new allocation size.
**
** No attempt is made to recover from an out-of-memory error.
** If an OOM error occurs, an error message is printed on stderr
** and the program exits.
*/
void blobReallocMalloc(Blob *pBlob, unsigned int newSize){
  if( newSize==0 ){
    free(pBlob->aData);
    pBlob->aData = 0;
    pBlob->nAlloc = 0;
    pBlob->nUsed = 0;
    pBlob->iCursor = 0;
  }else if( newSize>pBlob->nAlloc || pBlob->nAlloc-newSize>4000 ){
    /* The subtraction is only evaluated when newSize<=nAlloc, so it
    ** cannot wrap around.  The former test "newSize<nAlloc-4000"
    ** underflowed whenever nAlloc was less than 4000, which forced a
    ** realloc() on every call. */
    char *pNew = realloc(pBlob->aData, newSize);
    if( pNew==0 ) blob_panic();
    pBlob->aData = pNew;
    pBlob->nAlloc = newSize;
    if( pBlob->nUsed>pBlob->nAlloc ){
      pBlob->nUsed = pBlob->nAlloc;
    }
  }
}

/*
** An initializer for Blobs.  It produces an empty blob: no buffer, zero
** sizes, cursor at 0, and blobReallocMalloc as the reallocator.
**
** empty_blob is a constant Blob built from that initializer.  Other
** routines in this file assign it to a blob to put the blob into the
** empty state.
*/
#if INTERFACE
#define BLOB_INITIALIZER  {0,0,0,0,blobReallocMalloc}
#endif
const Blob empty_blob = BLOB_INITIALIZER;

/*
** The reallocator used while aData points at memory the blob does not
** own.  A request for zero bytes just makes the blob empty; nothing is
** freed.  Any other request allocates newSize bytes with malloc(),
** copies the content (truncated to newSize if necessary) into the new
** buffer, and switches the blob over to blobReallocMalloc so that all
** later resizes operate on the heap copy.
**
** The program exits via blob_panic() if malloc() fails.
*/
static void blobReallocStatic(Blob *pBlob, unsigned int newSize){
  char *zCopy;
  if( newSize==0 ){
    *pBlob = empty_blob;
    return;
  }
  zCopy = malloc( newSize );
  if( zCopy==0 ) blob_panic();
  if( newSize<pBlob->nUsed ){
    pBlob->nUsed = newSize;
  }
  memcpy(zCopy, pBlob->aData, pBlob->nUsed);
  pBlob->aData = zCopy;
  pBlob->xRealloc = blobReallocMalloc;
  pBlob->nAlloc = newSize;
}

/*
** Return a blob to the empty state by asking its reallocator for a
** zero-byte buffer.  The blob must already be initialized.
*/
void blob_reset(Blob *pBlob){
  blob_is_init(pBlob);
  (pBlob->xRealloc)(pBlob, 0);
}

/*
** Point a blob at a string or byte-array constant of a given length.
** No copy is made: the blob refers to zData directly and uses
** blobReallocStatic, so the bytes are copied to the heap only if the
** blob is later resized.
**
** If zData is NULL the blob becomes the empty blob.  If size is zero
** or negative, the length is taken from strlen(zData).
**
** Any prior data in the blob is discarded.
*/
void blob_init(Blob *pBlob, const char *zData, int size){
  blob_is_reset(pBlob);
  if( zData==0 ){
    *pBlob = empty_blob;
    return;
  }
  if( size<=0 ){
    size = strlen(zData);
  }
  pBlob->nAlloc = size;
  pBlob->nUsed = size;
  pBlob->aData = (char*)zData;
  pBlob->iCursor = 0;
  pBlob->xRealloc = blobReallocStatic;
}

/*
** Point a blob at a nul-terminated string.  This is blob_init() with
** the length left for blob_init() to measure.
** Any prior data in the blob is discarded.
*/
void blob_set(Blob *pBlob, const char *zStr){
  const int nMeasure = -1;   /* non-positive: blob_init() calls strlen() */
  blob_init(pBlob, zStr, nMeasure);
}

/*
** Make a blob hold the empty string.  The blob points at a static
** one-byte "" constant and uses blobReallocStatic, so no memory is
** allocated here.  Whatever the blob held before is overwritten
** without being freed.
*/
void blob_zero(Blob *pBlob){
  static const char zNothing[] = "";
  blob_is_reset(pBlob);
  pBlob->aData = (char*)zNothing;
  pBlob->nAlloc = 1;
  pBlob->nUsed = 0;
  pBlob->iCursor = 0;
  pBlob->xRealloc = blobReallocStatic;
}

/*
** Append nData bytes from aData to the end of a blob.  A negative nData
** means aData is a nul-terminated string whose length is measured with
** strlen().  Appending zero bytes is a no-op.
**
** The buffer is enlarged through the blob's reallocator when needed.
** The result is always followed by a nul terminator that is not counted
** in the blob size.
*/
void blob_append(Blob *pBlob, const char *aData, int nData){
  blob_is_init(pBlob);
  if( nData<0 ){
    nData = strlen(aData);
  }
  if( nData==0 ){
    return;
  }
  if( pBlob->nUsed + nData >= pBlob->nAlloc ){
    /* Ask for the space required plus the current allocation plus a
    ** little slack, so that repeated appends do not each reallocate. */
    pBlob->xRealloc(pBlob, pBlob->nUsed + nData + pBlob->nAlloc + 100);
    if( pBlob->nUsed + nData >= pBlob->nAlloc ){
      blob_panic();
    }
  }
  memcpy(pBlob->aData + pBlob->nUsed, aData, nData);
  pBlob->nUsed += nData;
  pBlob->aData[pBlob->nUsed] = 0;   /* Blobs are always nul-terminated */
}

/*
** Make pTo an independent copy of the content of pFrom.  Both blobs
** must be initialized.  pTo is first set to the empty string with
** blob_zero(), which overwrites its previous content without freeing
** it, and then the bytes of pFrom are appended.
*/
void blob_copy(Blob *pTo, Blob *pFrom){
  blob_is_init(pFrom);
  blob_is_init(pTo);
  blob_zero(pTo);
  blob_append(pTo, pFrom->aData, pFrom->nUsed);
}

/*
** Return a pointer to a null-terminated string for a blob.
**
** If the byte following the content is not already a nul, or if the
** blob has no buffer at all (as with a blob built from
** BLOB_INITIALIZER), the blob is materialized first: blob_materialize()
** gives it a buffer obtained through its reallocator with a terminator
** in place.  The returned pointer is therefore never NULL.
*/
char *blob_str(Blob *p){
  blob_is_init(p);
  /* An empty blob has aData==0; test for that before dereferencing. */
  if( p->aData==0 || p->aData[p->nUsed]!=0 ){
    blob_materialize(p);
  }
  return p->aData;
}

/*
** Return a pointer to a null-terminated string for a blob.  The
** terminator is written directly into the blob's current buffer, just
** past the content.
**
** WARNING:  If the blob is ephemeral, this can insert a '\000'
** character into the middle of the parent blob.  Example:  Suppose p
** is a token extracted from some larger blob pBig using blob_token().
** Calling this routine on p writes a '\000' into pBig immediately
** after the token in order to terminate p.  If pBig must not be
** modified, use blob_str() instead.  blob_str() makes a copy of p when
** necessary rather than modifying pBig.
*/
char *blob_terminate(Blob *p){
  char *z;
  blob_is_init(p);
  z = p->aData;
  z[p->nUsed] = 0;
  return z;
}

/*
** Compare two blobs.  The bytes they have in common are compared with
** memcmp().  If those are equal, the shorter blob sorts first.  The
** result is negative, zero, or positive as pA is less than, equal to,
** or greater than pB.
*/
int blob_compare(Blob *pA, Blob *pB){
  int nA, nB, nCommon, diff;
  blob_is_init(pA);
  blob_is_init(pB);
  nA = blob_size(pA);
  nB = blob_size(pB);
  nCommon = nB<=nA ? nB : nA;
  diff = memcmp(blob_buffer(pA), blob_buffer(pB), nCommon);
  return diff!=0 ? diff : nA - nB;
}

/*
** Compare a blob to a string.  Return TRUE if they are equal.
**
** n is the number of bytes of z to compare against.  If n is zero or
** negative, strlen(z) is used.  The comparison is done by wrapping z
** in a temporary blob and calling blob_compare(); only the fields that
** blob_compare() looks at are filled in.
*/
int blob_eq_str(Blob *pBlob, const char *z, int n){
  Blob probe;
  blob_is_init(pBlob);
  if( n<=0 ){
    n = strlen(z);
  }
  probe.xRealloc = blobReallocStatic;
  probe.nUsed = n;
  probe.aData = (char*)z;
  return 0==blob_compare(pBlob, &probe);
}

/*
** This macro compares a blob against a string constant and is true when
** the blob content is byte-for-byte equal to the string.  B is a
** pointer to a Blob.  We use the sizeof() operator on S twice, so S
** really does need to be a string literal or character array - not a
** character pointer.  The nul terminator counted by sizeof() is
** excluded from the comparison.
*/
#if INTERFACE
# define blob_eq(B,S) \
     ((B)->nUsed==sizeof(S)-1 && memcmp((B)->aData,S,sizeof(S)-1)==0)
#endif


/*
** Resize a blob so that it holds exactly newSize bytes of content.
** The reallocator is asked for newSize+1 bytes so that there is room
** for a nul terminator, which is written just past the content.  The
** blob is truncated if newSize is smaller than the current size.
*/
void blob_resize(Blob *pBlob, unsigned int newSize){
  (pBlob->xRealloc)(pBlob, newSize+1);
  pBlob->aData[newSize] = 0;
  pBlob->nUsed = newSize;
}

/*
** Make sure a blob is nul-terminated and is not a pointer to unmanaged
** space.  This is done by resizing the blob to its current size, which
** routes through the blob's reallocator.  Return a pointer to the
** blob's buffer.
*/
char *blob_materialize(Blob *pBlob){
  const unsigned int nKeep = pBlob->nUsed;
  blob_resize(pBlob, nKeep);
  return pBlob->aData;
}


/*
** Call dehttpize on a blob.  The blob is materialized first, so an
** ephemeral blob gets its own nul-terminated buffer before dehttpize()
** is handed that buffer.  The value dehttpize() returns becomes the new
** blob size.
** NOTE(review): dehttpize() is defined elsewhere; presumably it decodes
** in place and returns the decoded length - confirm.
*/
void blob_dehttpize(Blob *pBlob){
  char *z = blob_materialize(pBlob);
  pBlob->nUsed = dehttpize(z);
}

/*
** Extract N bytes from blob pFrom, starting at its cursor, and use them
** to initialize blob pTo.  Return the actual number of bytes extracted.
**
** If fewer than N bytes remain past the cursor, only the remaining
** bytes are extracted.  If nothing remains, pTo is set to the empty
** string and 0 is returned.  Otherwise the cursor of pFrom advances
** past the extracted bytes.
**
** After this call completes, pTo will be an ephemeral blob: it points
** into the buffer of pFrom rather than holding its own copy.
*/
int blob_extract(Blob *pFrom, int N, Blob *pTo){
  blob_is_init(pFrom);
  blob_is_reset(pTo);
  if( pFrom->iCursor + N > pFrom->nUsed ){
    /* Request runs past the end of the content: clamp it. */
    N = pFrom->nUsed - pFrom->iCursor;
    if( N<=0 ){
      blob_zero(pTo);
      return 0;
    }
  }
  pTo->nUsed = N;
  pTo->nAlloc = N;
  pTo->aData = pFrom->aData + pFrom->iCursor;
  pTo->iCursor = 0;
  pTo->xRealloc = blobReallocStatic;
  pFrom->iCursor += N;
  return N;
}

/*
** Move the parse cursor of a blob back to offset 0 so that the next
** extraction starts from the first byte of the content.
*/
void blob_rewind(Blob *p){
  p->iCursor = 0;
}

/*
** Extract a single line of text from pFrom beginning at the current
** cursor location and use that line of text to initialize pTo.
** Return the number of bytes in the line.  The terminating \n is not
** part of the line.  Blank lines are not skipped; they are returned as
** zero-length lines.
**
** The cursor of pFrom is left pointing at the first byte past the
** \n that terminated the line.
**
** pTo will be an ephemeral blob.  If pFrom changes, it might alter
** pTo as well.
*/
int blob_line(Blob *pFrom, Blob *pTo){
  char *z = pFrom->aData;
  int nByte = pFrom->nUsed;
  int iEnd;
  for(iEnd=pFrom->iCursor; iEnd<nByte && z[iEnd]!='\n'; iEnd++){}
  blob_extract(pFrom, iEnd-pFrom->iCursor, pTo);
  if( iEnd<nByte && z[iEnd]=='\n' ){
    /* Step over the newline that ended the line. */
    pFrom->iCursor++;
  }
  return pTo->nUsed;
}

/*
** Extract a single token from pFrom and use it to initialize pTo.
** Return the number of bytes in the token.  If no token is found,
** return 0.
**
** A token is a maximal run of non-whitespace bytes, as classified by
** isspace().  Whitespace before the token is skipped, and so is the
** whitespace that follows it: the cursor of pFrom is left pointing at
** the first non-whitespace byte after the token, or at the end of the
** content.
**
** pTo will be an ephemeral blob.  If pFrom changes, it might alter
** pTo as well.
*/
int blob_token(Blob *pFrom, Blob *pTo){
  char *aData = pFrom->aData;
  int n = pFrom->nUsed;
  int i = pFrom->iCursor;
  /* The casts to unsigned char are required: passing a negative char
  ** value (any byte >= 0x80 where char is signed) to isspace() is
  ** undefined behavior. */
  while( i<n && isspace((unsigned char)aData[i]) ){ i++; }
  pFrom->iCursor = i;
  while( i<n && !isspace((unsigned char)aData[i]) ){ i++; }
  blob_extract(pFrom, i-pFrom->iCursor, pTo);
  while( i<n && isspace((unsigned char)aData[i]) ){ i++; }
  pFrom->iCursor = i;
  return pTo->nUsed;
}

/*
** Extract everything from the current cursor to the end of the blob
** into a new blob.  The new blob is an ephemeral reference to the
** original blob.  The cursor of the original blob is unchanged: it is
** saved before the extraction and restored afterwards.  Return the
** number of bytes in pTo.
*/
int blob_tail(Blob *pFrom, Blob *pTo){
  const int iSaved = pFrom->iCursor;
  blob_extract(pFrom, pFrom->nUsed-pFrom->iCursor, pTo);
  pFrom->iCursor = iSaved;
  return pTo->nUsed;
}

/*
** Return true (1) if the blob is exactly UUID_SIZE bytes long and
** validate16() accepts those bytes; return 0 otherwise.
** NOTE(review): validate16() is defined elsewhere; presumably it checks
** for base16 digits - confirm.
*/
int blob_is_uuid(Blob *pBlob){
  if( blob_size(pBlob)!=UUID_SIZE ){
    return 0;
  }
  return validate16(blob_buffer(pBlob), UUID_SIZE)!=0;
}
int blob_is_uuid_n(Blob *pBlob, int n){
  /* Same test as blob_is_uuid(), with a caller-supplied length n in
  ** place of UUID_SIZE.  The result is 1 or 0. */
  if( blob_size(pBlob)!=n ){
    return 0;
  }
  return validate16(blob_buffer(pBlob), n)!=0;
}

/*
** Return true if the blob contains a valid 32-bit integer.  Store
** the integer value in *pValue.
**
** Every byte of the blob must be a decimal digit; there is no support
** for a sign or for surrounding whitespace.  A value larger than
** 2147483647 is rejected rather than being allowed to overflow.  An
** empty blob is accepted and yields the value 0.
**
** Return 1 on success.  Return 0, leaving *pValue unchanged, if the
** blob is not a valid integer.
*/
int blob_is_int(Blob *pBlob, int *pValue){
  const char *z = blob_buffer(pBlob);
  int i, n, v;
  n = blob_size(pBlob);
  v = 0;
  for(i=0; i<n; i++){
    /* Classify the byte as unsigned char: passing a negative value to
    ** isdigit() is undefined behavior.  An embedded nul is rejected
    ** here as well, since it is not a digit. */
    unsigned char c = (unsigned char)z[i];
    int d;
    if( !isdigit(c) ) return 0;
    d = c - '0';
    /* Reject the digit if v*10+d would exceed 2147483647.  The test is
    ** arranged so that it cannot itself overflow. */
    if( v > (2147483647 - d)/10 ) return 0;
    v = v*10 + d;
  }
  *pValue = v;
  return 1;
}

/*
** Apply blob_zero() to each of the n blobs in the array aBlob[],
** setting every element to the empty string.
*/
void blobarray_zero(Blob *aBlob, int n){
  int i = 0;
  while( i<n ){
    blob_zero(&aBlob[i]);
    i++;
  }
}
void blobarray_reset(Blob *aBlob, int n){
  /* Apply blob_reset() to each of the n blobs in aBlob[]. */
  int i = 0;
  while( i<n ){
    blob_reset(&aBlob[i]);
    i++;
  }
}

/*
** Parse a blob into whitespace-separated tokens using blob_token().
** Store each token in an element of the blobarray aToken[].  aToken[]
** is nToken elements in size.  Parsing stops when aToken[] is full or
** when blob_token() finds no further token.  Return the number of
** tokens stored.
*/
int blob_tokenize(Blob *pIn, Blob *aToken, int nToken){
  int nFound = 0;
  while( nFound<nToken ){
    if( blob_token(pIn, &aToken[nFound])==0 ) break;
    nFound++;
  }
  return nFound;
}

/* 
** This function implements the callback from vxprintf. 
**
** This routine add nNewChar characters of text in zNewText to
** the Blob structure pointed to by "arg".
*/
static void bout(void *arg, const char *zNewText, int nNewChar){
  Blob *pBlob = (Blob*)arg;
  blob_append(pBlob, zNewText, nNewChar);
}

/*
** Do printf-style string rendering and append the results to a blob.
** The formatting is done by vxprintf(), which delivers its output to
** the blob through the bout() callback.
*/
void blob_appendf(Blob *pBlob, const char *zFormat, ...){
  va_list args;
  va_start(args, zFormat);
  vxprintf(bout, pBlob, zFormat, args);
  va_end(args);
}
void blob_vappendf(Blob *pBlob, const char *zFormat, va_list ap){
  /* Same as blob_appendf(), for callers that already hold a va_list. */
  vxprintf(bout, pBlob, zFormat, ap);
}

/*
** Initialize a blob to the data on an input channel.  Return
** the number of bytes read into the blob.  Any prior content
** of the blob is discarded, not freed.
**
** If nToRead is negative, the channel is read until end-of-file or
** until a read error occurs.  Otherwise at most nToRead bytes are
** read, and the blob is sized to the number of bytes actually
** obtained.
*/
int blob_read_from_channel(Blob *pBlob, FILE *in, int nToRead){
  size_t n;
  blob_zero(pBlob);
  if( nToRead<0 ){
    char zBuf[10000];
    /* Stop on a read error as well as on end-of-file.  After an error
    ** fread() keeps returning 0 without ever setting the EOF indicator,
    ** so testing feof() alone would loop forever. */
    while( !feof(in) && !ferror(in) ){
      n = fread(zBuf, 1, sizeof(zBuf), in);
      if( n>0 ){
        blob_append(pBlob, zBuf, n);
      }
    }
  }else{
    blob_resize(pBlob, nToRead);
    n = fread(blob_buffer(pBlob), 1, nToRead, in);
    blob_resize(pBlob, n);
  }
  return blob_size(pBlob);
}

/*
** Initialize a blob to be the content of a file.  If the filename
** is NULL, blank, or "-" then read from standard input.
**
** Any prior content of the blob is discarded, not freed.
**
** Return the number of bytes read.  A file whose size cannot be
** determined, or that cannot be opened, is reported through
** fossil_panic().  If fewer bytes are read than file_size() reported,
** the blob is shrunk to the number of bytes actually read.
*/
int blob_read_from_file(Blob *pBlob, const char *zFilename){
  int nExpect, nRead;
  FILE *pFile;

  if( zFilename==0 || zFilename[0]==0
        || (zFilename[0]=='-' && zFilename[1]==0) ){
    return blob_read_from_channel(pBlob, stdin, -1);
  }

  nExpect = file_size(zFilename);
  blob_zero(pBlob);
  if( nExpect<0 ){
    fossil_panic("no such file: %s", zFilename);
  }
  if( nExpect==0 ){
    return 0;
  }

  /* Size the blob for the whole file, then read straight into it. */
  blob_resize(pBlob, nExpect);
  pFile = fopen(zFilename, "rb");
  if( pFile==0 ){
    fossil_panic("cannot open %s for reading", zFilename);
  }
  nRead = fread(blob_buffer(pBlob), 1, nExpect, pFile);
  fclose(pFile);
  if( nRead<nExpect ){
    blob_resize(pBlob, nRead);
  }
  return nRead;
}

/*
** Write the content of a blob into a file.
**
** If the filename is blank or "-" then write to standard output.
**
** Otherwise the name is passed through file_simplify_name() and
** file_mkdir() is called for each leading directory component before
** the file is opened for writing.  A directory that cannot be created,
** a file that cannot be opened, and a short write are all reported
** through fossil_panic().
**
** Return the number of bytes written.
*/
int blob_write_to_file(Blob *pBlob, const char *zFilename){
  FILE *out;
  int needToClose;
  int wrote;

  if( zFilename[0]==0 || (zFilename[0]=='-' && zFilename[1]==0) ){
    out = stdout;
    needToClose = 0;
  }else{
    int i, nName;
    char *zName, zBuf[1000];

    /* Work on a private, writable copy of the name.  The stack buffer
    ** is used when the name fits, otherwise a heap copy is made. */
    nName = strlen(zFilename);
    if( nName>=sizeof(zBuf) ){
      zName = mprintf("%s", zFilename);
    }else{
      zName = zBuf;
      strcpy(zName, zFilename);
    }
    nName = file_simplify_name(zName, nName);
    for(i=1; i<nName; i++){
      if( zName[i]=='/' ){
        /* Temporarily cut the name at this '/' so that zName is the
        ** path of the directory to create. */
        zName[i] = 0;
        if( file_mkdir(zName, 1) ){
          /* The format has a %s, so the directory name must be passed.
          ** It was previously omitted, leaving the %s with no
          ** corresponding argument. */
          fossil_panic("unable to create directory %s", zName);
        }
        zName[i] = '/';
      }
    }
    out = fopen(zName, "wb");
    if( out==0 ){
      fossil_panic("unable to open file \"%s\" for writing", zName);
    }
    needToClose = 1;
    if( zName!=zBuf ) free(zName);
  }
  blob_is_init(pBlob);
  wrote = fwrite(blob_buffer(pBlob), 1, blob_size(pBlob), out);
  if( needToClose ) fclose(out);
  if( wrote!=blob_size(pBlob) ){
    fossil_panic("short write: %d of %d bytes to %s", wrote,
       blob_size(pBlob), zFilename);
  }
  return wrote;
}

/*
** Compress a blob pIn.  Store the result in pOut.  It is ok for pIn and
** pOut to be the same blob.
**
** pOut must either be the same as pIn or else uninitialized.
**
** The output begins with a 4-byte big-endian header holding the
** uncompressed size, followed by the output of zlib's compress().  The
** result is built in a temporary blob, so pIn is still intact while it
** is being read even when pOut==pIn.  The return code of compress() is
** not checked.
*/
void blob_compress(Blob *pIn, Blob *pOut){
  unsigned int nRaw = blob_size(pIn);
  /* Output buffer size handed to compress().
  ** NOTE(review): presumably zlib's worst-case bound - confirm. */
  unsigned int nBound = 13 + nRaw + (nRaw+999)/1000;
  unsigned long int nPacked;
  unsigned char *zOut;
  Blob packed;

  blob_zero(&packed);
  blob_resize(&packed, nBound+4);
  zOut = (unsigned char*)blob_buffer(&packed);

  /* Size header, most significant byte first. */
  zOut[0] = (nRaw>>24) & 0xff;
  zOut[1] = (nRaw>>16) & 0xff;
  zOut[2] = (nRaw>>8) & 0xff;
  zOut[3] = nRaw & 0xff;

  nPacked = (long int)nBound;
  compress(&zOut[4], &nPacked,
           (unsigned char*)blob_buffer(pIn), blob_size(pIn));

  if( pOut==pIn ){
    blob_reset(pOut);
  }
  blob_is_reset(pOut);
  *pOut = packed;
  /* Trim to the header plus the bytes compress() actually produced. */
  blob_resize(pOut, nPacked+4);
}

/*
** COMMAND: test-compress
*/
void compress_cmd(void){
  /* Read the file named by argv[2], compress it in place, and write
  ** the result to the file named by argv[3]. */
  Blob content;
  if( g.argc!=4 ){
    usage("INPUTFILE OUTPUTFILE");
  }
  blob_read_from_file(&content, g.argv[2]);
  blob_compress(&content, &content);
  blob_write_to_file(&content, g.argv[3]);
}

/*
** Compress the concatenation of blobs pIn1 and pIn2.  Store the result
** in pOut.
**
** pOut must be either uninitialized or must be the same as either pIn1 or
** pIn2.
**
** The output begins with a 4-byte big-endian header holding the
** combined uncompressed size, followed by a single deflate stream
** covering both inputs.  The result is built in a temporary blob, so
** both inputs are still intact while they are being read.  The return
** codes of the zlib calls are not checked.
*/
void blob_compress2(Blob *pIn1, Blob *pIn2, Blob *pOut){
  unsigned int nIn = blob_size(pIn1) + blob_size(pIn2);
  /* Output buffer size.  NOTE(review): presumably zlib's worst-case
  ** bound for nIn bytes of input - confirm. */
  unsigned int nOut = 13 + nIn + (nIn+999)/1000;
  unsigned char *outBuf;
  z_stream stream;
  Blob temp;
  blob_zero(&temp);
  blob_resize(&temp, nOut+4);
  outBuf = (unsigned char*)blob_buffer(&temp);
  /* Size header, most significant byte first. */
  outBuf[0] = nIn>>24 & 0xff;
  outBuf[1] = nIn>>16 & 0xff;
  outBuf[2] = nIn>>8 & 0xff;
  outBuf[3] = nIn & 0xff;
  /* Null zalloc/zfree/opaque.  NOTE(review): presumably this selects
  ** zlib's default allocator - confirm against the zlib manual. */
  stream.zalloc = (alloc_func)0;
  stream.zfree = (free_func)0;
  stream.opaque = 0;
  /* Compressed output goes after the 4-byte header. */
  stream.avail_out = nOut;
  stream.next_out = &outBuf[4];
  deflateInit(&stream, 9);
  /* Feed the first input, then the second, into the same stream. */
  stream.avail_in = blob_size(pIn1);
  stream.next_in = (unsigned char*)blob_buffer(pIn1);
  deflate(&stream, 0);
  stream.avail_in = blob_size(pIn2);
  stream.next_in = (unsigned char*)blob_buffer(pIn2);
  deflate(&stream, 0);
  deflate(&stream, Z_FINISH);
  /* Trim to the header plus the bytes zlib reports as written. */
  blob_resize(&temp, stream.total_out + 4);
  deflateEnd(&stream);
  /* Release whichever input is being overwritten before replacing it. */
  if( pOut==pIn1 ) blob_reset(pOut);
  if( pOut==pIn2 ) blob_reset(pOut);
  blob_is_reset(pOut);
  *pOut = temp;
}

/*
** COMMAND: test-compress-2
*/
void compress2_cmd(void){
  /* Read the files named by argv[2] and argv[3], compress their
  ** concatenation, and write the result to the file named by argv[4]. */
  Blob first, second;
  if( g.argc!=5 ){
    usage("INPUTFILE1 INPUTFILE2 OUTPUTFILE");
  }
  blob_read_from_file(&first, g.argv[2]);
  blob_read_from_file(&second, g.argv[3]);
  blob_compress2(&first, &second, &first);
  blob_write_to_file(&first, g.argv[4]);
}

/*
** Uncompress blob pIn and store the result in pOut.  It is ok for pIn and
** pOut to be the same blob.
**
** pOut must be either uninitialized or the same as pIn.
**
** The input is expected to start with a 4-byte big-endian header
** holding the uncompressed size, followed by data that zlib's
** uncompress() can decode.
**
** Return 0 on success and 1 if uncompress() reports an error, in which
** case pOut is left untouched.  An input of 4 bytes or less also
** returns 0 and leaves pOut untouched.
*/
int blob_uncompress(Blob *pIn, Blob *pOut){
  unsigned int nOut;
  unsigned char *inBuf;
  unsigned int nIn = blob_size(pIn);
  Blob temp;
  int rc;
  unsigned long int nOut2;
  if( nIn<=4 ){
    return 0;
  }
  inBuf = (unsigned char*)blob_buffer(pIn);
  /* Decode the size header.  Each byte is widened to unsigned int
  ** before shifting; a bare inBuf[0]<<24 is computed as a signed int
  ** and overflows when the top bit of the byte is set. */
  nOut = ((unsigned int)inBuf[0]<<24) | ((unsigned int)inBuf[1]<<16)
       | ((unsigned int)inBuf[2]<<8) | (unsigned int)inBuf[3];
  blob_zero(&temp);
  blob_resize(&temp, nOut+1);
  nOut2 = nOut;
  /* The compressed data starts after the header, so only nIn-4 bytes
  ** are available at &inBuf[4].  Passing the full blob size would let
  ** zlib read up to 4 bytes past the end of the content. */
  rc = uncompress((unsigned char*)blob_buffer(&temp), &nOut2,
                  &inBuf[4], nIn - 4);
  if( rc!=Z_OK ){
    blob_reset(&temp);
    return 1;
  }
  blob_resize(&temp, nOut2);
  if( pOut==pIn ) blob_reset(pOut);
  blob_is_reset(pOut);
  *pOut = temp;
  return 0;
}

/*
** COMMAND: test-uncompress
*/
void uncompress_cmd(void){
  /* Read the file named by argv[2], uncompress it in place, and write
  ** the result to the file named by argv[3].  The return value of
  ** blob_uncompress() is not examined. */
  Blob content;
  if( g.argc!=4 ){
    usage("INPUTFILE OUTPUTFILE");
  }
  blob_read_from_file(&content, g.argv[2]);
  blob_uncompress(&content, &content);
  blob_write_to_file(&content, g.argv[3]);
}

/*
** COMMAND: test-cycle-compress
**
** Compress and uncompress each file named on the command line.
** Verify that the original content is recovered.
*/
void test_cycle_compress(void){
  /* For each file named on the command line: read it into b1, compress
  ** b1 into b2, uncompress b2 into b3, and require that b3 equals b1.
  ** "ok" is printed once every file has passed. */
  int i;
  Blob b1, b2, b3;
  for(i=2; i<g.argc; i++){
    blob_read_from_file(&b1, g.argv[i]);
    blob_compress(&b1, &b2);
    /* A failed blob_uncompress() leaves b3 uninitialized, so its return
    ** value is tested first and b3 is compared only on success.
    ** NOTE(review): this relies on fossil_panic() not returning -
    ** confirm. */
    if( blob_uncompress(&b2, &b3) || blob_compare(&b1, &b3) ){
      fossil_panic("compress/uncompress cycle failed for %s", g.argv[i]);
    }
    blob_reset(&b1);
    blob_reset(&b2);
    blob_reset(&b3);
  }
  printf("ok\n");
}