File Annotation
Not logged in
7f9226a858 2008-02-06   stephan: #include <string.h>
7f9226a858 2008-02-06   stephan: #include <stdlib.h>
58ee4e6e16 2008-02-07   stephan: #include "tokenize_path.h"
7f9226a858 2008-02-06   stephan: 
/**
   tokenize_path_free() releases a string array which was returned by
   tokenize_path(). It is the only supported way to deallocate such an
   array and must be called exactly once per non-null return value;
   skipping the call leaks both the array and the string data.

   If p is null this function does nothing. Passing any pointer which
   did not come from tokenize_path() results in undefined behaviour.

   After this call p and every string it referred to are invalid.
*/
void tokenize_path_free( char ** p )
{
  if( ! p ) return;
  /* The caller-visible array starts one slot past the real
     allocation; the hidden slot in front of it owns the single
     buffer which backs every token string. */
  char ** const base = p - 1;
  free( base[0] ); /* the shared token buffer */
  free( base );    /* the pointer array itself */
}
7f9226a858 2008-02-06   stephan: 
/**
   tokenize_path_is_separator() is the default predicate function for
   tokenize_path(). It returns 1 if (c == '/'), else it returns 0.
*/
int tokenize_path_is_separator( int c )
{
  return ('/' == c) ? 1 : 0;
}

/**
   tokenize_path() takes a delimited, null-terminated, path-style
   string (like a path to a file) and tokenizes it into its component
   parts.

   Parameters:

   - in: the string to tokenize. It is not modified. May be null.

   - out: if not null, it is set to the number of tokens in the
   returned array (0 whenever the function returns null).

   - predicate: a unary function which takes a single character and
   returns true only if that character is a "separator character". If
   it is 0 then tokenize_path_is_separator() is used. The predicate
   must return the same answer every time it is given the same
   character, and it is never called with the terminating '\0'.

   Return value:

   A null-terminated array of strings, or null if the input string is
   null, empty, contains only separator characters, or if memory
   could not be allocated. Because the array is null-terminated it
   can be looped over without consulting 'out', though having the
   count in advance is sometimes convenient.

   The result must be freed via tokenize_path_free(). DO NOT pass it
   to free(): the array and all of its strings live in two hidden
   allocations (to minimize the number of allocations), and calling
   free() on the returned pointer is undefined behaviour.

   Tokenizing behaviour:

   - It assumes that ALL non-separator chars are entry names.

   - It treats runs of multiple separator chars as a single
   separator, NOT as a series of empty tokens.

   - It has no knowledge of relative or absolute paths, so
   "." and ".." are considered to be normal entries.

   - The returned strings are non-const, but the caller must not
   change their sizes or reallocate them at different memory
   addresses. Changing the string content in place is legal.

   e.g.:

   "/path/to/nowhere" and "path/to///nowhere/" both parse to
   { "path", "to", "nowhere" }

   "/./../" parses to { ".", ".." }

   "http://foo.com/bar" parses to { "http:", "foo.com", "bar" }

   (Note that those arrays all have an implicit NULL entry as their
   last element.)
*/
char ** tokenize_path( char const * in,
                       int * out,
                       int (*predicate)( int )
                       )
{ /* Author: sgbeal@googlemail.com. License: Public Domain. */
  int ignored = 0;
  if( ! out ) out = &ignored;
  *out = 0;
  /* Reject null BEFORE touching the string: strlen(NULL) is
     undefined behaviour. */
  if( (! in) || ('\0' == *in) ) return 0;
  int (*is_sep)( int ) = predicate ? predicate : tokenize_path_is_separator;

  /* Pass 1: count the tokens so that the result array can be sized
     exactly, with no input-sized scratch array on the stack. */
  size_t const inlen = strlen( in );
  size_t count = 0;
  int in_token = 0; /* non-zero while we are inside a path element. */
  for( char const * p = in; *p != '\0'; ++p )
  {
    if( is_sep( *p ) )
    {
      in_token = 0;
    }
    else if( ! in_token )
    {
      in_token = 1;
      ++count;
    }
  }
  if( 0 == count ) return 0; /* nothing but separators. */
  /* The count is reported through an int. (~0u >> 1) is the largest
     int value on platforms without padding bits. */
  if( count > (size_t)(~0u >> 1) ) return 0;

  /* We tokenize a private copy: separators in the copy are
     overwritten with '\0' and the returned pointers refer into it,
     so no per-token allocation is needed. */
  char * cp = malloc( inlen + 1 );
  if( ! cp ) return 0;
  /* Two extra entries: a hidden leading slot which remembers 'cp'
     for tokenize_path_free(), and the trailing null terminator
     (already zeroed by calloc()). */
  char ** ret = calloc( count + 2, sizeof *ret );
  if( ! ret )
  {
    free( cp );
    return 0;
  }
  memcpy( cp, in, inlen + 1 );
  ret[0] = cp;
  ++ret; /* hide the bookkeeping slot from the caller. */

  /* Pass 2: slice up the copy and record where each token begins. */
  size_t n = 0;
  in_token = 0;
  for( char * curs = cp; *curs != '\0'; ++curs )
  {
    if( is_sep( *curs ) )
    {
      *curs = '\0';
      in_token = 0;
    }
    else if( (! in_token) && (n < count) )
    { /* (n < count) only matters if a misbehaving predicate answers
         differently than it did during pass 1; it keeps us inside
         the array. */
      ret[n++] = curs;
      in_token = 1;
    }
  }
  *out = (int)n;
  return ret;
}
7f9226a858 2008-02-06   stephan: 
extern void cgi_printf(const char *zFormat,...);
/**
   render_linked_path() splits a /unix/style/path into its entries
   (via tokenize_path(), using the default '/' separator) and emits,
   via cgi_printf(), one hyperlink per entry, with a "/" between
   consecutive links.

   If path is null, empty, or consists solely of separators, nothing
   is emitted. If root is not null, every link target is prefixed
   with "root/"; if root is null no prefix is emitted.

   Example:

   render_linked_path( "/AAA", "b/c/d" );

   renders a list similar to the following, where the text in
   [brackets] is hyperlinked:

   [b]/[c]/[d]

   and the link targets are:

   b: /AAA/b
   c: /AAA/b/c
   d: /AAA/b/c/d

   NOTE(review): root and the path entries are handed to cgi_printf()
   through plain %s conversions. Whether cgi_printf() escapes them
   for HTML cannot be determined from this file -- confirm before
   passing untrusted paths.
*/
void render_linked_path( char const * root,
                         char const * path )
{
  int ntoks = 0;
  char ** entries = tokenize_path( path, &ntoks, 0 );
  if( ! entries ) return;
  int idx;
  for( idx = 0; entries[idx]; ++idx )
  {
    char const * name = entries[idx];
    cgi_printf( "<a href='" );
    if( root )
    {
      cgi_printf( "%s/", root );
    }
    /* The link target is the chain of all preceding entries
       followed by this entry. */
    int k;
    for( k = 0; k < idx; ++k )
    {
      cgi_printf("%s/", entries[k] );
    }
    cgi_printf("%s'>%s</a>", name, name );
    if( idx != (ntoks - 1) )
    { /* separator after every link except the last one. */
      cgi_printf("/");
    }
  }
  tokenize_path_free( entries );
}
7f9226a858 2008-02-06   stephan: 
#if 0 /* set to 1 to compile a test app. */
#include <stdio.h>

/* Separator used by is_sep_char(); main() overrides it. */
static int sep_char = '?';

/* Predicate for tokenize_path(): true only for the current sep_char. */
static int is_sep_char( int c )
{
  return c == sep_char;
}

/*
   Usage: app [path [separator]]

   Tokenizes argv[1] on the first character of argv[2] (default '/')
   and prints one token per line. Returns 1 if no tokens were parsed,
   else 0.
*/
int main( int argc, char ** argv )
{
  int ntoks = 0;
  if( argc > 2 )
  {
    sep_char = (argv[2])[0];
  }
  else
  {
    sep_char = '/';
  }
  printf( "sep_char==%c\n",sep_char);
  char const * input = (1 == argc) ? 0 : argv[1];
  char ** list = tokenize_path( input, &ntoks, is_sep_char );
  printf( "parsed path: count=%d\n", ntoks );
  if( 0 == ntoks )
  {
    printf("error: path didn't parse :(\n");
    return 1;
  }
  int idx;
  for( idx = 0; list[idx]; ++idx )
  {
    printf( "\t%s\n", list[idx] );
  }
  tokenize_path_free( list );
  printf( "Bye!\n");
  return 0;
}
#endif