7f9226a858 2008-02-06 stephan: #include <string.h> 7f9226a858 2008-02-06 stephan: #include <stdlib.h> 58ee4e6e16 2008-02-07 stephan: #include "tokenize_path.h" 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: /** 7f9226a858 2008-02-06 stephan: tokenize_path_free() is the only publically-defined way to deallocate 7f9226a858 2008-02-06 stephan: a string array created by tokenize_path(). It must be called exactly 7f9226a858 2008-02-06 stephan: once for each return value from tokenize_path(). Failing to call it 7f9226a858 2008-02-06 stephan: will result in a memory leak. 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: If (!p) then this function does nothing. Passing a pointer which was 7f9226a858 2008-02-06 stephan: not returned from tokenize_path() will result in undefined behaviour. 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: After calling this, p's contents are invalid. 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: */ 7f9226a858 2008-02-06 stephan: void tokenize_path_free( char ** p ) 7f9226a858 2008-02-06 stephan: { 7f9226a858 2008-02-06 stephan: if( p ) 7f9226a858 2008-02-06 stephan: { 7f9226a858 2008-02-06 stephan: /* Free the tokenized strings (a single string, actually): */ 7f9226a858 2008-02-06 stephan: free( *(p-1) ); 7f9226a858 2008-02-06 stephan: /* Free p from its REAL starting point. */ 7f9226a858 2008-02-06 stephan: free( p-1 ); 7f9226a858 2008-02-06 stephan: } 7f9226a858 2008-02-06 stephan: } 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: /** 7f9226a858 2008-02-06 stephan: tokenize_path_is_separator() is the default predicate function for 7f9226a858 2008-02-06 stephan: tokenize_path(). It returns 1 if (c == '/'), else it returns 0. 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: */ 7f9226a858 2008-02-06 stephan: int tokenize_path_is_separator( int c ) 7f9226a858 2008-02-06 stephan: { 7f9226a858 2008-02-06 stephan: return (c == '/'); 7f9226a858 2008-02-06 stephan: } 7f9226a858 2008-02-06 stephan: /** 7f9226a858 2008-02-06 stephan: tokenize_path() takes a string, assumed to be a delimited 7f9226a858 2008-02-06 stephan: null-terminated path-style string (like a path to a file), and 7f9226a858 2008-02-06 stephan: tokenizes it into its component parts. The 'out' parameter (if not 7f9226a858 2008-02-06 stephan: null) is set to the number of tokenized items (may be 0). 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: The third argument is a unary predicate function which takes 7f9226a858 2008-02-06 stephan: a single character and returns true only if that character 7f9226a858 2008-02-06 stephan: is a "separator character". If the 3rd argument is 0 then 7f9226a858 2008-02-06 stephan: tokenize_path_is_separator() is used. 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: The function returns a list of strings (or 0) which must be freed 7f9226a858 2008-02-06 stephan: via tokenize_path_free() because the internal allocation of the 7f9226a858 2008-02-06 stephan: return result is a bit tricky (to minimize on allocations). DO NOT 7f9226a858 2008-02-06 stephan: pass the return result to free(), as that will cause undefined 7f9226a858 2008-02-06 stephan: behaviour. Because the returned array is null-terminated, the second 7f9226a858 2008-02-06 stephan: parameter is normally not needed because the array can safely 7f9226a858 2008-02-06 stephan: be looped over without knowing its length in advance. Nonetheless, 7f9226a858 2008-02-06 stephan: having the count before looping may be useful for some cases. 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: However, 7f9226a858 2008-02-06 stephan: the returned array 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: The returned string array is always null-terminated, to simplify 7f9226a858 2008-02-06 stephan: looping over it. The function returns null if the input string is 7f9226a858 2008-02-06 stephan: null, empty, or contains only separator characters. 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: Tokenizing behaviour: 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: - It assumes that ALL non-separator chars are entry names. 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: - It treats runs of multiple separators chars as a single 7f9226a858 2008-02-06 stephan: separator, NOT as a series of empty tokens. 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: - It has no knowledge of relative or absolute paths, so 7f9226a858 2008-02-06 stephan: "." and ".." are considered to be normal entries. 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: - The returned strings are non-const, but the caller must not 7f9226a858 2008-02-06 stephan: change their sizes or reallocate them at different memory 7f9226a858 2008-02-06 stephan: addresses. The only legal way to deallocate them is with 7f9226a858 2008-02-06 stephan: tokenize_path_free(). Changing the string content IS is legal. 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: e.g.: 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: "/path/to/nowhere" and "path/to///nowhere/" both parse to: 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: Parses to: { "path", "to", "nowhere" } 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: "/./../" 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: Parses to: { ".", ".." } 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: "http://foo.com/bar" 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: Parses to: { "http:", "foo.com", "bar" } 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: (Note that those arrays all have an implicit NULL entry as their 7f9226a858 2008-02-06 stephan: last element. ) 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: */ 7f9226a858 2008-02-06 stephan: char ** tokenize_path( char const * in, 7f9226a858 2008-02-06 stephan: int * out, 7f9226a858 2008-02-06 stephan: int (*predicate)( int ) 7f9226a858 2008-02-06 stephan: ) 7f9226a858 2008-02-06 stephan: { /* Author: sgbeal@googlemail.com. License: Public Domain. */ 7f9226a858 2008-02-06 stephan: int ignored; 7f9226a858 2008-02-06 stephan: if( ! out ) out = &ignored; 7f9226a858 2008-02-06 stephan: *out = 0; 7f9226a858 2008-02-06 stephan: typedef int (*sep_f)( int ); 7f9226a858 2008-02-06 stephan: sep_f is_sep = (predicate ? predicate : tokenize_path_is_separator); 7f9226a858 2008-02-06 stephan: int inlen = strlen(in); 7f9226a858 2008-02-06 stephan: if( (! in) || (0==inlen) ) return 0; 7f9226a858 2008-02-06 stephan: char * cp = malloc( inlen + 1 ); 7f9226a858 2008-02-06 stephan: /** 7f9226a858 2008-02-06 stephan: We make a copy because: 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: Our algorithm is to replace separators with 0 in our copy, and 7f9226a858 2008-02-06 stephan: use that copy as our return value. This allows us to avoid 7f9226a858 2008-02-06 stephan: allocating a new string for each returned result. 7f9226a858 2008-02-06 stephan: */ 7f9226a858 2008-02-06 stephan: strcpy( cp, in ); 7f9226a858 2008-02-06 stephan: /** 7f9226a858 2008-02-06 stephan: buffsize = the largest possible number of return result we can 7f9226a858 2008-02-06 stephan: have, plus 1 (to allow for truncated division). The maximum size 7f9226a858 2008-02-06 stephan: is determined based on worst-case scenario: a list of single 7f9226a858 2008-02-06 stephan: characters, each separated by one separators, e.g. "/1/1/1/1/1" 7f9226a858 2008-02-06 stephan: */ 7f9226a858 2008-02-06 stephan: const int buffsize = inlen / 2 + 1; 7f9226a858 2008-02-06 stephan: /* 'starts' stores the starting point of each path component 7f9226a858 2008-02-06 stephan: substring of 'cp'. When we slice up 'cp' below, starts[x] 7f9226a858 2008-02-06 stephan: will be set to point to a particular position within 'cp'. 7f9226a858 2008-02-06 stephan: That allows us to avoid allocating/copying each element 7f9226a858 2008-02-06 stephan: separately. 7f9226a858 2008-02-06 stephan: */ 7f9226a858 2008-02-06 stephan: char * starts[buffsize]; 7f9226a858 2008-02-06 stephan: int i = 0; 7f9226a858 2008-02-06 stephan: for( i = 0; i < buffsize; ++i ) starts[i] = 0; 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: char * curs = cp; 7f9226a858 2008-02-06 stephan: for( curs = cp; is_sep(*curs); ++curs ); 7f9226a858 2008-02-06 stephan: /* ^^^ We skip leading separators so we can easily 7f9226a858 2008-02-06 stephan: mark where the first entry string actually begins. 7f9226a858 2008-02-06 stephan: */ 7f9226a858 2008-02-06 stephan: if( '\0' == curs ) 7f9226a858 2008-02-06 stephan: { 7f9226a858 2008-02-06 stephan: free( cp ); 7f9226a858 2008-02-06 stephan: return 0; 7f9226a858 2008-02-06 stephan: } 7f9226a858 2008-02-06 stephan: char * mark = curs; /* placeholder for holding the head addr of strings. */ 7f9226a858 2008-02-06 stephan: int count = 0; /* total number of elements we end up tokenizing. */ 7f9226a858 2008-02-06 stephan: int started = 0; /* toggled when we enter a new path element. */ 7f9226a858 2008-02-06 stephan: for( ; *curs != '\0'; ++curs ) 7f9226a858 2008-02-06 stephan: { 7f9226a858 2008-02-06 stephan: /** Replace '/' with '\0'... */ 7f9226a858 2008-02-06 stephan: if( is_sep(*curs) ) 7f9226a858 2008-02-06 stephan: { 7f9226a858 2008-02-06 stephan: *curs = '\0'; 7f9226a858 2008-02-06 stephan: mark = curs+1; 7f9226a858 2008-02-06 stephan: started = 0; 7f9226a858 2008-02-06 stephan: continue; 7f9226a858 2008-02-06 stephan: } 7f9226a858 2008-02-06 stephan: if( ! started ) 7f9226a858 2008-02-06 stephan: { /** Start a new path element... */ 7f9226a858 2008-02-06 stephan: starts[count] = mark; 7f9226a858 2008-02-06 stephan: started = 1; 7f9226a858 2008-02-06 stephan: ++count; 7f9226a858 2008-02-06 stephan: } 7f9226a858 2008-02-06 stephan: } 7f9226a858 2008-02-06 stephan: if( ! starts[0] ) 7f9226a858 2008-02-06 stephan: { 7f9226a858 2008-02-06 stephan: free( cp ); 7f9226a858 2008-02-06 stephan: return 0; 7f9226a858 2008-02-06 stephan: } 7f9226a858 2008-02-06 stephan: cp[inlen] = '\0'; 7f9226a858 2008-02-06 stephan: char ** ret = calloc( count + 2, sizeof(char*) ); 7f9226a858 2008-02-06 stephan: /* We over-allocate by 2 entries. The first one holds the address of 7f9226a858 2008-02-06 stephan: 'cp' and the last one is set to 0 to simplify looping over the 7f9226a858 2008-02-06 stephan: array. */ 7f9226a858 2008-02-06 stephan: *out = count; 7f9226a858 2008-02-06 stephan: ret[0] = cp; 7f9226a858 2008-02-06 stephan: ++ret; 7f9226a858 2008-02-06 stephan: /** 7f9226a858 2008-02-06 stephan: We're going to hide that [0] entry from the caller. Instead, we 7f9226a858 2008-02-06 stephan: use that to hold the address of 'cp'. In tokenize_path_free() 7f9226a858 2008-02-06 stephan: we release both that string and (ret-1). 7f9226a858 2008-02-06 stephan: */ 7f9226a858 2008-02-06 stephan: for( i = 0; i < count; ++i ) 7f9226a858 2008-02-06 stephan: { 7f9226a858 2008-02-06 stephan: ret[i] = starts[i]; 7f9226a858 2008-02-06 stephan: } 7f9226a858 2008-02-06 stephan: ret[count] = 0; 7f9226a858 2008-02-06 stephan: return ret; 7f9226a858 2008-02-06 stephan: } 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: extern void cgi_printf(const char *zFormat,...); 7f9226a858 2008-02-06 stephan: /** 7f9226a858 2008-02-06 stephan: render_linked_path() takes a root path and a /unix/style/path and 7f9226a858 2008-02-06 stephan: renders (using cgi_printf()) a clickable list of the entries in the 7f9226a858 2008-02-06 stephan: path. If path is null it does nothing. If root is null it is treated 7f9226a858 2008-02-06 stephan: as an empty string. 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: Example: 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: render_linked_path( "/AAA", "b/c/d" ); 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: It would render a list similar to the following, 7f9226a858 2008-02-06 stephan: but think of the text in [brackets] as hyperlinked: 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: [b]/[c]/[d] 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: Each element is linked to a path like so: 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: b: root/b 7f9226a858 2008-02-06 stephan: c: root/b/c 7f9226a858 2008-02-06 stephan: d: root/b/c/d 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: If root is null then the 'root/' part is not applied. 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: */ 7f9226a858 2008-02-06 stephan: void render_linked_path( char const * root, 7f9226a858 2008-02-06 stephan: char const * path ) 7f9226a858 2008-02-06 stephan: { 7f9226a858 2008-02-06 stephan: int count = 0; 7f9226a858 2008-02-06 stephan: char ** toks = tokenize_path( path, &count, 0 ); 7f9226a858 2008-02-06 stephan: if( ! toks ) return; 7f9226a858 2008-02-06 stephan: char const * t = 0; 7f9226a858 2008-02-06 stephan: int pos = 0; 7f9226a858 2008-02-06 stephan: for( t = toks[pos]; t; t = toks[++pos] ) 7f9226a858 2008-02-06 stephan: { 7f9226a858 2008-02-06 stephan: cgi_printf( "<a href='" ); 7f9226a858 2008-02-06 stephan: if( root ) 7f9226a858 2008-02-06 stephan: { 7f9226a858 2008-02-06 stephan: cgi_printf( "%s/", root ); 7f9226a858 2008-02-06 stephan: } 7f9226a858 2008-02-06 stephan: int bpos = 0; 7f9226a858 2008-02-06 stephan: for( ; bpos < pos; ++bpos ) 7f9226a858 2008-02-06 stephan: { 7f9226a858 2008-02-06 stephan: cgi_printf("%s/", toks[bpos] ); 7f9226a858 2008-02-06 stephan: } 7f9226a858 2008-02-06 stephan: cgi_printf("%s'>%s</a>", t, t ); 7f9226a858 2008-02-06 stephan: if( pos != (count-1) ) 7f9226a858 2008-02-06 stephan: { 7f9226a858 2008-02-06 stephan: cgi_printf("/"); 7f9226a858 2008-02-06 stephan: } 7f9226a858 2008-02-06 stephan: } 7f9226a858 2008-02-06 stephan: tokenize_path_free( toks ); 7f9226a858 2008-02-06 stephan: } 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: #if 0 /* set to 1 to compile a test app. */ 7f9226a858 2008-02-06 stephan: #include <stdio.h> 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: static int sep_char = '?'; 7f9226a858 2008-02-06 stephan: static int is_sep_char( int c ) 7f9226a858 2008-02-06 stephan: { 7f9226a858 2008-02-06 stephan: return c == sep_char; 7f9226a858 2008-02-06 stephan: } 7f9226a858 2008-02-06 stephan: int main( int argc, char ** argv ) 7f9226a858 2008-02-06 stephan: { 7f9226a858 2008-02-06 stephan: 7f9226a858 2008-02-06 stephan: int count = 0; 7f9226a858 2008-02-06 stephan: sep_char = ( (argc>2) ? (argv[2])[0] : '/'); 7f9226a858 2008-02-06 stephan: printf( "sep_char==%c\n",sep_char); 7f9226a858 2008-02-06 stephan: char ** l = tokenize_path( argc==1 ? 0 : argv[1], 7f9226a858 2008-02-06 stephan: &count, 7f9226a858 2008-02-06 stephan: is_sep_char ); 7f9226a858 2008-02-06 stephan: printf( "parsed path: count=%d\n", count ); 7f9226a858 2008-02-06 stephan: if( ! count ) 7f9226a858 2008-02-06 stephan: { 7f9226a858 2008-02-06 stephan: printf("error: path didn't parse :(\n"); 7f9226a858 2008-02-06 stephan: return 1; 7f9226a858 2008-02-06 stephan: } 7f9226a858 2008-02-06 stephan: char * x; 7f9226a858 2008-02-06 stephan: int i = 0; 7f9226a858 2008-02-06 stephan: for( x = l[0]; x; x = l[++i] ) 7f9226a858 2008-02-06 stephan: { 7f9226a858 2008-02-06 stephan: printf( "\t%s\n", x ); 7f9226a858 2008-02-06 stephan: } 7f9226a858 2008-02-06 stephan: tokenize_path_free( l ); 7f9226a858 2008-02-06 stephan: printf( "Bye!\n"); 7f9226a858 2008-02-06 stephan: return 0; 7f9226a858 2008-02-06 stephan: } 7f9226a858 2008-02-06 stephan: #endif