From 2adf4fb28af99dd72c6b2fc816bcc11e5dde1ffc Mon Sep 17 00:00:00 2001
From: erdgeist <>
Date: Fri, 8 Dec 2006 19:20:51 +0000
Subject: [PATCH] Our scanner routine for the URI query string

---
 scan_urlencoded_query.c | 57 +++++++++++++++++++++++++++++++++++++++++
 scan_urlencoded_query.h | 20 +++++++++++++++
 trackerlogic.c          |  4 +--
 3 files changed, 79 insertions(+), 2 deletions(-)
 create mode 100644 scan_urlencoded_query.c
 create mode 100644 scan_urlencoded_query.h

diff --git a/scan_urlencoded_query.c b/scan_urlencoded_query.c
new file mode 100644
index 0000000..7aeabab
--- /dev/null
+++ b/scan_urlencoded_query.c
@@ -0,0 +1,57 @@
+#include "scan.h"
+
+#define BREAK_AT_QUESTIONMARK (1<<0)
+#define BREAK_AT_WHITESPACE (1<<1)
+#define BREAK_AT_AMPERSAND (1<<2)
+#define BREAK_AT_EQUALSIGN (1<<3)
+
+#define SCAN_PATH ( BREAK_AT_QUESTIONMARK | BREAK_AT_WHITESPACE )
+#define SCAN_SEARCHPATH_PARAM ( BREAK_AT_EQUALSIGN )
+#define SCAN_SEARCHPATH_VALUE ( BREAK_AT_AMPERSAND | BREAK_AT_WHITESPACE )
+
+// The idea is to either do the replacement in place or to guarantee at least
+// strlen( string ) bytes in deststring.
+// See http://www.ietf.org/rfc/rfc2396.txt:
+//       unreserved    = alphanum | mark
+//       mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
+// We add '%' to the matrix so that the scan does not stop at encoded chars.
+
+// Bitmap over ASCII 32..127: bit (c&7) of byte ((c-32)>>3) is set for each char
+// the scanner copies. Byte 1 is 0x67 so the RFC 2396 mark '*' is accepted too.
+static const unsigned char reserved_matrix[] = { 0xA2, 0x67, 0xFF, 0x03, 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x47};
+static inline int is_unreserved( unsigned char c ) {
+  if( ( c <= 32 ) || ( c >= 127 ) ) return 0;
+  return 1&(reserved_matrix[(c-32)>>3]>>(c&7));
+}
+
+// Copies *string to deststring, decoding %XX escapes, up to the first character
+// that is not unreserved. That terminator must be one the BREAK_AT_* bits in
+// flags allow; *string is then advanced past it and the number of bytes
+// written is returned. Any other terminator or a bad escape yields (size_t)-1.
+size_t scan_urlencoded_query(char **string, char *deststring, int flags) {
+  const unsigned char *s = (const unsigned char *)*string;
+  unsigned char *d = (unsigned char *)deststring;
+  int b, c, need;  // int, so the '< 0' checks on scan_fromhex's result can fire
+
+  while ( is_unreserved( c = *s++ ) ) {
+    if ( c == '%' ) {
+      if( ( c = scan_fromhex(*s++) ) < 0 ) return (size_t)-1;
+      if( ( b = scan_fromhex(*s++) ) < 0 ) return (size_t)-1;
+      c = (c<<4)|b;
+    }
+    *d++ = (unsigned char)c;
+  }
+
+  switch( c ) {
+  case 0: case '\r': case '\n': case ' ': need = BREAK_AT_WHITESPACE; break;
+  case '?': need = BREAK_AT_QUESTIONMARK; break;
+  case '=': need = BREAK_AT_EQUALSIGN; break;
+  case '&': need = BREAK_AT_AMPERSAND; break;
+  default: return (size_t)-1;
+  }
+  // '==' binds tighter than '&', so the mask test must be parenthesized.
+  if ( ( flags & need ) == 0 ) return (size_t)-1;
+
+  *string = (char *)s;
+  return (size_t)( d - (unsigned char *)deststring );
+}
diff --git a/scan_urlencoded_query.h b/scan_urlencoded_query.h
new file mode 100644
index 0000000..379bc32
--- /dev/null
+++ b/scan_urlencoded_query.h
@@ -0,0 +1,20 @@
+#ifndef __SCAN_URLENCODED_QUERY_H__
+#define __SCAN_URLENCODED_QUERY_H__
+#include <stddef.h>
+
+#define BREAK_AT_QUESTIONMARK (1<<0)
+#define BREAK_AT_WHITESPACE   (1<<1)
+#define BREAK_AT_AMPERSAND    (1<<2)
+#define BREAK_AT_EQUALSIGN    (1<<3)
+
+#define SCAN_PATH             ( BREAK_AT_QUESTIONMARK | BREAK_AT_WHITESPACE )
+#define SCAN_SEARCHPATH_PARAM ( BREAK_AT_EQUALSIGN )
+#define SCAN_SEARCHPATH_VALUE ( BREAK_AT_AMPERSAND | BREAK_AT_WHITESPACE )
+
+// string     in: pointer to source; out: points just past the terminator
+// deststring pointer to destination (may be the source for in-place decoding)
+// flags      BREAK_AT_* bits naming the terminators accepted for this scan
+// returns    number of decoded characters written to deststring,
+//            or (size_t)-1 on parse error (*string is then left untouched)
+size_t scan_urlencoded_query(char **string, char *deststring, int flags);
+#endif
diff --git a/trackerlogic.c b/trackerlogic.c index 735041e..6274c41 100644 --- a/trackerlogic.c +++ b/trackerlogic.c @@ -162,7 +162,7 @@ void return_peers_for_torrent( ot_torrent torrent, unsigned long amount, char *r // Compacts a torrents peer list // * torrents older than OT_TIMEOUT are being kicked -// * is rather expansive +// * is rather expensive // * if this fails, torrent file is invalid, should add flag // void heal_torrent( ot_torrent torrent ) { @@ -269,7 +269,7 @@ int init_logic( char *directory ) { // Scan directory for filenames in the form [0-9A-F]{20} // * I know this looks ugly, but I've seen A-F to match umlauts as well in strange locales - // * lower case for .. better being safe than sorry, this is not expansive here :) + // * lower case for .. better being safe than sorry, this is not expensive here :) if( !glob( "[0-9ABCDEFabcdef][0-9ABCDEFabcdef][0-9ABCDEFabcdef][0-9ABCDEFabcdef]" "[0-9ABCDEFabcdef][0-9ABCDEFabcdef][0-9ABCDEFabcdef][0-9ABCDEFabcdef]"