diff --git a/textcode/scan_base64.3 b/textcode/scan_base64.3 index b7deb43..64e0abc 100644 --- a/textcode/scan_base64.3 +++ b/textcode/scan_base64.3 @@ -16,10 +16,6 @@ Note that real world base64 encoded data is sometimes permitted to contain whitespace characters or new lines. This function will not allow those and return the decoded data until then. -base64 works by taking 3 bytes of binary input and converting them into -4 bytes of printable ASCII. If the input ends in the middle of a base64 -4-byte-tuple, scan_base64 will disregard the whole tuple. - Many base64 variants demand padding in the last block. Some don't. This implementation will consume padding if it is there, but will not complain if it is not. @@ -30,7 +26,7 @@ dest can be NULL. destlen can be NULL. scan_base64 returns the number of bytes successfully scanned and processed from src. .SH EXAMPLES -scan_base64("%9FYO return 8, i=5, buf="fnord" +scan_base64("Zm5vcmQ=",buf,&i) -> return 8, i=5, buf="fnord" .SH "SEE ALSO" -scan_xlong(3), scan_8long(3), fmt_ulong(3) +scan_base64url(3), scan_xlong(3), scan_8long(3), fmt_ulong(3) diff --git a/textcode/scan_base64.c b/textcode/scan_base64.c index 6fdc5c7..41aa03e 100644 --- a/textcode/scan_base64.c +++ b/textcode/scan_base64.c @@ -48,8 +48,10 @@ int main() { char buf[100]; size_t i,l; memset(buf,0,10); assert(scan_base64("Zm5vcmQ=",buf,&l)==8 && l==5 && !memcmp(buf,"fnord",6)); + /* check that we don't insist on the padding */ memset(buf,0,10); assert(scan_base64("Zm5vcmQ",buf,&l)==7 && l==5 && !memcmp(buf,"fnord",6)); - memset(buf,0,10); assert(scan_base64("//8=",buf,&l)==4 && l==2 && !memcmp(buf,"\xff\xff",3)); + /* check the special non-isalnum chars :) */ + memset(buf,0,10); assert(scan_base64("/+8=",buf,&l)==4 && l==2 && !memcmp(buf,"\xff\xef",3)); return 0; } #endif diff --git a/textcode/scan_base64url.3 b/textcode/scan_base64url.3 new file mode 100644 index 0000000..c0ccd0a --- /dev/null +++ b/textcode/scan_base64url.3 @@ -0,0 +1,32 @@ +.TH 
scan_base64url 3 +.SH NAME +scan_base64url \- decode base64url encoded data +.SH SYNTAX +.B #include + +size_t \fBscan_base64url\fP(const char *\fIsrc\fR,char *\fIdest\fR,size_t* \fIdestlen\fR); + +.SH DESCRIPTION +base64url is a variant of base64 for use in URLs (standard base64 uses / +and +, which can cause problems in URLs, so base64url uses - and _ +instead; also base64url does not use = padding at the end). + +scan_base64url decodes base64url encoded data from src into dest. +It will stop when it encounters any non-valid input characters. +It will then write the number of decoded bytes in dest into *destlen, +and return the number of bytes decoded from src. + +Many base64 variants demand padding in the last block. Some don't. This +implementation does not consume padding; it stops at the first = and +will not complain if there is none. + +dest can be NULL. destlen can be NULL. + +.SH "RETURN VALUE" +scan_base64url returns the number of bytes successfully scanned and +processed from src. +.SH EXAMPLES +scan_base64url("Zm5vcmQ",buf,&i) -> return 7, i=5, buf="fnord" + +.SH "SEE ALSO" +scan_base64(3), scan_xlong(3), scan_8long(3), fmt_ulong(3) diff --git a/textcode/scan_base64url.c b/textcode/scan_base64url.c index f1c1c5f..93a1ea7 100644 --- a/textcode/scan_base64url.c +++ b/textcode/scan_base64url.c @@ -15,17 +15,36 @@ static inline int dec(unsigned char x) { size_t scan_base64url(const char *src,char *dest,size_t *destlen) { unsigned short tmp=0,bits=0; register const unsigned char* s=(const unsigned char*) src; - const char* orig=dest; - for (;;) { + size_t i,j=0; + for (i=0;;) { int a=dec(*s); - if (a<0) break; /* base64url does not have padding */ tmp=(tmp<<6)|a; bits+=6; ++s; if (bits>=8) { - *dest=(tmp>>(bits-=8)); - ++dest; + bits-=8; + if (dest) dest[i]=(tmp>>bits); + ++i; } } - *destlen=dest-orig; + if (destlen) *destlen=i; return (const char*)s-src; } + +#ifdef UNITTEST +#include +#include +#include + +int main() { + char buf[100]; + size_t i,l; + /* 
check that we don't consume padding */ + memset(buf,0,10); assert(scan_base64url("Zm5vcmQ=",buf,&l)==7 && l==5 && !memcmp(buf,"fnord",6)); + /* check that we don't insist on the padding */ + memset(buf,0,10); assert(scan_base64url("Zm5vcmQ",buf,&l)==7 && l==5 && !memcmp(buf,"fnord",6)); + /* check the special non-isalnum chars :) */ + memset(buf,0,10); assert(scan_base64url("_-8=",buf,&l)==3 && l==2 && !memcmp(buf,"\xff\xef",3)); + return 0; +} +#endif diff --git a/textcode/scan_html.c b/textcode/scan_html.c index 5ddb50a..f3fdad0 100644 --- a/textcode/scan_html.c +++ b/textcode/scan_html.c @@ -26,8 +26,8 @@ static const char* lookup(size_t ofs,const char* t) { } enum htmlmode { /* libowfat<home */ - OUTSIDE, /* ^^^^^^^^^^^^^^^^ -> `libowfat `http://example.com/"foo´ */ + OUTSIDE, /* ^^^^^^^^^^^^^^^^ -> libowfat http://example.com/"foo */ }; static size_t scan_html_inner(const char *src,char *dest,size_t *destlen,enum htmlmode mode) { @@ -42,49 +42,41 @@ static size_t scan_html_inner(const char *src,char *dest,size_t *destlen,enum ht size_t j; if ((s[i+2]&~32)=='X') { j=scan_xlong(src+i+3,&l); - if (!j) j+=3; + if (j) j+=3; } else { j=scan_ulong(src+i+2,&l); - if (!j) j+=3; + if (j) j+=2; } if (s[i+j]==';') { i+=j; - written+=fmt_utf8(dest+written,l); + written+=fmt_utf8(dest?dest+written:0,l); } else { - dest[written++]='&'; + if (dest) dest[written]='&'; + ++written; } continue; } utf8=lookup(1,src+i+1); if (utf8) { size_t l=strlen(utf8); - memcpy(dest+written,utf8,l); + if (dest) memcpy(dest+written,utf8,l); written+=l; i+=2+str_chr(src+i+2,';'); continue; } else - dest[written]='&'; + if (dest) dest[written]='&'; } else if (s[i]=='<') { - if (mode == OUTSIDE) break; - if (case_starts((const char*)s+i+1,"br>")) { - dest[written]='\n'; - i+=3; - } else if (case_starts((const char*)s+i+1,"p>")) { - dest[written]='\n'; ++written; - dest[written]='\n'; - i+=3; - } else - dest[written]=s[i]; + break; } else if (s[i]=='"' && mode==TAGARG) { if (i==0) { dq=1; continue; 
} break; } else if (mode==TAGARG && !dq && (s[i]==' ' || s[i]=='\t' || s[i]=='\n')) break; else - dest[written]=s[i]; + if (dest) dest[written]=s[i]; ++written; } - *destlen=written; + if (destlen) *destlen=written; return i; } @@ -98,13 +90,40 @@ size_t scan_html(const char *src,char *dest,size_t *destlen) { #ifdef UNITTEST #include +#undef UNITTEST +#include +#include +#include +#include +#include +#include +#include +#include int main() { char* html="libowfat<home"; char buf[100]; size_t destlen; + /* check that we stop at < */ assert(scan_html(html,buf,&destlen)==0 && destlen==0); - assert(scan_html(strchr(html,'>')+1,buf,&destlen)==16 && destlen==13 && !memcmp(buf,"libowfat')+1,buf,&destlen)==16 && destlen==13 && !memcmp(buf,"libowfat