libowfat/textcode/scan_cescape.c

102 lines
2.7 KiB
C

#include "fmt.h"
#include "textcode.h"
#include "scan.h"
/*
 * Decode a string that uses C-style backslash escapes.
 *
 * src      NUL-terminated input.  Scanning stops at the terminating NUL
 *          or at the first double quote ('"'), whichever comes first.
 * dest     Output buffer for the decoded bytes, or a null pointer to
 *          only compute the decoded length.  No bound is checked here;
 *          the caller must provide enough room.
 * destlen  If non-null, receives the number of decoded bytes.
 *
 * Returns the index in src at which scanning stopped.  For a \u or \U
 * escape with too few hex digits this is the index of its backslash,
 * and *destlen covers only what was decoded before it.
 *
 * Recognized escapes: \a \b \e \f \n \r \t \v \\, \xH or \xHH (hex),
 * \O .. \OOO (octal), \uHHHH and \UHHHHHHHH (emitted via fmt_utf8).
 * A backslash followed by anything else is copied literally and the
 * following character is then processed as ordinary input.  A \x that
 * is not followed by a hex digit yields a literal backslash and the
 * 'x' is consumed.
 */
size_t scan_cescape(const char *src,char *dest,size_t *destlen) {
  const unsigned char *s=(const unsigned char *)src;
  size_t written=0,i;
  char c;
  for (i=0; s[i]; ++i) {
    if ((c=s[i])=='\\') {
      switch (s[i+1]) {
      case 'a': c='\a'; break;
      case 'b': c='\b'; break;
      case 'e': c=0x1b; break;
      case 'f': c='\f'; break;
      case 'n': c='\n'; break;
      case 'r': c='\r'; break;
      case 't': c='\t'; break;
      case 'v': c='\v'; break;
      case '\\': break;           // c already holds the backslash
      case 'x':                   // hex escape; \x0a -> 10
        {
          unsigned char a=scan_fromhex(s[i+2]);
          if (a<16) {
            // Only look at the second digit once the first one is known
            // to be a hex digit: if s[i+2] is the terminating NUL,
            // s[i+3] lies outside the string.
            unsigned char b=scan_fromhex(s[i+3]);
            if (b<16) {
              c=(a<<4)+b;
              i+=2;
            } else {              // short form, single hex digit
              c=a;
              i+=1;
            }
          }
        }
        break;
      case 'u':                   // C99 unicode escape: \u000a -> 10
      case 'U':                   // C99 unicode escape: \U0000000a -> 10
        {
          // l is the total length of the escape including "\u" / "\U"
          unsigned int j,k=0,l=(s[i+1]=='U'?10:6);
          for (j=2; j<l; ++j) {
            unsigned char digit=scan_fromhex(s[i+j]);
            if (digit>=16)        // error
              goto error;         // don't allow short sequences
            k=k*16+digit;
          }
          // fmt_utf8 gets a null destination in count-only mode; its
          // return value is added to the output length either way.
          written+=fmt_utf8(dest?dest+written:0,k);
          i+=j-1;                 // the loop's ++i skips the last digit
          continue;
        }
      default:
        if (s[i+1]>='0' && s[i+1]<='7') {   // octal escape; \012 -> 10
          unsigned int j,k;
          for (k=0,j=1; j<4; ++j) {         // at most three octal digits
            unsigned int l=s[i+j]-'0';
            if (l<8)
              k=k*8+l;
            else
              break;
          }
          // Count the byte even when dest is null, so that a
          // length-only call reports the same size as a real decode.
          if (dest) dest[written]=k;
          ++written;
          i+=j-1;
          continue;
        }
        --i;                      // unknown escape: cancel the ++i below
      }
      ++i;                        // skip the character after the backslash
    } else if (c=='"')
      break;                      // leaves the for loop: end of string literal
    if (dest) dest[written]=c;
    ++written;
  }
error:
  if (destlen) *destlen=written;
  return i;
}
#ifdef UNITTEST
#include <assert.h>
#include <string.h>     /* memcmp */
/* UNITTEST is undefined before pulling in the helper sources below,
 * presumably so that they do not emit their own test main(). */
#undef UNITTEST
#include <scan/scan_fromhex.c>
#include <fmt/fmt_utf8.c>
/*
 * Self-test for scan_cescape, compiled only when UNITTEST is defined.
 * Each input ends in "\");" so every successful scan stops at the double
 * quote; the expected return value is the index of that quote and dl is
 * the number of decoded bytes.
 */
int main(void) {
  size_t dl;
  char buf[100];
  assert(scan_cescape("test\\n\");",buf,&dl)==6 && dl==5 && !memcmp(buf,"test\n",5));
  /* check hex and octal escaping */
  assert(scan_cescape("test\\x0a\");",buf,&dl)==8 && dl==5 && !memcmp(buf,"test\n",5));
  assert(scan_cescape("test\\012\");",buf,&dl)==8 && dl==5 && !memcmp(buf,"test\n",5));
  /* check short escape sequences */
  assert(scan_cescape("test\\xa\");",buf,&dl)==7 && dl==5 && !memcmp(buf,"test\n",5));
  assert(scan_cescape("test\\12\");",buf,&dl)==7 && dl==5 && !memcmp(buf,"test\n",5));
  assert(scan_cescape("test\\1\");",buf,&dl)==6 && dl==5 && !memcmp(buf,"test\1",5));
  /* check unicode */
  assert(scan_cescape("test\\u000a\");",buf,&dl)==10 && dl==5 && !memcmp(buf,"test\n",5));
  assert(scan_cescape("test\\U0000000a\");",buf,&dl)==14 && dl==5 && !memcmp(buf,"test\n",5));
  /* check that short sequences are rejected: the scan stops at the backslash */
  assert(scan_cescape("test\\Ua\");",buf,&dl)==4 && dl==4 && !memcmp(buf,"test",4));
  return 0;
}
#endif