From 38ea25fd9ffdb63bc5b8b08854a5048ca6deb2b6 Mon Sep 17 00:00:00 2001 From: leitner Date: Sat, 27 Dec 2014 13:39:20 +0000 Subject: [PATCH] more updates --- GNUmakefile | 2 +- fmt/fmt_xmlescape.c | 40 ++++++++++++++++++++++++++++++++++++++++ test/uudecode.c | 8 +++++--- 3 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 fmt/fmt_xmlescape.c diff --git a/GNUmakefile b/GNUmakefile index c910817..c60c185 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -31,7 +31,7 @@ CFLAGS += -D_REENTRANT $(WERROR) # startrip ifneq ($(DEBUG),) -CFLAGS=-pipe -Wall -g +CFLAGS=-pipe -Wall -g -Og endif path = $(subst :, ,$(PATH)) diet_path = $(foreach dir,$(path),$(wildcard $(dir)/diet)) diff --git a/fmt/fmt_xmlescape.c b/fmt/fmt_xmlescape.c new file mode 100644 index 0000000..b3733f8 --- /dev/null +++ b/fmt/fmt_xmlescape.c @@ -0,0 +1,40 @@ +#include "fmt.h" + +size_t fmt_xmlescape(char* dest,uint32_t ch) { + char* x; + size_t n; +/* +From http://en.wikipedia.org/wiki/XML#Valid_characters + +Unicode code points in the following ranges are valid in XML 1.0 documents: + U+0009, U+000A, U+000D: these are the only C0 controls accepted in XML 1.0; + U+0020–U+D7FF, U+E000–U+FFFD: this excludes some (not all) non-characters in the BMP (all surrogates, U+FFFE and U+FFFF are forbidden); + U+10000–U+10FFFF: this includes all code points in supplementary planes, including non-characters. +*/ + if (ch==0 || (ch>=0xd780 && ch<=0xdfff) || ch==0xfffe || ch==0xffff || ch>0x10ffff) return 0; + if ((ch&0x7f)<20 && ch!=9 && ch!=0xa && ch!=0xd && ch!=0x85) { + char buf[6]; + buf[0]='&'; + buf[1]='#'; + buf[2]='x'; + n=3+fmt_xlong(buf+3,ch); + buf[n++]=';'; + x=buf; + } else + switch (ch) { + case '&': + x="&"; n=5; + break; + case '<': + x="<"; n=4; + break; + default: + return fmt_utf8(dest,ch); + } + if (dest) { + size_t i; + for (i=0; i