Skip to content

Commit

Permalink
[core] add option to toggle ampersand entities on/off when serialize …
Browse files Browse the repository at this point in the history
…xml string
  • Loading branch information
seven1240 authored and andywolk committed Dec 30, 2019
1 parent 917d850 commit 7907994
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 29 deletions.
20 changes: 15 additions & 5 deletions src/include/switch_xml.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@
#define FREESWITCH_XML_H
#include <switch.h>

/* Use UTF-8 as the general encoding */
#define USE_UTF_8_ENCODING SWITCH_TRUE

struct switch_xml_binding;

Expand Down Expand Up @@ -213,20 +215,28 @@ SWITCH_DECLARE(switch_xml_t) switch_xml_get(_In_ switch_xml_t xml,...);
///\ must be freed.
///\param xml the xml node
///\param prn_header add <?xml version..> header too
///\param use_utf8_encoding if true, encode non-ASCII UTF-8 characters as numeric ampersand entities (e.g. &#xE7;); if false, emit them unchanged
///\return the ampersanded html text string to display xml
SWITCH_DECLARE(char *) switch_xml_toxml(_In_ switch_xml_t xml, _In_ switch_bool_t prn_header);
SWITCH_DECLARE(char *) switch_xml_toxml_nolock(switch_xml_t xml, _In_ switch_bool_t prn_header);
SWITCH_DECLARE(char *) switch_xml_tohtml(_In_ switch_xml_t xml, _In_ switch_bool_t prn_header);
#define switch_xml_toxml(xml, prn_header) switch_xml_toxml_ex(xml, prn_header, USE_UTF_8_ENCODING)
#define switch_xml_toxml_nolock(xml, prn_header) switch_xml_toxml_nolock_ex(xml, prn_header, USE_UTF_8_ENCODING)
#define switch_xml_tohtml(xml, prn_header) switch_xml_tohtml_ex(xml, prn_header, USE_UTF_8_ENCODING)

SWITCH_DECLARE(char *) switch_xml_toxml_ex(_In_ switch_xml_t xml, _In_ switch_bool_t prn_header, switch_bool_t use_utf8_encoding);
SWITCH_DECLARE(char *) switch_xml_toxml_nolock_ex(switch_xml_t xml, _In_ switch_bool_t prn_header, switch_bool_t use_utf8_encoding);
SWITCH_DECLARE(char *) switch_xml_tohtml_ex(_In_ switch_xml_t xml, _In_ switch_bool_t prn_header, switch_bool_t use_utf8_encoding);

///\brief Converts a switch_xml structure back to xml using the buffer passed in the parameters.
///\param xml the xml node
///\param buf buffer to use
///\param buflen size of buffer
///\param offset offset to start at
///\param prn_header add <?xml version..> header too
///\param use_utf8_encoding if true, encode non-ASCII UTF-8 characters as numeric ampersand entities (e.g. &#xE7;); if false, emit them unchanged
///\return the xml text string
SWITCH_DECLARE(char *) switch_xml_toxml_buf(_In_ switch_xml_t xml, _In_z_ char *buf, _In_ switch_size_t buflen, _In_ switch_size_t offset,
_In_ switch_bool_t prn_header);
/* Backward-compatible wrapper: forwards to switch_xml_toxml_buf_ex() with the
   default USE_UTF_8_ENCODING. It must name the _ex function (a macro that names
   itself is not re-expanded) and must not end in ';' so that it remains usable
   inside expressions. */
#define switch_xml_toxml_buf(xml, buf, buflen, offset, prn_header) switch_xml_toxml_buf_ex(xml, buf, buflen, offset, prn_header, USE_UTF_8_ENCODING)
SWITCH_DECLARE(char *) switch_xml_toxml_buf_ex(_In_ switch_xml_t xml, _In_z_ char *buf, _In_ switch_size_t buflen, _In_ switch_size_t offset,
_In_ switch_bool_t prn_header, switch_bool_t use_utf8_encoding);


///\brief returns a NULL terminated array of processing instructions for the given
///\ target
Expand Down
44 changes: 20 additions & 24 deletions src/switch_xml.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,6 @@ void globfree(glob_t *);
#define SWITCH_XML_WS "\t\r\n " /* whitespace */
#define SWITCH_XML_ERRL 128 /* maximum error string length */

/* Use UTF-8 as the general encoding */
static switch_bool_t USE_UTF_8_ENCODING = SWITCH_TRUE;

static void preprocess_exec_set(char *keyval)
{
char *key = keyval;
Expand Down Expand Up @@ -2478,7 +2475,7 @@ SWITCH_DECLARE(switch_xml_t) switch_xml_open_cfg(const char *file_path, switch_x

/* Encodes ampersand sequences appending the results to *dst, reallocating *dst
if length exceeds max. a is non-zero for attribute encoding. Returns *dst */
static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst, switch_size_t *dlen, switch_size_t *max, short a)
static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst, switch_size_t *dlen, switch_size_t *max, short a, switch_bool_t use_utf8_encoding)
{
const char *e = NULL;
int immune = 0;
Expand Down Expand Up @@ -2533,7 +2530,7 @@ static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst,
*dlen += sprintf(*dst + *dlen, "&#xD;");
break;
default:
if (USE_UTF_8_ENCODING && expecting_x_utf_8_char == 0 && ((*s >> 8) & 0x01)) {
if (use_utf8_encoding && expecting_x_utf_8_char == 0 && ((*s >> 8) & 0x01)) {
int num = 1;
for (;num<4;num++) {
if (! ((*s >> (7-num)) & 0x01)) {
Expand All @@ -2557,7 +2554,7 @@ static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst,
}
expecting_x_utf_8_char = num - 1;

} else if (USE_UTF_8_ENCODING && expecting_x_utf_8_char > 0) {
} else if (use_utf8_encoding && expecting_x_utf_8_char > 0) {
if (((*s >> 6) & 0x03) == 0x2) {

unicode_char = unicode_char << 6;
Expand All @@ -2584,7 +2581,7 @@ static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst,
/* Recursively converts each tag to xml appending it to *s. Reallocates *s if
its length exceeds max. start is the location of the previous tag in the
parent tag's character content. Returns *s. */
static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len, switch_size_t *max, switch_size_t start, char ***attr, uint32_t *count, int isroot)
static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len, switch_size_t *max, switch_size_t start, char ***attr, uint32_t *count, int isroot, switch_bool_t use_utf8_encoding)
{
int i, j;
char *txt;
Expand All @@ -2606,7 +2603,7 @@ static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len,
}

/* parent character content up to this tag */
*s = switch_xml_ampencode(txt + start, xml->off - start, s, len, max, 0);
*s = switch_xml_ampencode(txt + start, xml->off - start, s, len, max, 0, use_utf8_encoding);

while (*len + strlen(xml->name) + 5 + (strlen(XML_INDENT) * (*count)) + 1 > *max) { /* reallocate s */
*s = (char *) switch_must_realloc(*s, *max += SWITCH_XML_BUFSIZE);
Expand All @@ -2628,7 +2625,7 @@ static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len,
}

*len += sprintf(*s + *len, " %s=\"", xml->attr[i]);
switch_xml_ampencode(xml->attr[i + 1], 0, s, len, max, 1);
switch_xml_ampencode(xml->attr[i + 1], 0, s, len, max, 1, use_utf8_encoding);
*len += sprintf(*s + *len, "\"");
}

Expand All @@ -2641,18 +2638,18 @@ static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len,
}

*len += sprintf(*s + *len, " %s=\"", attr[i][j]);
switch_xml_ampencode(attr[i][j + 1], 0, s, len, max, 1);
switch_xml_ampencode(attr[i][j + 1], 0, s, len, max, 1, use_utf8_encoding);
*len += sprintf(*s + *len, "\"");
}

*len += sprintf(*s + *len, (xml->child || xml->txt) ? ">" : "/>\n");

if (xml->child) {
(*count)++;
*s = switch_xml_toxml_r(xml->child, s, len, max, 0, attr, count, 0);
*s = switch_xml_toxml_r(xml->child, s, len, max, 0, attr, count, 0, use_utf8_encoding);

} else {
*s = switch_xml_ampencode(xml->txt, 0, s, len, max, 0); /* data */
*s = switch_xml_ampencode(xml->txt, 0, s, len, max, 0, use_utf8_encoding); /* data */
}

while (*len + strlen(xml->name) + 5 + (strlen(XML_INDENT) * (*count)) > *max) { /* reallocate s */
Expand All @@ -2676,35 +2673,34 @@ static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len,
start = off;
goto tailrecurse;
/*
return switch_xml_toxml_r(xml->ordered, s, len, max, off, attr, count);
return switch_xml_toxml_r(xml->ordered, s, len, max, off, attr, count, use_utf8_encoding);
*/
} else {
if (*count > 0)
(*count)--;
return switch_xml_ampencode(txt + off, 0, s, len, max, 0);
return switch_xml_ampencode(txt + off, 0, s, len, max, 0, use_utf8_encoding);
}
}

SWITCH_DECLARE(char *) switch_xml_toxml_nolock(switch_xml_t xml, switch_bool_t prn_header)
SWITCH_DECLARE(char *) switch_xml_toxml_nolock_ex(switch_xml_t xml, switch_bool_t prn_header, switch_bool_t use_utf8_encoding)
{
char *s = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE);

return switch_xml_toxml_buf(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header);
return switch_xml_toxml_buf_ex(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header, use_utf8_encoding);
}


SWITCH_DECLARE(char *) switch_xml_toxml(switch_xml_t xml, switch_bool_t prn_header)
SWITCH_DECLARE(char *) switch_xml_toxml_ex(switch_xml_t xml, switch_bool_t prn_header, switch_bool_t use_utf8_encoding)
{
char *r, *s;

s = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE);

r = switch_xml_toxml_buf(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header);
r = switch_xml_toxml_buf_ex(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header, use_utf8_encoding);

return r;
}

SWITCH_DECLARE(char *) switch_xml_tohtml(switch_xml_t xml, switch_bool_t prn_header)
SWITCH_DECLARE(char *) switch_xml_tohtml_ex(switch_xml_t xml, switch_bool_t prn_header, switch_bool_t use_utf8_encoding)
{
char *r, *s, *h;
switch_size_t rlen = 0;
Expand All @@ -2713,15 +2709,15 @@ SWITCH_DECLARE(char *) switch_xml_tohtml(switch_xml_t xml, switch_bool_t prn_hea
s = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE);
h = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE);

r = switch_xml_toxml_buf(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header);
h = switch_xml_ampencode(r, 0, &h, &rlen, &len, 1);
r = switch_xml_toxml_buf_ex(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header, use_utf8_encoding);
h = switch_xml_ampencode(r, 0, &h, &rlen, &len, 1, use_utf8_encoding);
switch_safe_free(r);
return h;
}

/* converts a switch_xml structure back to xml, returning a string of xml data that
must be freed */
SWITCH_DECLARE(char *) switch_xml_toxml_buf(switch_xml_t xml, char *buf, switch_size_t buflen, switch_size_t offset, switch_bool_t prn_header)
SWITCH_DECLARE(char *) switch_xml_toxml_buf_ex(switch_xml_t xml, char *buf, switch_size_t buflen, switch_size_t offset, switch_bool_t prn_header, switch_bool_t use_utf8_encoding)
{
switch_xml_t p = (xml) ? xml->parent : NULL;
switch_xml_root_t root = (switch_xml_root_t) xml;
Expand Down Expand Up @@ -2759,7 +2755,7 @@ SWITCH_DECLARE(char *) switch_xml_toxml_buf(switch_xml_t xml, char *buf, switch_
}
}

s = switch_xml_toxml_r(xml, &s, &len, &max, 0, root->attr, &count, 1);
s = switch_xml_toxml_r(xml, &s, &len, &max, 0, root->attr, &count, 1, use_utf8_encoding);

for (i = 0; !p && root->pi[i]; i++) { /* post-root processing instructions */
for (k = 2; root->pi[i][k - 1]; k++);
Expand Down
50 changes: 50 additions & 0 deletions tests/unit/switch_xml.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,56 @@ FST_MINCORE_BEGIN()
switch_xml_free(xml);
}
FST_TEST_END()

/* Verifies that serialization encodes non-ASCII UTF-8 characters as numeric
   ampersand entities by default (switch_xml_toxml) and leaves them untouched
   when use_utf8_encoding is SWITCH_FALSE (switch_xml_toxml_ex). */
FST_TEST_BEGIN(test_utf_8)
{
	const char *text = "<xml>Voulez-Vous Parler Français</xml>";
	switch_xml_t xml = switch_xml_parse_str_dynamic((char *)text, SWITCH_TRUE);
	char *xml_string = NULL;

	/* Latin text: default encoding vs. raw output */
	fst_requires(xml);
	xml_string = switch_xml_toxml(xml, SWITCH_FALSE);
	fst_requires(xml_string);
	fst_check_string_equals(xml_string, "<xml>Voulez-Vous Parler Fran&#xE7;ais</xml>\n");
	free(xml_string);

	xml_string = switch_xml_toxml_ex(xml, SWITCH_FALSE, SWITCH_FALSE);
	fst_requires(xml_string);
	fst_check_string_equals(xml_string, "<xml>Voulez-Vous Parler Français</xml>\n");
	switch_xml_free(xml);
	free(xml_string);

	/* Multi-byte CJK text. The separator is the full-width comma (U+FF0C),
	   which is what the expected &#xFF0C; entity below encodes. */
	text = "<xml>你好，中文</xml>";
	xml = switch_xml_parse_str_dynamic((char *)text, SWITCH_TRUE);

	fst_requires(xml);
	xml_string = switch_xml_toxml(xml, SWITCH_FALSE);
	fst_requires(xml_string);
	fst_check_string_equals(xml_string, "<xml>&#x4F60;&#x597D;&#xFF0C;&#x4E2D;&#x6587;</xml>\n");
	free(xml_string);

	xml_string = switch_xml_toxml_ex(xml, SWITCH_FALSE, SWITCH_FALSE);
	fst_requires(xml_string);
	fst_check_string_equals(xml_string, "<xml>你好，中文</xml>\n");
	switch_xml_free(xml);
	free(xml_string);

	/* CDATA content inside a nested tag */
	text = "<xml><tag><![CDATA[Voulez-Vous Parler Français]]></tag></xml>";

	xml = switch_xml_parse_str_dynamic((char *)text, SWITCH_TRUE);
	fst_requires(xml);
	xml_string = switch_xml_toxml(xml, SWITCH_FALSE);
	fst_requires(xml_string);
	fst_check_string_equals(xml_string, "<xml>\n <tag>Voulez-Vous Parler Fran&#xE7;ais</tag>\n</xml>\n");
	/* Release only the string here: xml is still needed for the raw pass. */
	free(xml_string);

	xml_string = switch_xml_toxml_ex(xml, SWITCH_FALSE, SWITCH_FALSE);
	fst_requires(xml_string);
	fst_check_string_equals(xml_string, "<xml>\n <tag>Voulez-Vous Parler Français</tag>\n</xml>\n");
	switch_xml_free(xml);
	free(xml_string);
}
FST_TEST_END()
}
FST_SUITE_END()
}
Expand Down

0 comments on commit 7907994

Please sign in to comment.