[macruby-changes] [4055] MacRuby/trunk

source_changes at macosforge.org source_changes at macosforge.org
Sun May 9 00:50:48 PDT 2010


Revision: 4055
          http://trac.macosforge.org/projects/ruby/changeset/4055
Author:   vincent.isambart at gmail.com
Date:     2010-05-09 00:50:41 -0700 (Sun, 09 May 2010)
Log Message:
-----------
finished properly handling XML escaping in String#encode

Modified Paths:
--------------
    MacRuby/trunk/spec/frozen/tags/macruby/core/string/encode_tags.txt
    MacRuby/trunk/string.c

Modified: MacRuby/trunk/spec/frozen/tags/macruby/core/string/encode_tags.txt
===================================================================
--- MacRuby/trunk/spec/frozen/tags/macruby/core/string/encode_tags.txt	2010-05-09 06:59:13 UTC (rev 4054)
+++ MacRuby/trunk/spec/frozen/tags/macruby/core/string/encode_tags.txt	2010-05-09 07:50:41 UTC (rev 4055)
@@ -1,6 +1,2 @@
 fails:String#encode! raises Encoding::ConverterNotFoundError for invalid target encodings
 fails:String#encode raises Encoding::ConverterNotFoundError for invalid target encodings
-fails:String#encode! replaces xml characters
-fails:String#encode! replaces xml characters and quotes the result
-fails:String#encode replaces xml characters
-fails:String#encode replaces xml characters and quotes the result

Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c	2010-05-09 06:59:13 UTC (rev 4054)
+++ MacRuby/trunk/string.c	2010-05-09 07:50:41 UTC (rev 4055)
@@ -1259,6 +1259,21 @@
     return str_transcode(self, self->encoding, dst_encoding,
 	    TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, NULL);
 }
+
+static void inline
+str_concat_ascii_cstr(rb_str_t *self, char *cstr)
+{
+    long len = strlen(cstr);
+    if (self->encoding->ascii_compatible) {
+	str_concat_bytes(self, cstr, len);
+    }
+    else {
+	rb_str_t *str = RSTR(rb_enc_str_new(cstr, len, rb_encodings[ENCODING_ASCII]));
+	str = str_simple_transcode(str, self->encoding);
+	str_concat_bytes(self, str->data.bytes, str->length_in_bytes);
+    }
+}
+
 static rb_str_t *
 str_transcode(rb_str_t *self, rb_encoding_t *src_encoding, rb_encoding_t *dst_encoding,
 	int behavior_for_invalid, int behavior_for_undefined, rb_str_t *replacement_str)
@@ -1273,7 +1288,8 @@
     rb_str_t *dst_str = str_alloc(rb_cRubyString);
     dst_str->encoding = dst_encoding;
 
-    if (self->length_in_bytes == 0) {
+    if ((self->length_in_bytes == 0) &&
+	    (behavior_for_undefined != TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_ATTR)) {
 	return dst_str;
     }
 
@@ -1302,6 +1318,10 @@
 	src_encoding_used = src_encoding;
     }
 
+    if (behavior_for_undefined == TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_ATTR) {
+	str_concat_ascii_cstr(dst_str, "\"");
+    }
+
     long pos_in_src = 0;
     for (;;) {
 	UChar *utf16;
@@ -1319,6 +1339,76 @@
 	}
 
 	if (utf16_length > 0) {
+	    if ((behavior_for_undefined == TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_TEXT)
+		   || (behavior_for_undefined == TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_ATTR)) {
+		long new_utf16_length = 0;
+		for (long i = 0; i < utf16_length; ++i) {
+		    switch (utf16[i]) {
+			case '&':
+			    new_utf16_length += 5;
+			    break;
+			case '<':
+			case '>':
+			    new_utf16_length += 4;
+			    break;
+			case '"':
+			    if (behavior_for_undefined == TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_ATTR) {
+				new_utf16_length += 6;
+			    }
+			    else {
+				++new_utf16_length;
+			    }
+			    break;
+			default:
+			    ++new_utf16_length;
+		    }
+		}
+		if (new_utf16_length != utf16_length) {
+		    UChar *new_utf16 = xmalloc(UCHARS_TO_BYTES(new_utf16_length));
+		    long new_utf16_pos = 0;
+		    for (long i = 0; i < utf16_length; ++i) {
+			switch (utf16[i]) {
+			    case '&':
+				new_utf16[new_utf16_pos++] = '&';
+				new_utf16[new_utf16_pos++] = 'a';
+				new_utf16[new_utf16_pos++] = 'm';
+				new_utf16[new_utf16_pos++] = 'p';
+				new_utf16[new_utf16_pos++] = ';';
+				break;
+			    case '<':
+				new_utf16[new_utf16_pos++] = '&';
+				new_utf16[new_utf16_pos++] = 'l';
+				new_utf16[new_utf16_pos++] = 't';
+				new_utf16[new_utf16_pos++] = ';';
+				break;
+			    case '>':
+				new_utf16[new_utf16_pos++] = '&';
+				new_utf16[new_utf16_pos++] = 'g';
+				new_utf16[new_utf16_pos++] = 't';
+				new_utf16[new_utf16_pos++] = ';';
+				break;
+			    case '"':
+				if (behavior_for_undefined == TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_ATTR) {
+				    new_utf16[new_utf16_pos++] = '&';
+				    new_utf16[new_utf16_pos++] = 'q';
+				    new_utf16[new_utf16_pos++] = 'u';
+				    new_utf16[new_utf16_pos++] = 'o';
+				    new_utf16[new_utf16_pos++] = 't';
+				    new_utf16[new_utf16_pos++] = ';';
+				}
+				else {
+				    new_utf16[new_utf16_pos++] = utf16[i];
+				}
+				break;
+			    default:
+				new_utf16[new_utf16_pos++] = utf16[i];
+			}
+		    }
+		    utf16_length = new_utf16_length;
+		    utf16 = new_utf16;
+		}
+	    }
+
 	    long utf16_pos = 0;
 	    for (;;) {
 		long bytes_length;
@@ -1346,14 +1436,7 @@
 			    {
 				char xml[10];
 				snprintf(xml, 10, "&#x%X;", c);
-				if (dst_encoding->ascii_compatible) {
-				    str_concat_bytes(dst_str, xml, strlen(xml));
-				}
-				else {
-				    rb_str_t *xml_str = RSTR(rb_str_new2(xml));
-				    xml_str = str_simple_transcode(xml_str, dst_encoding);
-				    str_concat_bytes(dst_str, xml_str->data.bytes, xml_str->length_in_bytes);
-				}
+				str_concat_ascii_cstr(dst_str, xml);
 			    }
 			    break;
 			default:
@@ -1400,6 +1483,11 @@
 	}
     }
 
+    if (behavior_for_undefined == TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_ATTR) {
+	str_concat_ascii_cstr(dst_str, "\"");
+    }
+
+
     return dst_str;
 }
 
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20100509/a45807ee/attachment-0001.html>


More information about the macruby-changes mailing list