[macruby-changes] [5188] MacRuby/trunk/string.c

source_changes at macosforge.org source_changes at macosforge.org
Tue Jan 25 03:05:37 PST 2011


Revision: 5188
          http://trac.macosforge.org/projects/ruby/changeset/5188
Author:   vincent.isambart at gmail.com
Date:     2011-01-25 03:05:34 -0800 (Tue, 25 Jan 2011)
Log Message:
-----------
fixes for ticket #1124

String#each_char and String#ord now raise an exception if the String contains
invalid characters.
String#ord returns the byte value even if it's not an ASCII character.

Modified Paths:
--------------
    MacRuby/trunk/string.c

Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c	2011-01-21 22:55:29 UTC (rev 5187)
+++ MacRuby/trunk/string.c	2011-01-25 11:05:34 UTC (rev 5188)
@@ -434,6 +434,12 @@
     return str_length_with_cache(self, NULL);
 }
 
+NORETURN(static void
+str_invalid_byte_sequence(rb_str_t *str))
+{
+    rb_raise(rb_eArgError, "invalid byte sequence in %s", str->encoding->public_name);
+}
+
 // Note that each_uchar32 iterates on Unicode characters
 // With a character not in the BMP the callback will only be called once!
 static void
@@ -443,7 +449,7 @@
 	bool stop = false;
 	for (long i = 0; i < self->length_in_bytes; ++i) {
 	    UChar32 c = (uint8_t)self->bytes[i];
-	    if (c > 127) {
+	    if (!IS_BINARY_ENC(self->encoding) && c > 127) {
 		c = U_SENTINEL;
 	    }
 	    callback(c, i, 1, &stop);
@@ -3452,7 +3458,11 @@
     if (RSTR(str)->length_in_bytes == 0) {
 	rb_raise(rb_eArgError, "empty string");
     }
-    return INT2NUM(rb_str_get_uchar(str, 0));
+    UChar c = rb_str_get_uchar(str, 0);
+    if (c == (UChar)U_SENTINEL) {
+	str_invalid_byte_sequence(RSTR(str));
+    }
+    return INT2NUM(c);
 }
 
 /*
@@ -4767,6 +4777,9 @@
     __block VALUE return_value = str;
 
     str_each_uchar32(RSTR(str), ^(UChar32 c, long start_index, long char_len, bool *stop) {
+	if (c == U_SENTINEL) {
+	    str_invalid_byte_sequence(RSTR(str));
+	}
 	VALUE charstr = (VALUE)str_new_copy_of_part(RSTR(str),
 	    start_index, char_len);
 	rb_yield(charstr);
@@ -4835,8 +4848,7 @@
     __block VALUE return_value = str;
     str_each_uchar32(RSTR(str), ^(UChar32 c, long start_index, long char_len, bool *stop) {
 	if (c == U_SENTINEL) {
-	    rb_raise(rb_eArgError, "invalid byte sequence in %s",
-		RSTR(str)->encoding->public_name);
+	    str_invalid_byte_sequence(RSTR(str));
 	}
 	rb_yield(INT2NUM(c));
 	VALUE v = rb_vm_pop_broken_value();
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20110125/f7e3f65c/attachment.html>


More information about the macruby-changes mailing list