[macruby-changes] [5059] MacRuby/trunk/string.c

source_changes at macosforge.org source_changes at macosforge.org
Mon Dec 20 14:30:16 PST 2010


Revision: 5059
          http://trac.macosforge.org/projects/ruby/changeset/5059
Author:   vincent.isambart at gmail.com
Date:     2010-12-20 14:30:11 -0800 (Mon, 20 Dec 2010)
Log Message:
-----------
Added a dedicated UTF-8 implementation of str_each_uchar32 (UTF-8 strings previously went through the generic UCnv-based code path)

Modified Paths:
--------------
    MacRuby/trunk/string.c

Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c	2010-12-20 15:13:57 UTC (rev 5058)
+++ MacRuby/trunk/string.c	2010-12-20 22:30:11 UTC (rev 5059)
@@ -387,7 +387,7 @@
     }
 }
 
-// Note that each_char iterates on unicode characters
+// Note that each_uchar32 iterates on Unicode characters
 // With a character not in the BMP the callback will only be called once!
 static void
 str_each_uchar32(rb_str_t *self, each_uchar32_callback_t callback)
@@ -405,6 +405,29 @@
 	    }
 	}
     }
+    else if (IS_UTF8_ENC(self->encoding)) {
+	bool stop = false;
+	for (int i = 0; i < self->length_in_bytes; ) {
+	    UChar32 c;
+	    int old_i = i;
+	    U8_NEXT(self->bytes, i, self->length_in_bytes, c);
+	    int char_length = i - old_i;
+	    if (c == U_SENTINEL) {
+		for (long j = 0; j < char_length; ++j) {
+		    callback(c, old_i+j, 1, &stop);
+		    if (stop) {
+			return;
+		    }
+		}
+	    }
+	    else {
+		callback(c, old_i, char_length, &stop);
+		if (stop) {
+		    return;
+		}
+	    }
+	};
+    }
     else if (IS_NATIVE_UTF16_ENC(self->encoding)) {
 	bool stop = false;
 	long length = BYTES_TO_UCHARS(self->length_in_bytes);
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20101220/a0efe423/attachment.html>


More information about the macruby-changes mailing list