[macruby-changes] [5199] MacRuby/trunk

source_changes at macosforge.org source_changes at macosforge.org
Thu Jan 27 20:48:32 PST 2011


Revision: 5199
          http://trac.macosforge.org/projects/ruby/changeset/5199
Author:   vincent.isambart at gmail.com
Date:     2011-01-27 20:48:31 -0800 (Thu, 27 Jan 2011)
Log Message:
-----------
String#index faster and with fewer bugs

Modified Paths:
--------------
    MacRuby/trunk/encoding_ucnv.h
    MacRuby/trunk/string.c
    MacRuby/trunk/ucnv.c

Modified: MacRuby/trunk/encoding_ucnv.h
===================================================================
--- MacRuby/trunk/encoding_ucnv.h	2011-01-28 04:39:20 UTC (rev 5198)
+++ MacRuby/trunk/encoding_ucnv.h	2011-01-28 04:48:31 UTC (rev 5199)
@@ -25,7 +25,9 @@
 character_boundaries_t str_ucnv_get_character_boundaries(rb_str_t *self, long index, bool ucs2_mode);
 void str_ucnv_transcode_to_utf16(struct rb_encoding *src_enc, rb_str_t *self, long *pos, UChar **utf16, long *utf16_length);
 void str_ucnv_transcode_from_utf16(struct rb_encoding *dst_enc, UChar *utf16, long utf16_length, long *utf16_pos, char **bytes, long *bytes_length);
-void str_ucnv_each_uchar32(rb_str_t *self, each_uchar32_callback_t callback);
+void str_ucnv_each_uchar32_starting_from(rb_str_t *self,
+	long start_offset_in_bytes,
+	each_uchar32_callback_t callback);
 
 #if defined(__cplusplus)
 } // extern "C"

Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c	2011-01-28 04:39:20 UTC (rev 5198)
+++ MacRuby/trunk/string.c	2011-01-28 04:48:31 UTC (rev 5199)
@@ -442,12 +442,15 @@
 
 // Note that each_uchar32 iterates on Unicode characters
 // With a character not in the BMP the callback will only be called once!
+// start_offset_in_bytes MUST be at a character boundary
 static void
-str_each_uchar32(rb_str_t *self, each_uchar32_callback_t callback)
+str_each_uchar32_starting_from(rb_str_t *self,
+	long start_offset_in_bytes,
+	each_uchar32_callback_t callback)
 {
     if (IS_BINARY_ENC(self->encoding) || IS_ASCII_ENC(self->encoding)) {
 	bool stop = false;
-	for (long i = 0; i < self->length_in_bytes; ++i) {
+	for (long i = start_offset_in_bytes; i < self->length_in_bytes; ++i) {
 	    UChar32 c = (uint8_t)self->bytes[i];
 	    if (!IS_BINARY_ENC(self->encoding) && c > 127) {
 		c = U_SENTINEL;
@@ -460,7 +463,7 @@
     }
     else if (IS_UTF8_ENC(self->encoding)) {
 	bool stop = false;
-	for (int i = 0; i < self->length_in_bytes; ) {
+	for (int i = start_offset_in_bytes; i < self->length_in_bytes; ) {
 	    UChar32 c;
 	    int old_i = i;
 	    U8_NEXT(self->bytes, i, self->length_in_bytes, c);
@@ -482,6 +485,7 @@
 	};
     }
     else if (IS_NATIVE_UTF16_ENC(self->encoding)) {
+	assert(!ODD_NUMBER(start_offset_in_bytes));
 	bool stop = false;
 	long length = BYTES_TO_UCHARS(self->length_in_bytes);
 	UChar *uchars = (UChar *)self->bytes;
@@ -500,10 +504,17 @@
 	};
     }
     else {
-	str_ucnv_each_uchar32(self, callback);
+	str_ucnv_each_uchar32_starting_from(self,
+		start_offset_in_bytes, callback);
     }
 }
 
+static void
+str_each_uchar32(rb_str_t *self, each_uchar32_callback_t callback)
+{
+    str_each_uchar32_starting_from(self, 0, callback);
+}
+
 static UChar
 str_get_uchar(rb_str_t *self, long pos)
 {
@@ -1134,6 +1145,27 @@
 	end_offset_in_bytes = boundaries.end_offset_in_bytes;
     }
 
+    if (!backward_search) {
+	__block long returned_index = -1;
+	__block long current_index = start_index;
+	str_each_uchar32_starting_from(self, start_offset_in_bytes,
+		^(UChar32 c, long character_start_offset, long char_len, bool *stop) {
+	    if (end_offset_in_bytes - character_start_offset < searched->length_in_bytes) {
+		// not enough characters left: we could not find the string
+		*stop = true;
+		return;
+	    }
+	    if (memcmp(self->bytes + character_start_offset,
+		    searched->bytes, searched->length_in_bytes) == 0) {
+		returned_index = current_index;
+		*stop = true;
+		return;
+	    }
+	    ++current_index;
+	});
+	return returned_index;
+    }
+
     const long offset_in_bytes = str_offset_in_bytes_for_string(self,
 	    searched, start_offset_in_bytes, end_offset_in_bytes,
 	    backward_search);
@@ -1179,8 +1211,8 @@
 static bool
 str_include_string(rb_str_t *self, rb_str_t *searched)
 {
-    return str_offset_in_bytes_for_string(self, searched, 0,
-	    self->length_in_bytes, true) != -1;
+    return str_index_for_string_with_cache(self, searched,
+	0, -1, false, NULL) != -1;
 }
 
 rb_str_t *

Modified: MacRuby/trunk/ucnv.c
===================================================================
--- MacRuby/trunk/ucnv.c	2011-01-28 04:39:20 UTC (rev 5198)
+++ MacRuby/trunk/ucnv.c	2011-01-28 04:48:31 UTC (rev 5199)
@@ -138,12 +138,14 @@
 void rb_ensure_b(void (^b_block)(void), void (^e_block)(void));
 
 void
-str_ucnv_each_uchar32(rb_str_t *self, each_uchar32_callback_t callback)
+str_ucnv_each_uchar32_starting_from(rb_str_t *self,
+	long start_offset_in_bytes,
+	each_uchar32_callback_t callback)
 {
     USE_CONVERTER(cnv, self->encoding);
 
     rb_ensure_b(^{
-	const char *pos = self->bytes;
+	const char *pos = self->bytes + start_offset_in_bytes;
 	const char *end = pos + self->length_in_bytes;
 	bool stop = false;
 	for (;;) {
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20110127/3d7fcba7/attachment.html>


More information about the macruby-changes mailing list