[macruby-changes] [5081] MacRuby/trunk/string.c

source_changes at macosforge.org source_changes at macosforge.org
Sat Dec 25 17:53:10 PST 2010


Revision: 5081
          http://trac.macosforge.org/projects/ruby/changeset/5081
Author:   vincent.isambart at gmail.com
Date:     2010-12-25 17:53:07 -0800 (Sat, 25 Dec 2010)
Log Message:
-----------
the speed of String#gsub should be a bit better

Modified Paths:
--------------
    MacRuby/trunk/string.c

Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c	2010-12-24 14:56:10 UTC (rev 5080)
+++ MacRuby/trunk/string.c	2010-12-26 01:53:07 UTC (rev 5081)
@@ -847,11 +847,44 @@
     }
 
     rb_encoding_t *enc = str_must_have_compatible_encoding(self, str);
+    str_reset_flags(self);
     self->encoding = enc;
-    str_reset_flags(self);
     str_concat_bytes(self, str->bytes, str->length_in_bytes);
 }
 
+static void
+str_concat_string_part(rb_str_t *self, rb_str_t *str, long start, long len)
+{
+    // Append `len` characters of `str`, starting at character index `start`,
+    // to `self`. Both values must be non-negative; a zero `len` is a no-op.
+    assert(len >= 0 && start >= 0);
+    if (len == 0) {
+	return;
+    }
+    rb_encoding_t *enc = str_must_have_compatible_encoding(self, str);
+    str_reset_flags(self);
+    self->encoding = enc;
+    // The offsets below index str->bytes, so look them up in `str`, not `self`.
+    character_boundaries_t first_boundaries =
+	str_get_character_boundaries(str, start);
+    character_boundaries_t last_boundaries;
+    if (len == 1) {
+	last_boundaries = first_boundaries;
+    }
+    else {
+	last_boundaries = str_get_character_boundaries(str, start+len-1);
+    }
+    if ((first_boundaries.start_offset_in_bytes == -1) ||
+	    (last_boundaries.end_offset_in_bytes == -1)) {
+	// you cannot cut a surrogate in an encoding that is not UTF-16
+	str_cannot_cut_surrogate();
+    }
+
+    str_concat_bytes(self, &str->bytes[first_boundaries.start_offset_in_bytes],
+	    last_boundaries.end_offset_in_bytes -
+		first_boundaries.start_offset_in_bytes);
+}
+
 static int
 str_compare(rb_str_t *self, rb_str_t *str)
 {
@@ -1159,11 +1192,17 @@
     bool need_free = false;
 
     if (IS_RSTR(str)) {
-	chars_len = str_length(RSTR(str));
-	if (chars_len > 0) {
-	    chars = (UChar *)malloc(sizeof(UChar) * chars_len);
-	    str_extract_uchars_range(RSTR(str), 0, chars_len, chars);
-	    need_free = true;
+	rb_str_t *rstr = RSTR(str);
+	if (rstr->length_in_bytes > 0) {
+	    chars_len = str_length(RSTR(str));
+	    if (IS_NATIVE_UTF16_ENC(rstr->encoding)) {
+		chars = (UChar *)rstr->bytes;
+	    }
+	    else {
+		chars = (UChar *)malloc(sizeof(UChar) * chars_len);
+		str_extract_uchars_range(RSTR(str), 0, chars_len, chars);
+		need_free = true;
+	    }
 	}
     }
     else {
@@ -3501,14 +3540,8 @@
     rb_str_get_uchars(str, &str_chars, &str_chars_len,
 	    &str_chars_need_free);
 
-    UChar *src_chars = NULL;
-    long src_chars_len = 0;
-    bool src_chars_need_free = false;
-
-    rb_str_get_uchars(src, &src_chars, &src_chars_len,
-	    &src_chars_need_free);
-
     long pos = 0;
+    long src_chars_len = -1;
 
     for (long i = 0; i < str_chars_len; i++) {
 	UChar c = str_chars[i];
@@ -3542,12 +3575,16 @@
 		break;
 
 	    case '`':
-		str_concat_uchars(RSTR(val), src_chars, results[0].beg);
+		str_concat_string_part(RSTR(val), RSTR(src),
+			0, results[0].beg);
 		break;
 
 	    case '\'':
-		str_concat_uchars(RSTR(val), &src_chars[results[0].end],
-			src_chars_len - results[0].end);
+		if (src_chars_len == -1) {
+		    src_chars_len = str_length(RSTR(src));
+		}
+		str_concat_string_part(RSTR(val), RSTR(src),
+			results[0].end, src_chars_len - results[0].end);
 		break;
 
 	    case '+':
@@ -3575,8 +3612,8 @@
 	    if (results[no].beg == -1) {
 		continue;
 	    }
-	    str_concat_uchars(RSTR(val), &src_chars[results[no].beg],
-		    results[no].end - results[no].beg);
+	    str_concat_string_part(RSTR(val), RSTR(src),
+		    results[no].beg, results[no].end - results[no].beg);
 	}
     }
 
@@ -3587,9 +3624,6 @@
     if (str_chars_need_free) {
 	free(str_chars);
     }
-    if (src_chars_need_free) {
-	free(src_chars);
-    }
 
     if (val == 0) {
 	return str;
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20101225/581402fc/attachment.html>


More information about the macruby-changes mailing list