[macruby-changes] [2340] MacRuby/trunk

source_changes at macosforge.org source_changes at macosforge.org
Sun Aug 16 21:13:28 PDT 2009


Revision: 2340
          http://trac.macosforge.org/projects/ruby/changeset/2340
Author:   lsansonetti at apple.com
Date:     2009-08-16 21:13:27 -0700 (Sun, 16 Aug 2009)
Log Message:
-----------
A few string/regexp optimizations, plus a fix for a GC crash

Modified Paths:
--------------
    MacRuby/trunk/include/ruby/re.h
    MacRuby/trunk/re.c
    MacRuby/trunk/string.c

Modified: MacRuby/trunk/include/ruby/re.h
===================================================================
--- MacRuby/trunk/include/ruby/re.h	2009-08-17 04:12:19 UTC (rev 2339)
+++ MacRuby/trunk/include/ruby/re.h	2009-08-17 04:13:27 UTC (rev 2340)
@@ -51,6 +51,7 @@
 
 VALUE rb_reg_regcomp(VALUE);
 int rb_reg_search(VALUE, VALUE, int, int);
+int rb_reg_search2(VALUE, VALUE, int, int, bool);
 VALUE rb_reg_regsub(VALUE, VALUE, struct re_registers *, VALUE);
 int rb_reg_adjust_startpos(VALUE, VALUE, int, int);
 void rb_match_busy(VALUE);

Modified: MacRuby/trunk/re.c
===================================================================
--- MacRuby/trunk/re.c	2009-08-17 04:12:19 UTC (rev 2339)
+++ MacRuby/trunk/re.c	2009-08-17 04:13:27 UTC (rev 2340)
@@ -1451,9 +1451,9 @@
 }
 
 int
-rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
+rb_reg_search2(VALUE re, VALUE str, int pos, int reverse, bool need_match_str)
 {
-    regex_t *reg0 = RREGEXP(re)->ptr, *reg;
+    regex_t *reg0 = RREGEXP(re)->ptr;
     int busy = FL_TEST(re, REG_BUSY);
 
     static struct re_registers *regs = NULL;
@@ -1472,7 +1472,7 @@
     char *cstr = NULL;
     size_t charsize = 0;
     bool should_free = false;
-    reg = rb_reg_prepare_re(re, str, &cstr, &charsize, &should_free);
+    regex_t *reg = rb_reg_prepare_re(re, str, &cstr, &charsize, &should_free);
 
     char *range = cstr;
     FL_SET(re, REG_BUSY);
@@ -1536,6 +1536,7 @@
     VALUE match = rb_backref_get();
     if (NIL_P(match) || FL_TEST(match, MATCH_BUSY)) {
 	match = match_alloc(rb_cMatch, 0);
+	rb_backref_set(match);
     }
     else {
 	if (rb_safe_level() >= 3) {
@@ -1548,10 +1549,19 @@
 
     onig_region_copy(RMATCH_REGS(match), pregs);
     onig_region_free(pregs, 0);
-    GC_WB(&RMATCH(match)->str, rb_str_new4(str)); // OPTIMIZE
-    GC_WB(&RMATCH(match)->regexp, re);
+    if (need_match_str) {
+	if (RMATCH(match)->str == 0
+		|| !CFEqual((CFTypeRef)RMATCH(match)->str, (CFTypeRef)str)) {
+	    GC_WB(&RMATCH(match)->str, rb_str_new4(str));
+	}
+    }
+    else {
+	RMATCH(match)->str = 0;
+    }
+    if (RMATCH(match)->regexp != re) {
+	GC_WB(&RMATCH(match)->regexp, re);
+    }
     RMATCH(match)->rmatch->char_offset_updated = 0;
-    rb_backref_set(match);
 
     OBJ_INFECT(match, re);
     OBJ_INFECT(match, str);
@@ -1559,6 +1569,12 @@
     return result;
 }
 
+int
+rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
+{
+    return rb_reg_search2(re, str, pos, reverse, true);
+}
+
 VALUE
 rb_reg_nth_defined(int nth, VALUE match)
 {
@@ -2686,7 +2702,7 @@
     return re;
 }
 
-static VALUE reg_cache;
+static VALUE reg_cache = 0;
 
 VALUE
 rb_reg_regcomp(VALUE str)
@@ -2698,10 +2714,16 @@
 #else
 	&& ENCODING_GET(reg_cache) == ENCODING_GET(str)
 #endif
-        && memcmp(RREGEXP(reg_cache)->str, RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
+	&& memcmp(RREGEXP(reg_cache)->str, RSTRING_PTR(str), RSTRING_LEN(str)) == 0) {
 	return reg_cache;
+    }
 
-    return reg_cache = rb_reg_new_str(save_str, 0);
+    if (reg_cache != 0) {
+	GC_RELEASE(reg_cache);
+    }
+    reg_cache = rb_reg_new_str(save_str, 0);
+    GC_RETAIN(reg_cache);
+    return reg_cache;
 }
 
 /*

Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c	2009-08-17 04:12:19 UTC (rev 2339)
+++ MacRuby/trunk/string.c	2009-08-17 04:13:27 UTC (rev 2340)
@@ -673,6 +673,11 @@
 	len = n - beg;
     }
 
+    if (*(VALUE *)str == rb_cByteString) {
+	UInt8 *str_data = rb_bytestring_byte_pointer(str);
+	return rb_bytestring_new_with_data(str_data + beg, len);
+    }
+
     substr = CFStringCreateMutable(NULL, 0);
 
     if (len == 1) {
@@ -2002,7 +2007,7 @@
 }
 
 static VALUE
-str_gsub(SEL sel, int argc, VALUE *argv, VALUE str, int bang)
+str_gsub(SEL sel, int argc, VALUE *argv, VALUE str, bool bang)
 {
     bool iter = false;
     bool tainted = false;
@@ -2144,7 +2149,7 @@
 static VALUE
 rb_str_gsub_bang(VALUE str, SEL sel, int argc, VALUE *argv)
 {
-    return str_gsub(sel, argc, argv, str, 1);
+    return str_gsub(sel, argc, argv, str, true);
 }
 
 
@@ -2184,7 +2189,7 @@
 static VALUE
 rb_str_gsub(VALUE str, SEL sel, int argc, VALUE *argv)
 {
-    return str_gsub(sel, argc, argv, str, 0);
+    return str_gsub(sel, argc, argv, str, false);
 }
 
 
@@ -3567,10 +3572,13 @@
 
     if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
 	lim = NUM2INT(limit);
-	if (lim <= 0) limit = Qnil;
+	if (lim <= 0) {
+	    limit = Qnil;
+	}
 	else if (lim == 1) {
-	    if (clen == 0)
+	    if (clen == 0) {
 		return rb_ary_new2(0);
+	    }
 	    return rb_ary_new3(1, str);
 	}
 	i = 1;
@@ -3603,28 +3611,31 @@
     if (awk_split || spat_string) {
 	CFRange search_range;
 	CFCharacterSetRef charset = NULL;
-	if (spat == Qnil)
+	if (spat == Qnil) {
 	    charset = CFCharacterSetGetPredefined(
-		kCFCharacterSetWhitespaceAndNewline);
+		    kCFCharacterSetWhitespaceAndNewline);
+	}
 	search_range = CFRangeMake(0, clen);
 	do {
 	    CFRange result_range;
 	    CFRange substr_range;
 	    if (spat != Qnil) {
 		if (!CFStringFindWithOptions((CFStringRef)str, 
-		    (CFStringRef)spat,
-		    search_range,
-		    0,
-		    &result_range))
+			    (CFStringRef)spat,
+			    search_range,
+			    0,
+			    &result_range)) {
 		    break;
+		}
 	    }
 	    else {
 		if (!CFStringFindCharacterFromSet((CFStringRef)str,
-		    charset, 
-		    search_range,
-		    0,
-		    &result_range))
+			    charset, 
+			    search_range,
+			    0,
+			    &result_range)) {
 		    break;
+		}
 	    }
 
 	    substr_range.location = search_range.location;
@@ -3632,23 +3643,21 @@
 		- search_range.location;
 
 	    if (awk_split == Qfalse || substr_range.length > 0) {
-		VALUE substr;
-	       
-		substr = rb_str_subseq(str, substr_range.location,
+		VALUE substr = rb_str_subseq(str, substr_range.location,
 		    substr_range.length);
 
 		if (awk_split == Qtrue) {
 		    CFStringTrimWhitespace((CFMutableStringRef)substr);
-		    if (CFStringGetLength((CFStringRef)substr) > 0)
+		    if (CFStringGetLength((CFStringRef)substr) > 0) {
 			rb_ary_push(result, substr);
+		    }
 		}
 		else {
 		    rb_ary_push(result, substr);
 		}
 	    }
 
-	    search_range.location = result_range.location 
-		+ result_range.length;
+	    search_range.location = result_range.location + result_range.length;
 	    search_range.length = clen - search_range.location;
 	}
 	while ((limit == Qnil || --lim > 1));
@@ -3656,26 +3665,22 @@
     }
     else {
 	long start = beg;
-	long idx;
-	int last_null = 0;
+	bool last_null = false;
 	struct re_registers *regs;
 
-	while ((end = rb_reg_search(spat, str, start, 0)) >= 0) {
+	while ((end = rb_reg_search2(spat, str, start, 0, false)) >= 0) {
 	    regs = RMATCH_REGS(rb_backref_get());
 	    if (start == end && BEG(0) == END(0)) {
 		if (0) {
 		    break;
 		}
-		else if (last_null == 1) {
+		else if (last_null) {
 		    rb_ary_push(result, rb_str_subseq(str, beg, 1));
 		    beg = start;
 		}
 		else {
-                    if (start == clen)
-                        start++;
-                    else
-			start += 1;
-		    last_null = 1;
+		    start++;
+		    last_null = true;
 		    continue;
 		}
 	    }
@@ -3683,17 +3688,23 @@
 		rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
 		beg = start = END(0);
 	    }
-	    last_null = 0;
+	    last_null = false;
 
-	    for (idx=1; idx < regs->num_regs; idx++) {
-		if (BEG(idx) == -1) continue;
-		if (BEG(idx) == END(idx))
+	    for (long idx = 1; idx < regs->num_regs; idx++) {
+		if (BEG(idx) == -1) {
+		    continue;
+		}
+		if (BEG(idx) == END(idx)) {
 		    tmp = rb_str_new5(str, 0, 0);
-		else
-		    tmp = rb_str_subseq(str, BEG(idx), END(idx)-BEG(idx));
+		}
+		else {
+		    tmp = rb_str_subseq(str, BEG(idx), END(idx) - BEG(idx));
+		}
 		rb_ary_push(result, tmp);
 	    }
-	    if (!NIL_P(limit) && lim <= ++i) break;
+	    if (!NIL_P(limit) && lim <= ++i) {
+		break;
+	    }
 	}
     }
     if (clen > 0 && (!NIL_P(limit) || clen > beg || lim < 0)) {
@@ -4284,14 +4295,15 @@
 	return result;
     }
 
-    if (rb_reg_search(pat, str, *start, 0) >= 0) {
+    if (rb_reg_search2(pat, str, *start, 0, false) >= 0) {
 	match = rb_backref_get();
+	GC_WB(&RMATCH(match)->str, str);
 	regs = RMATCH_REGS(match);
 	if (BEG(0) == END(0)) {
 	    /*
 	     * Always consume at least one character of the input string
 	     */
-		*start = END(0)+1;
+	    *start = END(0)+1;
 	}
 	else {
 	    *start = END(0);
@@ -4358,10 +4370,8 @@
 
 	while (!NIL_P(result = scan_once(str, pat, &start, len, 
 					 pat_is_string))) {
-	    match = rb_backref_get();
 	    rb_ary_push(ary, result);
 	}
-	rb_backref_set(match);
 	return ary;
     }
 
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20090816/fed5c062/attachment-0001.html>


More information about the macruby-changes mailing list