[macruby-changes] [5136] MacRuby/trunk/string.c
source_changes at macosforge.org
source_changes at macosforge.org
Fri Jan 7 18:04:21 PST 2011
Revision: 5136
http://trac.macosforge.org/projects/ruby/changeset/5136
Author: vincent.isambart at gmail.com
Date: 2011-01-07 18:04:16 -0800 (Fri, 07 Jan 2011)
Log Message:
-----------
much faster String#gsub
Modified Paths:
--------------
MacRuby/trunk/string.c
Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c 2011-01-08 01:29:20 UTC (rev 5135)
+++ MacRuby/trunk/string.c 2011-01-08 02:04:16 UTC (rev 5136)
@@ -920,7 +920,8 @@
}
static void
-str_concat_string_part(rb_str_t *self, rb_str_t *str, long start, long len)
+str_concat_string_part(rb_str_t *self, rb_str_t *str, long start, long len,
+ character_boundaries_cache_t *cache_for_str)
{
assert(len >= 0 && start >= 0);
if (len == 0) {
@@ -931,18 +932,15 @@
str_reset_flags(self);
self->encoding = enc;
- character_boundaries_cache_t local_cache;
- reset_character_boundaries_cache(&local_cache);
-
character_boundaries_t first_boundaries =
- str_get_character_boundaries(str, start, &local_cache);
+ str_get_character_boundaries(str, start, cache_for_str);
character_boundaries_t last_boundaries;
if (len == 1) {
last_boundaries = first_boundaries;
}
else {
last_boundaries = str_get_character_boundaries(str, start+len-1,
- &local_cache);
+ cache_for_str);
}
if ((first_boundaries.start_offset_in_bytes == -1) ||
@@ -3639,15 +3637,20 @@
static VALUE
rb_reg_regsub(VALUE str, VALUE src, VALUE regexp, rb_match_result_t *results,
- int results_count)
+ int results_count, character_boundaries_cache_t *cache_for_src)
{
VALUE val = 0;
RB_STR_GET_UCHARS(str, str_chars, str_chars_len);
long pos = 0;
- long src_chars_len = -1;
+ // if we already have a cache, we make a local copy just before
+ // using it, so that we do not have to start from scratch later
+ // (as, for instance, "\\2\\1" would otherwise make us do)
+ character_boundaries_cache_t local_cache_for_src;
+ reset_character_boundaries_cache(&local_cache_for_src);
+
for (long i = 0; i < str_chars_len; i++) {
UChar c = str_chars[i];
if (c != '\\') {
@@ -3680,16 +3683,29 @@
break;
case '`':
+ if (cache_for_src != NULL) {
+ local_cache_for_src = *cache_for_src;
+ }
str_concat_string_part(RSTR(val), RSTR(src),
- 0, results[0].beg);
+ 0, results[0].beg, cache_for_src);
break;
case '\'':
- if (src_chars_len == -1) {
- src_chars_len = str_length(RSTR(src));
+ {
+ long src_chars_len;
+ if (cache_for_src == NULL) {
+ src_chars_len = str_length_with_cache(RSTR(src),
+ &local_cache_for_src);
+ }
+ else {
+ src_chars_len = str_length_with_cache(RSTR(src),
+ cache_for_src);
+ local_cache_for_src = *cache_for_src;
+ }
+ str_concat_string_part(RSTR(val), RSTR(src),
+ results[0].end, src_chars_len - results[0].end,
+ cache_for_src);
}
- str_concat_string_part(RSTR(val), RSTR(src),
- results[0].end, src_chars_len - results[0].end);
break;
case '+':
@@ -3717,8 +3733,12 @@
if (results[no].beg == -1) {
continue;
}
+ if (cache_for_src != NULL) {
+ local_cache_for_src = *cache_for_src;
+ }
str_concat_string_part(RSTR(val), RSTR(src),
- results[no].beg, results[no].end - results[no].beg);
+ results[no].beg, results[no].end - results[no].beg,
+ cache_for_src);
}
}
@@ -3796,7 +3816,7 @@
}
}
else {
- repl = rb_reg_regsub(repl, str, pat, results, count);
+ repl = rb_reg_regsub(repl, str, pat, results, count, NULL);
}
rstr_modify(str);
@@ -3915,6 +3935,9 @@
VALUE matcher = rb_reg_matcher_new(pat, str);
+ character_boundaries_cache_t local_cache_for_str;
+ reset_character_boundaries_cache(&local_cache_for_str);
+
while (true) {
const long pos = rb_reg_matcher_search(pat, matcher, offset, false);
if (pos < 0) {
@@ -3923,7 +3946,8 @@
return bang ? Qnil : rstr_dup(str, 0);
}
if (last < len) {
- VALUE substr = rstr_substr(str, last, len - last);
+ VALUE substr = rstr_substr_with_cache(str, last, len - last,
+ &local_cache_for_str);
if (substr != Qnil) {
str_concat_string(RSTR(dest), RSTR(substr));
}
@@ -3931,6 +3955,13 @@
break;
}
+ if (pos - last > 0) {
+ // this concatenation must be done before calling the block
+ // or doing the replacement or else the cache can't be used
+ str_concat_string_part(RSTR(dest), RSTR(str), last, pos - last,
+ &local_cache_for_str);
+ }
+
match = rb_backref_get();
int count = 0;
rb_match_result_t *results = rb_reg_match_results(match, &count);
@@ -3940,11 +3971,15 @@
if (block_given || !NIL_P(hash)) {
if (block_given) {
rb_match_busy(match);
- val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
+ VALUE to_yield = rb_reg_nth_match_with_cache(0, match,
+ &local_cache_for_str);
+ val = rb_obj_as_string(rb_yield(to_yield));
}
else {
- val = rb_hash_aref(hash, rstr_substr(str, results[0].beg,
- results[0].end - results[0].beg));
+ val = rb_hash_aref(hash, rstr_substr_with_cache(str,
+ results[0].beg,
+ results[0].end - results[0].beg,
+ &local_cache_for_str));
val = rb_obj_as_string(val);
}
if (bang) {
@@ -3956,13 +3991,10 @@
}
}
else {
- val = rb_reg_regsub(repl, str, pat, results, count);
+ val = rb_reg_regsub(repl, str, pat, results, count,
+ &local_cache_for_str);
}
- if (pos - last > 0) {
- str_concat_string(RSTR(dest),
- RSTR(rstr_substr(str, last, pos - last)));
- }
str_concat_string(RSTR(dest), str_need_string(val));
if (OBJ_TAINTED(val)) {
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20110107/4591e62e/attachment.html>
More information about the macruby-changes mailing list