[macruby-changes] [3862] MacRuby/trunk
source_changes at macosforge.org
source_changes at macosforge.org
Wed Mar 24 23:12:32 PDT 2010
Revision: 3862
http://trac.macosforge.org/projects/ruby/changeset/3862
Author: lsansonetti at apple.com
Date: 2010-03-24 23:12:31 -0700 (Wed, 24 Mar 2010)
Log Message:
-----------
always use ucs2 mode, added shared regexp matchers for #gsub, #scan and #split
Modified Paths:
--------------
MacRuby/trunk/re.cpp
MacRuby/trunk/re.h
MacRuby/trunk/string.c
Modified: MacRuby/trunk/re.cpp
===================================================================
--- MacRuby/trunk/re.cpp 2010-03-24 23:51:11 UTC (rev 3861)
+++ MacRuby/trunk/re.cpp 2010-03-25 06:12:31 UTC (rev 3862)
@@ -20,6 +20,8 @@
VALUE rb_cRegexp;
VALUE rb_cMatch;
+static VALUE rb_cRegexpMatcher;
+
typedef struct rb_regexp {
struct RBasic basic;
UnicodeString *unistr;
@@ -550,33 +552,88 @@
* p lhs # undefined local variable
*/
-int
-rb_reg_search(VALUE re, VALUE str, int pos, bool reverse)
+typedef struct rb_regexp_matcher {
+ struct RBasic basic;
+ UnicodeString *unistr;
+ RegexMatcher *matcher;
+} rb_regexp_matcher_t;
+
+static IMP regexp_matcher_finalize_imp_super = NULL;
+
+static void
+regexp_matcher_finalize_imp(void *rcv, SEL sel)
{
- const long len = rb_str_chars_len(str);
- if (pos > len || pos < 0) {
- rb_backref_set(Qnil);
- return -1;
+ rb_regexp_matcher_t *matcher = (rb_regexp_matcher_t *)rcv;
+ if (matcher->unistr != NULL) {
+ delete matcher->unistr;
+ matcher->unistr = NULL;
}
+ if (matcher->matcher != NULL) {
+ delete matcher->matcher;
+ matcher->matcher = NULL;
+ }
+ if (regexp_matcher_finalize_imp_super != NULL) {
+ ((void(*)(void *, SEL))regexp_matcher_finalize_imp_super)(rcv, sel);
+ }
+}
+VALUE
+rb_reg_matcher_new(VALUE re, VALUE str)
+{
+ NEWOBJ(matcher, struct rb_regexp_matcher);
+ OBJSETUP(matcher, rb_cRegexpMatcher, T_OBJECT);
UnicodeString *unistr = str_to_unistr(str);
assert(unistr != NULL);
UErrorCode status = U_ZERO_ERROR;
assert(RREGEXP(re)->pattern != NULL);
- RegexMatcher *matcher = RREGEXP(re)->pattern->matcher(*unistr, status);
+ RegexMatcher *regexp_matcher =
+ RREGEXP(re)->pattern->matcher(*unistr, status);
- if (matcher == NULL) {
+ if (regexp_matcher == NULL) {
delete unistr;
rb_raise(rb_eRegexpError, "can't create matcher: %s",
u_errorName(status));
}
+ matcher->matcher = regexp_matcher;
+ matcher->unistr = unistr;
+
+ return (VALUE)matcher;
+}
+
+void
+rb_reg_matcher_destroy(VALUE matcher)
+{
+ rb_regexp_matcher_t *m = (rb_regexp_matcher_t *)matcher;
+ if (m ->unistr != NULL) {
+ delete m ->unistr;
+ m->unistr = NULL;
+ }
+ if (m ->matcher != NULL) {
+ delete m ->matcher;
+ m->matcher = NULL;
+ }
+ xfree(m);
+}
+
+int
+rb_reg_matcher_search(VALUE re, VALUE matcher, int pos, bool reverse)
+{
+ rb_regexp_matcher *re_matcher = (rb_regexp_matcher *)matcher;
+
+ if (pos > re_matcher->unistr->length() || pos < 0) {
+ rb_backref_set(Qnil);
+ return -1;
+ }
+
+ UErrorCode status = U_ZERO_ERROR;
+
if (reverse) {
const int orig = pos;
while (pos >= 0) {
- if (matcher->find(pos, status)) {
- if (matcher->start(status) <= orig) {
+ if (re_matcher->matcher->find(pos, status)) {
+ if (re_matcher->matcher->start(status) <= orig) {
break;
}
}
@@ -584,20 +641,18 @@
}
if (pos < 0) {
// No match.
- goto no_match;
+ rb_backref_set(Qnil);
+ return -1;
}
}
- else if (!matcher->find(pos, status)) {
+ else if (!re_matcher->matcher->find(pos, status)) {
// No match.
-no_match:
rb_backref_set(Qnil);
- delete matcher;
- delete unistr;
return -1;
}
// Match found.
- const int res_count = 1 + matcher->groupCount();
+ const int res_count = 1 + re_matcher->matcher->groupCount();
rb_match_result_t *res = NULL;
VALUE match = rb_backref_get();
@@ -628,23 +683,22 @@
}
RMATCH(match)->results_count = res_count;
- GC_WB(&RMATCH(match)->regexp, re);
+ if (RMATCH(match)->regexp != (rb_regexp_t *)re) {
+ GC_WB(&RMATCH(match)->regexp, re);
+ }
rb_str_set_len(RMATCH(match)->str, 0);
- rb_str_append_uchars(RMATCH(match)->str, unistr->getBuffer(),
- unistr->length());
+ rb_str_append_uchars(RMATCH(match)->str, re_matcher->unistr->getBuffer(),
+ re_matcher->unistr->length());
- res[0].beg = matcher->start(status);
- res[0].end = matcher->end(status);
+ res[0].beg = re_matcher->matcher->start(status);
+ res[0].end = re_matcher->matcher->end(status);
- for (int i = 0; i < matcher->groupCount(); i++) {
- res[i + 1].beg = matcher->start(i + 1, status);
- res[i + 1].end = matcher->end(i + 1, status);
+ for (int i = 0; i < re_matcher->matcher->groupCount(); i++) {
+ res[i + 1].beg = re_matcher->matcher->start(i + 1, status);
+ res[i + 1].end = re_matcher->matcher->end(i + 1, status);
}
- delete matcher;
- delete unistr;
-
return res[0].beg;
}
@@ -1178,6 +1232,12 @@
rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(REGEXP_OPT_EXTENDED));
rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(REGEXP_OPT_MULTILINE));
+ rb_cRegexpMatcher = rb_define_class("_RegexpMatcher", rb_cObject);
+
+ regexp_matcher_finalize_imp_super = rb_objc_install_method2(
+ (Class)rb_cRegexpMatcher, "finalize",
+ (IMP)regexp_matcher_finalize_imp);
+
Init_Match();
}
Modified: MacRuby/trunk/re.h
===================================================================
--- MacRuby/trunk/re.h 2010-03-24 23:51:11 UTC (rev 3861)
+++ MacRuby/trunk/re.h 2010-03-25 06:12:31 UTC (rev 3862)
@@ -21,9 +21,21 @@
VALUE rb_reg_quote(VALUE pat);
VALUE rb_reg_regcomp(VALUE str);
-int rb_reg_search(VALUE re, VALUE str, int pos, bool reverse);
VALUE rb_regexp_source(VALUE re);
+VALUE rb_reg_matcher_new(VALUE re, VALUE str);
+void rb_reg_matcher_destroy(VALUE matcher);
+int rb_reg_matcher_search(VALUE re, VALUE matcher, int pos, bool reverse);
+
+static inline int
+rb_reg_search(VALUE re, VALUE str, int pos, bool reverse)
+{
+ VALUE matcher = rb_reg_matcher_new(re, str);
+ const int res = rb_reg_matcher_search(re, matcher, pos, reverse);
+ rb_reg_matcher_destroy(matcher);
+ return res;
+}
+
int rb_reg_options_to_mri(int opt);
int rb_reg_options_from_mri(int mri_opt);
Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c 2010-03-24 23:51:11 UTC (rev 3861)
+++ MacRuby/trunk/string.c 2010-03-25 06:12:31 UTC (rev 3862)
@@ -1109,7 +1109,7 @@
str_include_string(rb_str_t *self, rb_str_t *searched)
{
return str_offset_in_bytes_for_string(self, searched, 0,
- self->length_in_bytes, false) != -1;
+ self->length_in_bytes, true) != -1;
}
static rb_str_t *
@@ -1144,7 +1144,7 @@
if (IS_RSTR(str)) {
if (str_try_making_data_uchars(RSTR(str))) {
chars = RSTR(str)->data.uchars;
- chars_len = str_length(RSTR(str), false);
+ chars_len = str_length(RSTR(str), true);
}
else {
//assert(BINARY_ENC(RSTR(str)->encoding));
@@ -1180,7 +1180,7 @@
return Qnil;
}
- const long n = str_length(RSTR(str), false);
+ const long n = str_length(RSTR(str), true);
if (beg < 0) {
beg += n;
}
@@ -1194,7 +1194,7 @@
len = n - beg;
}
- rb_str_t *substr = str_get_characters(RSTR(str), beg, beg + len - 1, false);
+ rb_str_t *substr = str_get_characters(RSTR(str), beg, beg + len - 1, true);
return substr == NULL ? Qnil : (VALUE)substr;
}
@@ -1207,7 +1207,7 @@
rb_raise(rb_eIndexError, "negative length %ld", len);
}
- const long slen = str_length(RSTR(self), false);
+ const long slen = str_length(RSTR(self), true);
if (slen < beg) {
out_of_range:
rb_raise(rb_eIndexError, "index %ld out of string", beg);
@@ -1224,7 +1224,7 @@
rstr_modify(self);
- str_splice(RSTR(self), beg, len, strstr, false);
+ str_splice(RSTR(self), beg, len, strstr, true);
if (OBJ_TAINTED(strstr)) {
OBJ_TAINT(self);
@@ -1698,8 +1698,8 @@
default:
{
long beg = 0, len = 0;
- switch (rb_range_beg_len(indx, &beg, &len, str_length(RSTR(str),
- false), 0)) {
+ switch (rb_range_beg_len(indx, &beg, &len,
+ str_length(RSTR(str), true), 0)) {
case Qfalse:
break;
case Qnil:
@@ -1941,7 +1941,7 @@
static VALUE
rstr_index(VALUE self, SEL sel, int argc, VALUE *argv)
{
- const long len = str_length(RSTR(self), false);
+ const long len = str_length(RSTR(self), true);
VALUE sub, initpos;
long pos;
@@ -2009,7 +2009,7 @@
static VALUE
rstr_rindex(VALUE self, SEL sel, int argc, VALUE *argv)
{
- const long len = str_length(RSTR(self), false);
+ const long len = str_length(RSTR(self), true);
VALUE sub, initpos;
long pos;
@@ -2360,7 +2360,7 @@
continue;
}
const long pos = str_index_for_string(RSTR(str), str_need_string(tmp),
- 0, rb_str_chars_len(tmp), false, false);
+ 0, rb_str_chars_len(tmp), false, true);
if (pos == 0) {
return Qtrue;
}
@@ -2389,7 +2389,7 @@
continue;
}
const long pos = str_index_for_string(RSTR(str), str_need_string(tmp),
- len - sublen, len, false, false);
+ len - sublen, len, false, true);
if (pos == len - sublen) {
return Qtrue;
}
@@ -2730,9 +2730,11 @@
ary = rb_ary_new();
}
+ VALUE matcher = rb_reg_matcher_new(pat, self);
+
VALUE match = Qnil;
long start = 0;
- while (rb_reg_search(pat, self, start, false) >= 0) {
+ while (rb_reg_matcher_search(pat, matcher, start, false) >= 0) {
match = rb_backref_get();
int count = 0;
@@ -2777,6 +2779,8 @@
rb_backref_set(match);
+ rb_reg_matcher_destroy(matcher);
+
return block_given ? self : ary;
}
@@ -2827,7 +2831,7 @@
static VALUE
rstr_split(VALUE str, SEL sel, int argc, VALUE *argv)
{
- const long len = str_length(RSTR(str), false);
+ const long len = str_length(RSTR(str), true);
int lim = 0;
VALUE spat, limit;
@@ -2914,10 +2918,10 @@
}
else {
rb_str_t *spat_str = str_need_string(spat);
- const long spat_len = str_length(spat_str, false);
+ const long spat_len = str_length(spat_str, true);
do {
const long pos = str_index_for_string(RSTR(str), spat_str,
- beg, -1, false, false);
+ beg, -1, false, true);
if (pos == -1) {
break;
}
@@ -2930,9 +2934,10 @@
else {
long start = beg;
bool last_null = false;
+ VALUE matcher = rb_reg_matcher_new(spat, str);
again:
do {
- const long pos = rb_reg_search(spat, str, start, false);
+ const long pos = rb_reg_matcher_search(spat, matcher, start, false);
if (pos < 0) {
break;
}
@@ -2982,6 +2987,8 @@
}
}
while (limit == Qnil || --lim > 1);
+
+ rb_reg_matcher_destroy(matcher);
}
if (len > 0 && (!NIL_P(limit) || len > beg || lim_orig < 0)) {
@@ -3181,10 +3188,10 @@
if (rs == rb_default_rs
|| (rslen == 1 && rb_str_get_uchar(rs, 0) == '\n')) {
// Remove trailing carriage return.
- UChar c = str_get_uchar(RSTR(str), len - 1, false);
+ UChar c = str_get_uchar(RSTR(str), len - 1, true);
if (c == '\n') {
to_del++;
- c = len > 1 ? str_get_uchar(RSTR(str), len - 2, false) : 0;
+ c = len > 1 ? str_get_uchar(RSTR(str), len - 2, true) : 0;
}
if (c == '\r' && (rslen > 0 || to_del != 0)) {
to_del++;
@@ -3193,12 +3200,12 @@
else if (rslen == 0) {
// Remove all trailing carriage returns.
for (int i = len - 1; i >= 0; i--) {
- UChar c = str_get_uchar(RSTR(str), i, false);
+ UChar c = str_get_uchar(RSTR(str), i, true);
if (c != '\n') {
break;
}
to_del++;
- if (i > 0 && str_get_uchar(RSTR(str), i - 1, false) == '\r') {
+ if (i > 0 && str_get_uchar(RSTR(str), i - 1, true) == '\r') {
to_del++;
i--;
}
@@ -3207,7 +3214,7 @@
else if (rslen <= len) {
// Remove trailing substring.
if (str_index_for_string(RSTR(str), str_need_string(rs),
- len - rslen, -1, false, false) >= 0) {
+ len - rslen, -1, false, true) >= 0) {
to_del += rslen;
}
}
@@ -3215,7 +3222,7 @@
if (to_del == 0) {
return Qnil;
}
- str_delete(RSTR(str), len - to_del, to_del, false);
+ str_delete(RSTR(str), len - to_del, to_del, true);
return str;
}
@@ -3260,7 +3267,7 @@
{
rstr_modify(str);
- const long len = str_length(RSTR(str), false);
+ const long len = str_length(RSTR(str), true);
if (len == 0) {
return Qnil;
}
@@ -3271,7 +3278,7 @@
to_del++;
}
- str_delete(RSTR(str), len - to_del, to_del, false);
+ str_delete(RSTR(str), len - to_del, to_del, true);
return str;
}
@@ -3481,7 +3488,7 @@
rstr_modify(str);
str_splice(RSTR(str), results[0].beg, results[0].end - results[0].beg,
- str_need_string(repl), false);
+ str_need_string(repl), true);
if (OBJ_TAINTED(repl)) {
tainted = true;
}
@@ -3575,7 +3582,7 @@
VALUE dest = rb_str_new5(str, NULL, 0);
long offset = 0, last = 0;
bool changed = false;
- const long len = str_length(RSTR(str), false);
+ const long len = str_length(RSTR(str), true);
VALUE match = Qnil;
if (bang) {
@@ -3583,10 +3590,13 @@
rstr_modify(str);
}
+ VALUE matcher = rb_reg_matcher_new(pat, str);
+
while (true) {
- const long pos = rb_reg_search(pat, str, offset, false);
+ const long pos = rb_reg_matcher_search(pat, matcher, offset, false);
if (pos < 0) {
if (!changed) {
+ rb_reg_matcher_destroy(matcher);
return bang ? Qnil : rstr_dup(str, 0);
}
if (last < len) {
@@ -3639,6 +3649,8 @@
}
}
+ rb_reg_matcher_destroy(matcher);
+
rb_backref_set(match);
if (bang) {
@@ -3939,7 +3951,7 @@
if (padwidth > width) {
pad = RSTR(rstr_substr((VALUE)pad, 0, width));
}
- str_insert(str, index, pad, false);
+ str_insert(str, index, pad, true);
width -= padwidth;
index += padwidth;
}
@@ -3960,12 +3972,12 @@
}
rb_str_t *padstr = str_need_string(pad);
- const long padwidth = str_length(RSTR(padstr), false);
+ const long padwidth = str_length(RSTR(padstr), true);
if (padwidth == 0) {
rb_raise(rb_eArgError, "zero width padding");
}
- const long len = str_length(RSTR(str), false);
+ const long len = str_length(RSTR(str), true);
long width = NUM2LONG(w);
str = rb_str_new3(str);
if (width < 0 || width <= len) {
@@ -4051,7 +4063,7 @@
{
rstr_modify(str);
- long len = str_length(RSTR(str), false);
+ long len = str_length(RSTR(str), true);
if (len == 0) {
return Qnil;
}
@@ -4069,7 +4081,7 @@
}
if (pos > 0) {
- str_delete(RSTR(str), 0, pos, false);
+ str_delete(RSTR(str), 0, pos, true);
len -= pos;
changed = true;
}
@@ -4087,7 +4099,7 @@
}
if (pos < len - 1 && pos >= 0) {
- str_delete(RSTR(str), pos + 1, len - pos - 1, false);
+ str_delete(RSTR(str), pos + 1, len - pos - 1, true);
changed = true;
}
}
@@ -4260,13 +4272,13 @@
rs_str = str_need_string(rb_default_rs);
}
- const long len = str_length(RSTR(str), false);
+ const long len = str_length(RSTR(str), true);
const bool tainted = OBJ_TAINTED(str);
long pos = 0;
do {
const long off = str_index_for_string(RSTR(str), rs_str, pos, -1,
- false, false);
+ false, true);
long substr_len = 0;
if (off < 0) {
@@ -5929,7 +5941,7 @@
rb_str_get_uchar(VALUE str, long pos)
{
if (RSTR(str)) {
- return str_get_uchar(RSTR(str), pos, false);
+ return str_get_uchar(RSTR(str), pos, true);
}
assert(pos >= 0 && pos < CFStringGetLength((CFStringRef)str));
return CFStringGetCharacterAtIndex((CFStringRef)str, pos);
@@ -5965,7 +5977,7 @@
rb_str_chars_len(VALUE str)
{
if (IS_RSTR(str)) {
- return str_length(RSTR(str), false);
+ return str_length(RSTR(str), true);
}
return CFStringGetLength((CFStringRef)str);
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20100324/cefe9541/attachment-0001.html>
More information about the macruby-changes mailing list