#1012: Inconsistent Regex behaviour -------------------------------------------+-------------------------------- Reporter: harry@… | Owner: lsansonetti@… Type: defect | Status: new Priority: minor | Milestone: Component: MacRuby | Keywords: -------------------------------------------+-------------------------------- Comment(by watson1978@…):
Maybe it doesn't work for #rindex since it applies the regex backward? (ICU does not support that, so we currently mimic it, as you can see in re.c). I see. It seems I need to learn more about ICU X(
[[BR]] I modified the patch because my previous version introduced another bug.[[BR]] The behavior of String#gsub, #scan and #split had changed. Fortunately, those methods only need to use uregex_findNext and should not use uregex_setRegion. {{{ #!diff diff --git a/re.c b/re.c index afbed36..22514a2 100644 --- a/re.c +++ b/re.c @@ -747,8 +747,8 @@ rb_reg_matcher_destroy(VALUE matcher) xfree((void *)matcher); } -int -rb_reg_matcher_search(VALUE re, VALUE matcher, int pos, bool reverse) +static int +rb_reg_matcher_search_find(VALUE re, VALUE matcher, int pos, bool reverse, bool findFirst) { rb_regexp_matcher_t *re_matcher = (rb_regexp_matcher_t *)matcher; @@ -763,7 +763,7 @@ rb_reg_matcher_search(VALUE re, VALUE matcher, int pos, bool reverse) if (chars_len < 0) { chars_len = 0; } - + if (pos > chars_len || pos < 0) { rb_backref_set(Qnil); return -1; @@ -785,10 +785,15 @@ rb_reg_matcher_search(VALUE re, VALUE matcher, int pos, bool reverse) return -1; } } - else if (!uregex_find(re_matcher->pattern, pos, &status)) { - // No match. - rb_backref_set(Qnil); - return -1; + else { + if (findFirst) { + uregex_setRegion(re_matcher->pattern, pos, chars_len, &status); + } + if (!uregex_findNext(re_matcher->pattern, &status)) { + // No match. + rb_backref_set(Qnil); + return -1; + } } // Match found. 
@@ -839,6 +844,18 @@ rb_reg_matcher_search(VALUE re, VALUE matcher, int pos, bool reverse) return res[0].beg; } +int +rb_reg_matcher_search_first(VALUE re, VALUE matcher, int pos, bool reverse) +{ + return rb_reg_matcher_search_find(re, matcher, pos, reverse, true); +} + +int +rb_reg_matcher_search_next(VALUE re, VALUE matcher, int pos, bool reverse) +{ + return rb_reg_matcher_search_find(re, matcher, pos, reverse, false); +} + static long reg_match_pos(VALUE re, VALUE *strp, long pos) { diff --git a/re.h b/re.h index 595ee85..2006118 100644 --- a/re.h +++ b/re.h @@ -25,13 +25,15 @@ VALUE rb_regexp_source(VALUE re); VALUE rb_reg_matcher_new(VALUE re, VALUE str); void rb_reg_matcher_destroy(VALUE matcher); -int rb_reg_matcher_search(VALUE re, VALUE matcher, int pos, bool reverse); +int rb_reg_matcher_search_first(VALUE re, VALUE matcher, int pos, bool reverse); +int rb_reg_matcher_search_next(VALUE re, VALUE matcher, int pos, bool reverse); +#define rb_reg_matcher_search rb_reg_matcher_search_next static inline int rb_reg_search(VALUE re, VALUE str, int pos, bool reverse) { VALUE matcher = rb_reg_matcher_new(re, str); - const int res = rb_reg_matcher_search(re, matcher, pos, reverse); + const int res = rb_reg_matcher_search_first(re, matcher, pos, reverse); rb_reg_matcher_destroy(matcher); return res; } }}} Test Script: {{{ require 'test/unit/assertions.rb' include Test::Unit::Assertions str = "hello homely world. hah!" assert_equal("huh? homely world. hah!", str.gsub(/\Ah\S+\s*/, "huh? ")) assert_equal(["hello "], str.scan(/\Ah\S+\s*/)) assert_equal(["", "homely world. 
hah!"], str.split(/\Ah\S+\s*/)) assert_equal(" Text\n", "Text\n".gsub(/^/, ' ')) assert_equal(" Text\n Foo", "Text\nFoo".gsub(/^/, ' ')) str = "0123456789" assert_equal(["01", "23", "45", "67", "89"], str.scan(/\G\d\d/)) assert_equal("x23456789", str.sub(/\G\d\d/, "x")) assert_equal("xxxxx", str.gsub(/\G\d\d/, "x")) assert_equal(2, str.index(/\G\d\d/, 2)) # not yet #assert_equal(2, str.rindex(/\G\d\d/, 2)) puts :ok }}} -- Ticket URL: <http://www.macruby.org/trac/ticket/1012#comment:5> MacRuby <http://macruby.org/>