[MacRuby] #1012: Inconsistent Regex behaviour

MacRuby ruby-noreply at macosforge.org
Mon Nov 29 20:27:07 PST 2010


#1012: Inconsistent Regex behaviour
-------------------------------------------+--------------------------------
 Reporter:  harry@…                        |       Owner:  lsansonetti@…        
     Type:  defect                         |      Status:  new                  
 Priority:  minor                          |   Milestone:                       
Component:  MacRuby                        |    Keywords:                       
-------------------------------------------+--------------------------------

Comment(by watson1978@…):

 > Maybe it doesn't work for #rindex since it applies the regex backward?
 (ICU does not support that, so we currently mimic it, as you can see in
 re.c).
 I see. It seems that I need to learn more about ICU X(

 [[BR]]
 I modified the patch because I had introduced another bug.[[BR]]
 The behavior of String#gsub, #scan and #split had changed.

 Fortunately, those methods only need to use uregex_findNext, without
 calling uregex_setRegion.
 {{{
 #!diff
 diff --git a/re.c b/re.c
 index afbed36..22514a2 100644
 --- a/re.c
 +++ b/re.c
 @@ -747,8 +747,8 @@ rb_reg_matcher_destroy(VALUE matcher)
      xfree((void *)matcher);
  }

 -int
 -rb_reg_matcher_search(VALUE re, VALUE matcher, int pos, bool reverse)
 +static int
 +rb_reg_matcher_search_find(VALUE re, VALUE matcher, int pos, bool
 reverse, bool findFirst)
  {
      rb_regexp_matcher_t *re_matcher = (rb_regexp_matcher_t *)matcher;

 @@ -763,7 +763,7 @@ rb_reg_matcher_search(VALUE re, VALUE matcher, int
 pos, bool reverse)
      if (chars_len < 0) {
         chars_len = 0;
      }
 -
 +
      if (pos > chars_len || pos < 0) {
         rb_backref_set(Qnil);
         return -1;
 @@ -785,10 +785,15 @@ rb_reg_matcher_search(VALUE re, VALUE matcher, int
 pos, bool reverse)
             return -1;
         }
      }
 -    else if (!uregex_find(re_matcher->pattern, pos, &status)) {
 -       // No match.
 -       rb_backref_set(Qnil);
 -       return -1;
 +    else {
 +       if (findFirst) {
 +           uregex_setRegion(re_matcher->pattern, pos, chars_len,
 &status);
 +       }
 +       if (!uregex_findNext(re_matcher->pattern, &status)) {
 +           // No match.
 +           rb_backref_set(Qnil);
 +           return -1;
 +       }
      }

      // Match found.
 @@ -839,6 +844,18 @@ rb_reg_matcher_search(VALUE re, VALUE matcher, int
 pos, bool reverse)
      return res[0].beg;
  }

 +int
 +rb_reg_matcher_search_first(VALUE re, VALUE matcher, int pos, bool
 reverse)
 +{
 +    return rb_reg_matcher_search_find(re, matcher, pos, reverse, true);
 +}
 +
 +int
 +rb_reg_matcher_search_next(VALUE re, VALUE matcher, int pos, bool
 reverse)
 +{
 +    return rb_reg_matcher_search_find(re, matcher, pos, reverse, false);
 +}
 +
  static long
  reg_match_pos(VALUE re, VALUE *strp, long pos)
  {
 diff --git a/re.h b/re.h
 index 595ee85..2006118 100644
 --- a/re.h
 +++ b/re.h
 @@ -25,13 +25,15 @@ VALUE rb_regexp_source(VALUE re);

  VALUE rb_reg_matcher_new(VALUE re, VALUE str);
  void rb_reg_matcher_destroy(VALUE matcher);
 -int rb_reg_matcher_search(VALUE re, VALUE matcher, int pos, bool
 reverse);
 +int rb_reg_matcher_search_first(VALUE re, VALUE matcher, int pos, bool
 reverse);
 +int rb_reg_matcher_search_next(VALUE re, VALUE matcher, int pos, bool
 reverse);
 +#define rb_reg_matcher_search rb_reg_matcher_search_next

  static inline int
  rb_reg_search(VALUE re, VALUE str, int pos, bool reverse)
  {
      VALUE matcher = rb_reg_matcher_new(re, str);
 -    const int res = rb_reg_matcher_search(re, matcher, pos, reverse);
 +    const int res = rb_reg_matcher_search_first(re, matcher, pos,
 reverse);
      rb_reg_matcher_destroy(matcher);
      return res;
  }
 }}}

 Test Script:
 {{{
 require 'test/unit/assertions.rb'
 include Test::Unit::Assertions

 str = "hello homely world. hah!"
 assert_equal("huh? homely world. hah!", str.gsub(/\Ah\S+\s*/, "huh? "))
 assert_equal(["hello "], str.scan(/\Ah\S+\s*/))
 assert_equal(["", "homely world. hah!"], str.split(/\Ah\S+\s*/))

 assert_equal(" Text\n", "Text\n".gsub(/^/, ' '))
 assert_equal(" Text\n Foo", "Text\nFoo".gsub(/^/, ' '))


 str = "0123456789"
 assert_equal(["01", "23", "45", "67", "89"], str.scan(/\G\d\d/))
 assert_equal("x23456789", str.sub(/\G\d\d/, "x"))
 assert_equal("xxxxx", str.gsub(/\G\d\d/, "x"))
 assert_equal(2, str.index(/\G\d\d/, 2))

 # not yet
 #assert_equal(2, str.rindex(/\G\d\d/, 2))

 puts :ok
 }}}

-- 
Ticket URL: <http://www.macruby.org/trac/ticket/1012#comment:5>
MacRuby <http://macruby.org/>



More information about the macruby-tickets mailing list