[macruby-changes] [3576] MacRuby/branches/icu

Thu Feb 18 19:58:07 PST 2010

Revision: 3576
          http://trac.macosforge.org/projects/ruby/changeset/3576
Author:   lsansonetti at apple.com
Date:     2010-02-18 19:58:06 -0800 (Thu, 18 Feb 2010)
Log Message:
-----------
more regexp work

Modified Paths:
--------------
    MacRuby/branches/icu/dispatcher.cpp
    MacRuby/branches/icu/include/ruby/intern.h
    MacRuby/branches/icu/parse.y
    MacRuby/branches/icu/re.cpp

Added Paths:
-----------
    MacRuby/branches/icu/re.h

Modified: MacRuby/branches/icu/dispatcher.cpp
===================================================================

--- MacRuby/branches/icu/dispatcher.cpp	2010-02-19 01:28:09 UTC (rev 3575)
+++ MacRuby/branches/icu/dispatcher.cpp	2010-02-19 03:58:06 UTC (rev 3576)
@@ -16,6 +16,7 @@
 #include "dtrace.h"
 #include "array.h"
 #include "hash.h"
+#include "re.h"
 
 #include <execinfo.h>
 #include <dlfcn.h>
@@ -1264,7 +1265,7 @@
 	    return rb_str_equal(self, other);
 
 	case T_REGEXP:
-	    return rb_reg_eqq(self, selEqq, other);
+	    return regexp_eqq(self, selEqq, other);
 
 	case T_SYMBOL:
 	    return (self == other ? Qtrue : Qfalse);

Modified: MacRuby/branches/icu/include/ruby/intern.h
===================================================================
--- MacRuby/branches/icu/include/ruby/intern.h	2010-02-19 01:28:09 UTC (rev 3575)
+++ MacRuby/branches/icu/include/ruby/intern.h	2010-02-19 03:58:06 UTC (rev 3576)
@@ -519,7 +519,6 @@
 VALUE rb_reg_new(const char *, long, int);
 VALUE rb_reg_match(VALUE, VALUE);
 int rb_reg_options(VALUE);
-VALUE rb_reg_eqq(VALUE, SEL, VALUE);
 void rb_set_kcode(const char*);
 const char* rb_get_kcode(void);
 /* ruby.c */

Modified: MacRuby/branches/icu/parse.y
===================================================================
--- MacRuby/branches/icu/parse.y	2010-02-19 01:28:09 UTC (rev 3575)
+++ MacRuby/branches/icu/parse.y	2010-02-19 03:58:06 UTC (rev 3576)
@@ -23,6 +23,7 @@
 #include "ruby/encoding.h"
 #include "encoding.h"
 #include "id.h"
+#include "re.h"
 #include <stdio.h>
 #include <errno.h>
 #include <ctype.h>
@@ -5626,23 +5627,19 @@
     return 0;
 }
 
-extern int rb_char_to_option_kcode(int c, int *option, int *kcode);
-
 static int
 parser_regx_options(struct parser_params *parser)
 {
-    int kcode = 0;
     int options = 0;
-    int c, opt, kc;
+    int c, opt;
 
     newtok();
     while (c = nextc(), ISALPHA(c)) {
         if (c == 'o') {
             options |= RE_OPTION_ONCE;
         }
-        else if (rb_char_to_option_kcode(c, &opt, &kc)) {
+	else if (rb_char_to_icu_option(c, &opt)) {
             options |= opt;
-	    if (kc >= 0) kcode = c;
         }
         else {
 	    tokadd(c);
@@ -5654,7 +5651,7 @@
 	compile_error(PARSER_ARG "unknown regexp option%s - %s",
 		      toklen() > 1 ? "s" : "", tok());
     }
-    return options | RE_OPTION_ENCODING(kcode);
+    return options;
 }
 
 static void

Modified: MacRuby/branches/icu/re.cpp
===================================================================
--- MacRuby/branches/icu/re.cpp	2010-02-19 01:28:09 UTC (rev 3575)
+++ MacRuby/branches/icu/re.cpp	2010-02-19 03:58:06 UTC (rev 3576)
@@ -11,6 +11,7 @@
 #include "unicode/unistr.h"
 #include "ruby/ruby.h"
 #include "encoding.h"
+#include "objc.h"
 
 extern "C" {
 
@@ -42,20 +43,20 @@
 #define RMATCH(o) ((rb_match_t *)o)
 
 static rb_regexp_t *
-regexp_alloc(void)
+regexp_alloc(VALUE klass, SEL sel)
 {
     NEWOBJ(re, struct rb_regexp);
-    OBJSETUP(re, rb_cRegexp, T_REGEXP);
+    OBJSETUP(re, klass, T_REGEXP);
     re->unistr = NULL;
     re->pattern = NULL;
     return re;
 }
 
 static rb_match_t *
-match_alloc(void)
+match_alloc(VALUE klass, SEL sel)
 {
     NEWOBJ(match, struct rb_match);
-    OBJSETUP(match, rb_cMatch, T_MATCH);
+    OBJSETUP(match, klass, T_MATCH);
     match->regexp = NULL;
     match->unistr = NULL;
     return match;
@@ -74,7 +75,18 @@
     }
 }
 
+static IMP regexp_finalize_imp_super = NULL; 
+
 static void
+regexp_finalize_imp(void *rcv, SEL sel)
+{
+    regexp_finalize(RREGEXP(rcv));
+    if (regexp_finalize_imp_super != NULL) {
+	((void(*)(void *, SEL))regexp_finalize_imp_super)(rcv, sel);
+    }
+}
+
+static void
 match_finalize(rb_match_t *match)
 {
     if (match->unistr != NULL) {
@@ -83,6 +95,17 @@
     }
 }
 
+static IMP match_finalize_imp_super = NULL; 
+
+static void
+match_finalize_imp(void *rcv, SEL sel)
+{
+    match_finalize(RMATCH(rcv));
+    if (match_finalize_imp_super != NULL) {
+	((void(*)(void *, SEL))match_finalize_imp_super)(rcv, sel);
+    }
+}
+
 static UnicodeString *
 str_to_unistr(VALUE str)
 {
@@ -97,7 +120,6 @@
     if (need_free && chars != NULL) {
 	free(chars);
     }
-
     return unistr;
 }
 
@@ -109,7 +131,7 @@
 
     UParseError pe;
     UErrorCode status = U_ZERO_ERROR;
-    RegexPattern *pattern = RegexPattern::compile(*unistr, pe, status);
+    RegexPattern *pattern = RegexPattern::compile(*unistr, option, pe, status);
 
     if (pattern == NULL) {
 	delete unistr;
@@ -129,14 +151,71 @@
     return true;
 }
 
+static void
+init_from_regexp(rb_regexp_t *regexp, rb_regexp_t *from)
+{
+    regexp_finalize(regexp);
+    regexp->unistr = new UnicodeString(*from->unistr);
+    regexp->pattern = new RegexPattern(*from->pattern);
+}
+
 static VALUE
-rb_regexp_alloc(VALUE klass, SEL sel)
+rb_str_compile_regexp(VALUE str, int options, VALUE *excp)
 {
-    return (VALUE)regexp_alloc();
+    rb_regexp_t *regexp = regexp_alloc(rb_cRegexp, 0);
+    if (!init_from_string(regexp, str, options, excp)) {
+	return Qnil;
+    }
+    return (VALUE)regexp;
 }
 
+#define REGEXP_OPT_IGNORECASE 	(UREGEX_CASE_INSENSITIVE)
+#define REGEXP_OPT_EXTENDED 	(UREGEX_COMMENTS)
+#define REGEXP_OPT_MULTILINE	(UREGEX_MULTILINE | UREGEX_DOTALL)
+
+bool
+rb_char_to_icu_option(int c, int *option)
+{
+    assert(option != NULL);
+    switch (c) {
+	case 'i':
+	    *option = REGEXP_OPT_IGNORECASE;
+	    return true;
+	case 'x':
+	    *option = REGEXP_OPT_EXTENDED;
+	    return true;
+	case 'm':
+	    *option = REGEXP_OPT_MULTILINE;
+	    return true;
+    }
+    *option = -1;
+    return false;
+}
+
+/*
+ *  call-seq:
+ *     Regexp.new(string [, options])                => regexp
+ *     Regexp.new(regexp)                            => regexp
+ *     Regexp.compile(string [, options])            => regexp
+ *     Regexp.compile(regexp)                        => regexp
+ *
+ *  Constructs a new regular expression from <i>pattern</i>, which can be either
+ *  a <code>String</code> or a <code>Regexp</code> (in which case that regexp's
+ *  options are propagated, and new options may not be specified (a change as of
+ *  Ruby 1.8)). If <i>options</i> is a <code>Fixnum</code>, it should be one or
+ *  more of the constants <code>Regexp::EXTENDED</code>,
+ *  <code>Regexp::IGNORECASE</code>, and <code>Regexp::MULTILINE</code>,
+ *  <em>or</em>-ed together. Otherwise, if <i>options</i> is not
+ *  <code>nil</code>, the regexp will be case insensitive.
+ *
+ *     r1 = Regexp.new('^a-z+:\\s+\w+')           #=> /^a-z+:\s+\w+/
+ *     r2 = Regexp.new('cat', true)               #=> /cat/i
+ *     r3 = Regexp.new('dog', Regexp::EXTENDED)   #=> /dog/x
+ *     r4 = Regexp.new(r2)                        #=> /cat/i
+ */
+
 static VALUE
-rb_regexp_initialize(VALUE self, SEL sel, int argc, VALUE *argv)
+regexp_initialize(VALUE self, SEL sel, int argc, VALUE *argv)
 {
     if (argc == 0 || argc > 3) {
 	rb_raise(rb_eArgError, "wrong number of arguments");
@@ -147,9 +226,7 @@
 	    rb_warn("flags ignored");
 	}
 	assert(RREGEXP(re)->pattern != NULL);
-	regexp_finalize(RREGEXP(self));
-	RREGEXP(self)->unistr = new UnicodeString(*RREGEXP(re)->unistr);
-	RREGEXP(self)->pattern = new RegexPattern(*RREGEXP(re)->pattern);
+	init_from_regexp(RREGEXP(self), RREGEXP(re));
     }
     else {
 	int options = 0;
@@ -157,11 +234,9 @@
 	    if (FIXNUM_P(argv[1])) {
 		options = FIX2INT(argv[1]);
 	    }
-#if 0 // TODO
 	    else if (RTEST(argv[1])) {
-		options = ONIG_OPTION_IGNORECASE;
+		options = REGEXP_OPT_IGNORECASE;
 	    }
-#endif
 	}
 	VALUE str = argv[0];
 	StringValue(str);
@@ -175,16 +250,91 @@
 }
 
 static VALUE
-rb_regexp_inspect(VALUE rcv, SEL sel)
+regexp_initialize_copy(VALUE rcv, SEL sel, VALUE other)
 {
-    assert(RREGEXP(rcv)->unistr != NULL);
-    const UChar *chars = RREGEXP(rcv)->unistr->getBuffer();
-    const int32_t chars_len = RREGEXP(rcv)->unistr->length();
-    assert(chars_len >= 0);
-    return rb_unicode_str_new(chars, chars_len);
+    if (TYPE(other) != T_REGEXP) {
+	rb_raise(rb_eTypeError, "wrong argument type");
+    }
+    init_from_regexp(RREGEXP(rcv), RREGEXP(other));
+    return rcv;
 }
 
+/*
+ *  call-seq:
+ *     rxp == other_rxp      => true or false
+ *     rxp.eql?(other_rxp)   => true or false
+ *
+ *  Equality---Two regexps are equal if their patterns are identical, they have
+ *  the same character set code, and their <code>casefold?</code> values are the
+ *  same.
+ *
+ *     /abc/  == /abc/x   #=> false
+ *     /abc/  == /abc/i   #=> false
+ *     /abc/  == /abc/n   #=> false
+ *     /abc/u == /abc/n   #=> false
+ */
+
 static VALUE
+regexp_equal(VALUE rcv, SEL sel, VALUE other)
+{
+    if (rcv == other) {
+	return Qtrue;
+    }
+    if (TYPE(other) != T_REGEXP) {
+	return Qfalse;
+    }
+
+    assert(RREGEXP(rcv)->unistr != NULL && RREGEXP(rcv)->pattern != NULL);
+    assert(RREGEXP(other)->unistr != NULL && RREGEXP(other)->pattern != NULL);
+
+    // Using the == operator on the RegexPattern objects does not work, for
+    // some reason, so we compare the source strings and flags instead.
+    return *RREGEXP(rcv)->unistr == *RREGEXP(other)->unistr
+	&& RREGEXP(rcv)->pattern->flags() == RREGEXP(other)->pattern->flags()
+	? Qtrue : Qfalse;
+}
+
+/*
+ *  call-seq:
+ *     rxp =~ str    => integer or nil
+ *
+ *  Match---Matches <i>rxp</i> against <i>str</i>.
+ *
+ *     /at/ =~ "input data"   #=> 7
+ *     /ax/ =~ "input data"   #=> nil
+ *
+ *  If <code>=~</code> is used with a regexp literal with named captures,
+ *  captured strings (or nil) are assigned to local variables named by
+ *  the capture names.
+ *
+ *     /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "  x = y  "
+ *     p lhs    #=> "x"
+ *     p rhs    #=> "y"
+ *
+ *  If it is not matched, nil is assigned for the variables.
+ *
+ *     /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "  x = "   
+ *     p lhs    #=> nil
+ *     p rhs    #=> nil
+ *
+ *  This assignment is implemented in the Ruby parser.
+ *  So a regexp literal is required for the assignment. 
+ *  The assignment does not occur if the regexp is not a literal.
+ *
+ *     re = /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/
+ *     re =~ "  x = "
+ *     p lhs    # undefined local variable
+ *     p rhs    # undefined local variable
+ *
+ *  A regexp interpolation, <code>#{}</code>, also disables
+ *  the assignment.
+ *
+ *     rhs_pat = /(?<rhs>\w+)/
+ *     /(?<lhs>\w+)\s*=\s*#{rhs_pat}/ =~ "x = y"
+ *     p lhs    # undefined local variable
+ */
+
+static VALUE
 reg_operand(VALUE s, bool check)
 {
     if (SYMBOL_P(s)) {
@@ -249,7 +399,7 @@
 
     VALUE match = rb_backref_get();
     if (NIL_P(match)) {
-	match = (VALUE)match_alloc();
+	match = (VALUE)match_alloc(rb_cMatch, 0);
 	rb_backref_set(match);
     }
 
@@ -262,7 +412,7 @@
     return res[0].beg;
 }
 
-long
+static long
 reg_match_pos(VALUE re, VALUE *strp, long pos)
 {
     VALUE str = *strp;
@@ -286,8 +436,46 @@
 }
 
 static VALUE
-rb_regexp_match(VALUE rcv, SEL sel, int argc, VALUE *argv)
+regexp_match(VALUE rcv, SEL sel, VALUE str)
 {
+    const long pos = reg_match_pos(rcv, &str, 0);
+    if (pos < 0) {
+	return Qnil;
+    }
+    return LONG2FIX(pos);
+}
+
+/*
+ *  call-seq:
+ *     rxp.match(str)       => matchdata or nil
+ *     rxp.match(str,pos)   => matchdata or nil
+ *
+ *  Returns a <code>MatchData</code> object describing the match, or
+ *  <code>nil</code> if there was no match. This is equivalent to retrieving the
+ *  value of the special variable <code>$~</code> following a normal match.
+ *  If the second parameter is present, it specifies the position in the string
+ *  to begin the search.
+ *
+ *     /(.)(.)(.)/.match("abc")[2]   #=> "b"
+ *     /(.)(.)/.match("abc", 1)[2]   #=> "c"
+ *     
+ *  If a block is given, invokes the block with the MatchData if the match
+ *  succeeds, so that you can write
+ *     
+ *     pat.match(str) {|m| ...}
+ *     
+ *  instead of
+ *      
+ *     if m = pat.match(str)
+ *       ...
+ *     end
+ *      
+ *  The return value is a value from block execution in this case.
+ */
+
+static VALUE
+regexp_match2(VALUE rcv, SEL sel, int argc, VALUE *argv)
+{
     VALUE result, str, initpos;
     long pos;
 
@@ -311,7 +499,248 @@
     return result;
 }
 
+/*
+ *  call-seq:
+ *     ~ rxp   => integer or nil
+ *
+ *  Match---Matches <i>rxp</i> against the contents of <code>$_</code>.
+ *  Equivalent to <code><i>rxp</i> =~ $_</code>.
+ *
+ *     $_ = "input data"
+ *     ~ /at/   #=> 7
+ */
+
+static VALUE
+regexp_match3(VALUE rcv, SEL sel)
+{
+    VALUE line = rb_lastline_get();
+    if (TYPE(line) != T_STRING) {
+	rb_backref_set(Qnil);
+	return Qnil;
+    }
+
+    const long start = rb_reg_search(rcv, line, 0, 0);
+    if (start < 0) {
+	return Qnil;
+    }
+    return LONG2FIX(start);
+}
+
+/*
+ *  call-seq:
+ *     rxp === str   => true or false
+ *
+ *  Case Equality---Synonym for <code>Regexp#=~</code> used in case statements.
+ *
+ *     a = "HELLO"
+ *     case a
+ *     when /^[a-z]*$/; print "Lower case\n"
+ *     when /^[A-Z]*$/; print "Upper case\n"
+ *     else;            print "Mixed case\n"
+ *     end
+ *
+ *  <em>produces:</em>
+ *
+ *     Upper case
+ */
+
 VALUE
+regexp_eqq(VALUE rcv, SEL sel, VALUE str)
+{
+    str = reg_operand(str, Qfalse);
+    if (NIL_P(str)) {
+	rb_backref_set(Qnil);
+	return Qfalse;
+    }
+    const long start = rb_reg_search(rcv, str, 0, 0);
+    if (start < 0) {
+	return Qfalse;
+    }
+    return Qtrue;
+}
+
+/*
+ *  call-seq:
+ *      rxp.source   => str
+ *
+ *  Returns the original string of the pattern.
+ *
+ *      /ab+c/ix.source #=> "ab+c"
+ *
+ *  Note that escape sequences are retained as is.
+ *
+ *     /\x20\+/.source  #=> "\\x20\\+"
+ *
+ */
+
+static VALUE
+regexp_source(VALUE rcv, SEL sel)
+{
+    assert(RREGEXP(rcv)->unistr != NULL);
+
+    const UChar *chars = RREGEXP(rcv)->unistr->getBuffer();
+    const int32_t chars_len = RREGEXP(rcv)->unistr->length();
+    assert(chars_len >= 0);
+
+    VALUE str = rb_unicode_str_new(chars, chars_len);
+
+    if (OBJ_TAINTED(rcv)) {
+	OBJ_TAINT(str);
+    }
+    return str;
+}
+
+/*
+ * call-seq:
+ *    rxp.inspect   => string
+ *
+ * Produce a nicely formatted string-version of _rxp_. Perhaps surprisingly,
+ * <code>#inspect</code> actually produces a more natural version of
+ * the string than <code>#to_s</code>.
+ *
+ *      /ab+c/ix.inspect        #=> "/ab+c/ix"
+ *
+ */
+
+static VALUE
+regexp_inspect(VALUE rcv, SEL sel)
+{
+    return regexp_source(rcv, 0);
+}
+
+/*
+ *  call-seq:
+ *     rxp.casefold?   => true or false
+ *
+ *  Returns the value of the case-insensitive flag.
+ *
+ *      /a/.casefold?           #=> false
+ *      /a/i.casefold?          #=> true
+ *      /(?i:a)/.casefold?      #=> false
+ */
+
+int
+rb_reg_options(VALUE re)
+{
+    assert(RREGEXP(re)->pattern != NULL);
+    return RREGEXP(re)->pattern->flags();
+}
+
+static VALUE
+regexp_casefold(VALUE rcv, SEL sel)
+{
+    return rb_reg_options(rcv) & REGEXP_OPT_IGNORECASE ? Qtrue : Qfalse;
+}
+
+/*
+ *  call-seq:
+ *     rxp.options   => fixnum
+ *
+ *  Returns the set of bits corresponding to the options used when creating this
+ *  Regexp (see <code>Regexp::new</code> for details). Note that additional bits
+ *  may be set in the returned options: these are used internally by the regular
+ *  expression code. These extra bits are ignored if the options are passed to
+ *  <code>Regexp::new</code>.
+ *
+ *     Regexp::IGNORECASE                  #=> 1
+ *     Regexp::EXTENDED                    #=> 2
+ *     Regexp::MULTILINE                   #=> 4
+ *
+ *     /cat/.options                       #=> 0
+ *     /cat/ix.options                     #=> 3
+ *     Regexp.new('cat', true).options     #=> 1
+ *     /\xa1\xa2/e.options                 #=> 16
+ *
+ *     r = /cat/ix
+ *     Regexp.new(r.source, r.options)     #=> /cat/ix
+ */
+
+static VALUE
+regexp_options(VALUE rcv, SEL sel)
+{
+    return INT2FIX(rb_reg_options(rcv));
+}
+
+/*
+ *  Document-class: Regexp
+ *
+ *  A <code>Regexp</code> holds a regular expression, used to match a pattern
+ *  against strings. Regexps are created using the <code>/.../</code> and
+ *  <code>%r{...}</code> literals, and by the <code>Regexp::new</code>
+ *  constructor.
+ *
+ */
+
+static void Init_Match(void);
+
+void
+Init_Regexp(void)
+{
+    rb_eRegexpError = rb_define_class("RegexpError", rb_eStandardError);
+
+#if 0
+    rb_define_virtual_variable("$~", match_getter, match_setter);
+    rb_define_virtual_variable("$&", last_match_getter, 0);
+    rb_define_virtual_variable("$`", prematch_getter, 0);
+    rb_define_virtual_variable("$'", postmatch_getter, 0);
+    rb_define_virtual_variable("$+", last_paren_match_getter, 0);
+
+    rb_define_virtual_variable("$=", ignorecase_getter, ignorecase_setter);
+    rb_define_virtual_variable("$KCODE", kcode_getter, kcode_setter);
+    rb_define_virtual_variable("$-K", kcode_getter, kcode_setter);
+#endif
+
+    rb_cRegexp = rb_define_class("Regexp", rb_cObject);
+    rb_objc_define_method(*(VALUE *)rb_cRegexp, "alloc",
+	    (void *)regexp_alloc, 0);
+#if 0
+    rb_objc_define_method(*(VALUE *)rb_cRegexp, "compile",
+	    rb_class_new_instance_imp, -1);
+    rb_objc_define_method(*(VALUE *)rb_cRegexp, "quote", rb_reg_s_quote, 1);
+    rb_objc_define_method(*(VALUE *)rb_cRegexp, "escape", rb_reg_s_quote, 1);
+    rb_objc_define_method(*(VALUE *)rb_cRegexp, "union", rb_reg_s_union_m, -2);
+    rb_objc_define_method(*(VALUE *)rb_cRegexp, "last_match",
+	    rb_reg_s_last_match, -1);
+    rb_objc_define_method(*(VALUE *)rb_cRegexp, "try_convert",
+	    rb_reg_s_try_convert, 1);
+#endif
+
+    regexp_finalize_imp_super = rb_objc_install_method2((Class)rb_cRegexp,
+	    "finalize", (IMP)regexp_finalize_imp);
+
+    rb_objc_define_method(rb_cRegexp, "initialize",
+	    (void *)regexp_initialize, -1);
+    rb_objc_define_method(rb_cRegexp, "initialize_copy",
+	    (void *)regexp_initialize_copy, 1);
+    //rb_objc_define_method(rb_cRegexp, "hash", rb_reg_hash, 0);
+    rb_objc_define_method(rb_cRegexp, "eql?", (void *)regexp_equal, 1);
+    rb_objc_define_method(rb_cRegexp, "==", (void *)regexp_equal, 1);
+    rb_objc_define_method(rb_cRegexp, "=~", (void *)regexp_match, 1);
+    rb_objc_define_method(rb_cRegexp, "match", (void *)regexp_match2, -1);
+    rb_objc_define_method(rb_cRegexp, "~", (void *)regexp_match3, 0);
+    rb_objc_define_method(rb_cRegexp, "===", (void *)regexp_eqq, 1);
+    //rb_objc_define_method(rb_cRegexp, "to_s", rb_reg_to_s, 0);
+    rb_objc_define_method(rb_cRegexp, "source", (void *)regexp_source, 0);
+    rb_objc_define_method(rb_cRegexp, "casefold?", (void *)regexp_casefold, 0);
+    rb_objc_define_method(rb_cRegexp, "options", (void *)regexp_options, 0);
+#if 0
+    rb_objc_define_method(rb_cRegexp, "encoding", rb_reg_encoding, 0);
+    rb_objc_define_method(rb_cRegexp, "fixed_encoding?",
+	    rb_reg_fixed_encoding_p, 0);
+    rb_objc_define_method(rb_cRegexp, "names", rb_reg_names, 0);
+    rb_objc_define_method(rb_cRegexp, "named_captures",
+	    rb_reg_named_captures, 0);
+#endif
+    rb_objc_define_method(rb_cRegexp, "inspect", (void *)regexp_inspect, 0);
+
+    rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(REGEXP_OPT_IGNORECASE));
+    rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(REGEXP_OPT_EXTENDED));
+    rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(REGEXP_OPT_MULTILINE));
+
+    Init_Match();
+}
+
+VALUE
 rb_reg_nth_match(int nth, VALUE match)
 {
     if (NIL_P(match)) {
@@ -337,6 +766,32 @@
     return rb_unicode_str_new(&chars[beg], len);
 }
 
+VALUE
+rb_reg_last_match(VALUE match)
+{
+    return rb_reg_nth_match(0, match);
+}
+
+/*
+ * call-seq:
+ *    mtch.inspect   => str
+ *
+ * Returns a printable version of <i>mtch</i>.
+ *
+ *     puts /.$/.match("foo").inspect
+ *     #=> #<MatchData "o">
+ *
+ *     puts /(.)(.)(.)/.match("foo").inspect
+ *     #=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">
+ *
+ *     puts /(.)(.)?(.)/.match("fo").inspect
+ *     #=> #<MatchData "fo" 1:"f" 2:nil 3:"o">
+ *
+ *     puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect
+ *     #=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">
+ *
+ */
+
 static VALUE
 match_inspect(VALUE rcv, SEL sel)
 {
@@ -361,74 +816,73 @@
     return str;
 }
 
-void
-Init_Regexp(void)
+/*
+ *  call-seq:
+ *     mtch.string   => str
+ *
+ *  Returns a frozen copy of the string passed in to <code>match</code>.
+ *
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m.string   #=> "THX1138."
+ */
+
+static VALUE
+match_string(VALUE rcv, SEL sel)
 {
-    rb_eRegexpError = rb_define_class("RegexpError", rb_eStandardError);
+    UnicodeString *unistr = RMATCH(rcv)->unistr;
+    assert(unistr != NULL);
+    VALUE str = rb_unicode_str_new(unistr->getBuffer(), unistr->length());
+    OBJ_FREEZE(str);
+    return str;
+}
 
-#if 0
-    rb_define_virtual_variable("$~", match_getter, match_setter);
-    rb_define_virtual_variable("$&", last_match_getter, 0);
-    rb_define_virtual_variable("$`", prematch_getter, 0);
-    rb_define_virtual_variable("$'", postmatch_getter, 0);
-    rb_define_virtual_variable("$+", last_paren_match_getter, 0);
+/*
+ *  call-seq:
+ *     mtch.to_s   => str
+ *
+ *  Returns the entire matched string.
+ *
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m.to_s   #=> "HX1138"
+ */
 
-    rb_define_virtual_variable("$=", ignorecase_getter, ignorecase_setter);
-    rb_define_virtual_variable("$KCODE", kcode_getter, kcode_setter);
-    rb_define_virtual_variable("$-K", kcode_getter, kcode_setter);
-#endif
+static VALUE
+match_to_s(VALUE rcv, SEL sel)
+{
+    VALUE str = rb_reg_last_match(rcv);
 
-    rb_cRegexp = rb_define_class("Regexp", rb_cObject);
-    rb_objc_define_method(*(VALUE *)rb_cRegexp, "alloc",
-	    (void *)rb_regexp_alloc, 0);
-#if 0
-    rb_objc_define_method(*(VALUE *)rb_cRegexp, "compile",
-	    rb_class_new_instance_imp, -1);
-    rb_objc_define_method(*(VALUE *)rb_cRegexp, "quote", rb_reg_s_quote, 1);
-    rb_objc_define_method(*(VALUE *)rb_cRegexp, "escape", rb_reg_s_quote, 1);
-    rb_objc_define_method(*(VALUE *)rb_cRegexp, "union", rb_reg_s_union_m, -2);
-    rb_objc_define_method(*(VALUE *)rb_cRegexp, "last_match",
-	    rb_reg_s_last_match, -1);
-    rb_objc_define_method(*(VALUE *)rb_cRegexp, "try_convert",
-	    rb_reg_s_try_convert, 1);
+    if (NIL_P(str)) {
+	str = rb_str_new(0, 0);
+    }
+    if (OBJ_TAINTED(rcv)) {
+	OBJ_TAINT(str);
+    }
+    return str;
+}
 
-    rb_objc_reg_finalize_super = rb_objc_install_method2((Class)rb_cRegexp,
-	    "finalize", (IMP)rb_objc_reg_finalize);
-#endif
+/*
+ *  Document-class: MatchData
+ *
+ *  <code>MatchData</code> is the type of the special variable <code>$~</code>,
+ *  and is the type of the object returned by <code>Regexp#match</code> and
+ *  <code>Regexp.last_match</code>. It encapsulates all the results of a pattern
+ *  match, results normally accessed through the special variables
+ *  <code>$&</code>, <code>$'</code>, <code>$`</code>, <code>$1</code>,
+ *  <code>$2</code>, and so on.
+ *
+ */
 
-    rb_objc_define_method(rb_cRegexp, "initialize",
-	    (void *)rb_regexp_initialize, -1);
+static void
+Init_Match(void)
+{
+    rb_cMatch = rb_define_class("MatchData", rb_cObject);
+    rb_objc_define_method(*(VALUE *)rb_cMatch, "alloc", (void *)match_alloc, 0);
+    rb_undef_method(CLASS_OF(rb_cMatch), "new");
 
-#if 0
-    rb_objc_define_method(rb_cRegexp, "initialize_copy", rb_reg_init_copy, 1);
-    rb_objc_define_method(rb_cRegexp, "hash", rb_reg_hash, 0);
-    rb_objc_define_method(rb_cRegexp, "eql?", rb_reg_equal, 1);
-    rb_objc_define_method(rb_cRegexp, "==", rb_reg_equal, 1);
-    rb_objc_define_method(rb_cRegexp, "=~", rb_reg_match_imp, 1);
-    rb_objc_define_method(rb_cRegexp, "===", rb_reg_eqq, 1);
-    rb_objc_define_method(rb_cRegexp, "~", rb_reg_match2, 0);
-    rb_objc_define_method(rb_cRegexp, "to_s", rb_reg_to_s, 0);
-    rb_objc_define_method(rb_cRegexp, "source", rb_reg_source, 0);
-    rb_objc_define_method(rb_cRegexp, "casefold?", rb_reg_casefold_p, 0);
-    rb_objc_define_method(rb_cRegexp, "options", rb_reg_options_m, 0);
-    rb_objc_define_method(rb_cRegexp, "encoding", rb_reg_encoding, 0);
-    rb_objc_define_method(rb_cRegexp, "fixed_encoding?",
-	    rb_reg_fixed_encoding_p, 0);
-    rb_objc_define_method(rb_cRegexp, "names", rb_reg_names, 0);
-    rb_objc_define_method(rb_cRegexp, "named_captures",
-	    rb_reg_named_captures, 0);
-#endif
-    rb_objc_define_method(rb_cRegexp, "match", (void *)rb_regexp_match, -1);
-    rb_objc_define_method(rb_cRegexp, "inspect", (void *)rb_regexp_inspect, 0);
+    match_finalize_imp_super = rb_objc_install_method2((Class)rb_cMatch,
+	    "finalize", (IMP)match_finalize_imp);
 
-    rb_cMatch  = rb_define_class("MatchData", rb_cObject);
 #if 0
-    rb_objc_define_method(*(VALUE *)rb_cMatch, "alloc", match_alloc, 0);
-    rb_undef_method(CLASS_OF(rb_cMatch), "new");
-
-    rb_objc_match_finalize_super = rb_objc_install_method2((Class)rb_cMatch,
-	    "finalize", (IMP)rb_objc_match_finalize);
-
     rb_objc_define_method(rb_cMatch, "initialize_copy", match_init_copy, 1);
     rb_objc_define_method(rb_cMatch, "regexp", match_regexp, 0);
     rb_objc_define_method(rb_cMatch, "names", match_names, 0);
@@ -443,10 +897,9 @@
     rb_objc_define_method(rb_cMatch, "values_at", match_values_at, -1);
     rb_objc_define_method(rb_cMatch, "pre_match", rb_reg_match_pre, 0);
     rb_objc_define_method(rb_cMatch, "post_match", rb_reg_match_post, 0);
-    rb_objc_define_method(rb_cMatch, "to_s", match_to_s, 0);
-    rb_objc_define_method(rb_cMatch, "string", match_string, 0);
 #endif
-
+    rb_objc_define_method(rb_cMatch, "to_s", (void *)match_to_s, 0);
+    rb_objc_define_method(rb_cMatch, "string", (void *)match_string, 0);
     rb_objc_define_method(rb_cMatch, "inspect", (void *)match_inspect, 0);
 }
 
@@ -456,16 +909,6 @@
     return Qnil;
 }
 
-static VALUE
-rb_str_compile_regexp(VALUE str, int options, VALUE *excp)
-{
-    rb_regexp_t *regexp = regexp_alloc();
-    if (!init_from_string(regexp, str, options, excp)) {
-	return Qnil;
-    }
-    return (VALUE)regexp;
-}
-
 VALUE
 rb_reg_compile(VALUE str, int options)
 {
@@ -494,19 +937,7 @@
     return rb_reg_new_str(rb_usascii_str_new(cstr, len), options);
 }
 
-int
-rb_reg_options(VALUE re)
-{
-    return 0;
-}
-
 VALUE
-rb_reg_last_match(VALUE match)
-{
-    return rb_reg_nth_match(0, match);
-}
-
-VALUE
 rb_reg_match_last(VALUE match)
 {
     return Qnil;
@@ -530,19 +961,4 @@
     // Do nothing.
 }
 
-int
-rb_char_to_option_kcode(int c, int *option, int *kcode)
-{
-    // TODO
-    *option = 0;
-    *kcode = -1;
-    return *option;
-}
-
-VALUE
-rb_reg_eqq(VALUE re, SEL sel, VALUE str)
-{
-    return Qfalse;
-}
-
 } // extern "C"

Added: MacRuby/branches/icu/re.h
===================================================================
--- MacRuby/branches/icu/re.h	                        (rev 0)
+++ MacRuby/branches/icu/re.h	2010-02-19 03:58:06 UTC (rev 3576)
@@ -0,0 +1,23 @@
+/* 
+ * MacRuby Regular Expressions.
+ *
+ * This file is covered by the Ruby license. See COPYING for more details.
+ * 
+ * Copyright (C) 2010, Apple Inc. All rights reserved.
+ */
+
+#ifndef __RE_H_
+#define __RE_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+bool rb_char_to_icu_option(int c, int *option);
+VALUE regexp_eqq(VALUE rcv, SEL sel, VALUE str);
+
+#if defined(__cplusplus)
+} // extern "C"
+#endif
+
+#endif // __RE_H_
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20100218/d4e8f3ea/attachment-0001.html>