[macruby-changes] [4401] MacRuby/trunk

source_changes at macosforge.org source_changes at macosforge.org
Mon Aug 2 23:36:32 PDT 2010


Revision: 4401
          http://trac.macosforge.org/projects/ruby/changeset/4401
Author:   lsansonetti at apple.com
Date:     2010-08-02 23:36:29 -0700 (Mon, 02 Aug 2010)
Log Message:
-----------
Honor the original string's encoding when generating substrings from a regexp matcher, plus miscellaneous fixes (patch by Vincent Isambart).

Modified Paths:
--------------
    MacRuby/trunk/encoding.c
    MacRuby/trunk/re.cpp
    MacRuby/trunk/string.c
    MacRuby/trunk/symbol.c
    MacRuby/trunk/symbol.h

Modified: MacRuby/trunk/encoding.c
===================================================================
--- MacRuby/trunk/encoding.c	2010-08-03 02:57:21 UTC (rev 4400)
+++ MacRuby/trunk/encoding.c	2010-08-03 06:36:29 UTC (rev 4401)
@@ -14,6 +14,7 @@
 #include "ruby/macruby.h"
 #include "ruby/encoding.h"
 #include "encoding.h"
+#include "symbol.h"
 
 VALUE rb_cEncoding;
 
@@ -403,10 +404,16 @@
 rb_encoding_t *
 rb_enc_get(VALUE obj)
 {
-    if (IS_RSTR(obj)) {
-	return RSTR(obj)->encoding;
+    switch (TYPE(obj)) {
+	case T_STRING:
+	    if (IS_RSTR(obj)) {
+		return RSTR(obj)->encoding;
+	    }
+	    return rb_encodings[ENCODING_UTF8];
+
+	case T_SYMBOL:
+	    return rb_enc_get(rb_sym_str(obj));
     }
-    // TODO support symbols
     return NULL;
 }
 

Modified: MacRuby/trunk/re.cpp
===================================================================
--- MacRuby/trunk/re.cpp	2010-08-03 02:57:21 UTC (rev 4400)
+++ MacRuby/trunk/re.cpp	2010-08-03 06:36:29 UTC (rev 4401)
@@ -10,6 +10,7 @@
 #include "unicode/regex.h"
 #include "unicode/unistr.h"
 #include "ruby/macruby.h"
+#include "ruby/encoding.h"
 #include "encoding.h"
 #include "objc.h"
 #include "re.h"
@@ -586,6 +587,7 @@
     struct RBasic basic;
     UnicodeString *unistr;
     RegexMatcher *matcher;
+    rb_encoding_t *str_enc;
 } rb_regexp_matcher_t;
 
 static IMP regexp_matcher_finalize_imp_super = NULL; 
@@ -628,6 +630,7 @@
 
     matcher->matcher = regexp_matcher;
     matcher->unistr = unistr;
+    matcher->str_enc = rb_enc_get(str);
 
     return (VALUE)matcher;
 }
@@ -718,6 +721,7 @@
     }
 
     rb_str_set_len(RMATCH(match)->str, 0);
+    rb_str_force_encoding(RMATCH(match)->str, re_matcher->str_enc);
     rb_str_append_uchars(RMATCH(match)->str, re_matcher->unistr->getBuffer(),
 	    re_matcher->unistr->length());
 

Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c	2010-08-03 02:57:21 UTC (rev 4400)
+++ MacRuby/trunk/string.c	2010-08-03 06:36:29 UTC (rev 4401)
@@ -828,9 +828,20 @@
 static void
 str_concat_uchars(rb_str_t *self, const UChar *chars, long len)
 {
-    assert(str_try_making_data_uchars(self));
+    if (str_try_making_data_uchars(self)) {
+	str_concat_bytes(self, (const char *)chars, UCHARS_TO_BYTES(len)); 
+    }
+    else {
+	assert(BINARY_ENC(RSTR(self)->encoding));
+	const long new_length_in_bytes = RSTR(self)->length_in_bytes + len;
 
-    str_concat_bytes(self, (const char *)chars, UCHARS_TO_BYTES(len)); 
+	str_resize_bytes(self, new_length_in_bytes);
+	char *ptr = (RSTR(self)->data.bytes + RSTR(self)->length_in_bytes);
+	for (int i = 0; i < len; ++i) {
+	    ptr[i] = chars[i];
+	}
+	self->length_in_bytes = new_length_in_bytes;
+    }
 }
 
 static void

Modified: MacRuby/trunk/symbol.c
===================================================================
--- MacRuby/trunk/symbol.c	2010-08-03 02:57:21 UTC (rev 4400)
+++ MacRuby/trunk/symbol.c	2010-08-03 06:36:29 UTC (rev 4401)
@@ -837,3 +837,9 @@
     id |= ID_ATTRSET;
     return id;
 }
+
+VALUE
+rb_sym_str(VALUE sym)
+{
+    return RSYM(sym)->str;
+}

Modified: MacRuby/trunk/symbol.h
===================================================================
--- MacRuby/trunk/symbol.h	2010-08-03 02:57:21 UTC (rev 4400)
+++ MacRuby/trunk/symbol.h	2010-08-03 06:36:29 UTC (rev 4401)
@@ -47,6 +47,8 @@
 // Defined in parse.y.
 extern struct rb_op_tbl_entry rb_op_tbl[];
 
+VALUE rb_sym_str(VALUE sym);
+
 #if defined(__cplusplus)
 } // extern "C"
 #endif
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20100802/06f8429b/attachment.html>


More information about the macruby-changes mailing list