Revision: 4401
          http://trac.macosforge.org/projects/ruby/changeset/4401
Author:   lsansonetti@apple.com
Date:     2010-08-02 23:36:29 -0700 (Mon, 02 Aug 2010)
Log Message:
-----------
honor the original string encoding when generating substrings out of a regexp matcher + misc fixes (patch by Vincent Isambart)

Modified Paths:
--------------
    MacRuby/trunk/encoding.c
    MacRuby/trunk/re.cpp
    MacRuby/trunk/string.c
    MacRuby/trunk/symbol.c
    MacRuby/trunk/symbol.h

Modified: MacRuby/trunk/encoding.c
===================================================================
--- MacRuby/trunk/encoding.c 2010-08-03 02:57:21 UTC (rev 4400)
+++ MacRuby/trunk/encoding.c 2010-08-03 06:36:29 UTC (rev 4401)
@@ -14,6 +14,7 @@
 #include "ruby/macruby.h"
 #include "ruby/encoding.h"
 #include "encoding.h"
+#include "symbol.h"
 
 VALUE rb_cEncoding;
 
@@ -403,10 +404,16 @@
 rb_encoding_t *
 rb_enc_get(VALUE obj)
 {
-    if (IS_RSTR(obj)) {
-        return RSTR(obj)->encoding;
+    switch (TYPE(obj)) {
+        case T_STRING:
+            if (IS_RSTR(obj)) {
+                return RSTR(obj)->encoding;
+            }
+            return rb_encodings[ENCODING_UTF8];
+
+        case T_SYMBOL:
+            return rb_enc_get(rb_sym_str(obj));
     }
-    // TODO support symbols
     return NULL;
 }
 
Modified: MacRuby/trunk/re.cpp
===================================================================
--- MacRuby/trunk/re.cpp 2010-08-03 02:57:21 UTC (rev 4400)
+++ MacRuby/trunk/re.cpp 2010-08-03 06:36:29 UTC (rev 4401)
@@ -10,6 +10,7 @@
 #include "unicode/regex.h"
 #include "unicode/unistr.h"
 #include "ruby/macruby.h"
+#include "ruby/encoding.h"
 #include "encoding.h"
 #include "objc.h"
 #include "re.h"
@@ -586,6 +587,7 @@
     struct RBasic basic;
     UnicodeString *unistr;
     RegexMatcher *matcher;
+    rb_encoding_t *str_enc;
 } rb_regexp_matcher_t;
 
 static IMP regexp_matcher_finalize_imp_super = NULL;
@@ -628,6 +630,7 @@
 
     matcher->matcher = regexp_matcher;
     matcher->unistr = unistr;
+    matcher->str_enc = rb_enc_get(str);
 
     return (VALUE)matcher;
 }
@@ -718,6 +721,7 @@
     }
 
     rb_str_set_len(RMATCH(match)->str, 0);
+    rb_str_force_encoding(RMATCH(match)->str, re_matcher->str_enc);
     rb_str_append_uchars(RMATCH(match)->str, re_matcher->unistr->getBuffer(),
             re_matcher->unistr->length());
 
Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c 2010-08-03 02:57:21 UTC (rev 4400)
+++ MacRuby/trunk/string.c 2010-08-03 06:36:29 UTC (rev 4401)
@@ -828,9 +828,20 @@
 static void
 str_concat_uchars(rb_str_t *self, const UChar *chars, long len)
 {
-    assert(str_try_making_data_uchars(self));
+    if (str_try_making_data_uchars(self)) {
+        str_concat_bytes(self, (const char *)chars, UCHARS_TO_BYTES(len));
+    }
+    else {
+        assert(BINARY_ENC(RSTR(self)->encoding));
+        const long new_length_in_bytes = RSTR(self)->length_in_bytes + len;
 
-    str_concat_bytes(self, (const char *)chars, UCHARS_TO_BYTES(len));
+        str_resize_bytes(self, new_length_in_bytes);
+        char *ptr = (RSTR(self)->data.bytes + RSTR(self)->length_in_bytes);
+        for (int i = 0; i < len; ++i) {
+            ptr[i] = chars[i];
+        }
+        self->length_in_bytes = new_length_in_bytes;
+    }
 }
 
 static void

Modified: MacRuby/trunk/symbol.c
===================================================================
--- MacRuby/trunk/symbol.c 2010-08-03 02:57:21 UTC (rev 4400)
+++ MacRuby/trunk/symbol.c 2010-08-03 06:36:29 UTC (rev 4401)
@@ -837,3 +837,9 @@
     id |= ID_ATTRSET;
     return id;
 }
+
+VALUE
+rb_sym_str(VALUE sym)
+{
+    return RSYM(sym)->str;
+}

Modified: MacRuby/trunk/symbol.h
===================================================================
--- MacRuby/trunk/symbol.h 2010-08-03 02:57:21 UTC (rev 4400)
+++ MacRuby/trunk/symbol.h 2010-08-03 06:36:29 UTC (rev 4401)
@@ -47,6 +47,8 @@
 // Defined in parse.y.
 extern struct rb_op_tbl_entry rb_op_tbl[];
 
+VALUE rb_sym_str(VALUE sym);
+
 #if defined(__cplusplus)
 } // extern "C"
 #endif