<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>[2036] MacRuby/branches/experimental</title>
</head>
<body>
<style type="text/css"><!--
#msg dl.meta { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dl.meta dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer, #logmsg { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; }
#msg dl a { font-weight: bold}
#msg dl a:link { color:#fc3; }
#msg dl a:active { color:#ff0; }
#msg dl a:visited { color:#cc6; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
#msg pre { overflow: auto; background: #ffc; border: 1px #fa0 solid; padding: 6px; }
#logmsg { background: #ffc; border: 1px #fa0 solid; padding: 1em 1em 0 1em; }
#logmsg p, #logmsg pre, #logmsg blockquote { margin: 0 0 1em 0; }
#logmsg p, #logmsg li, #logmsg dt, #logmsg dd { line-height: 14pt; }
#logmsg h1, #logmsg h2, #logmsg h3, #logmsg h4, #logmsg h5, #logmsg h6 { margin: .5em 0; }
#logmsg h1:first-child, #logmsg h2:first-child, #logmsg h3:first-child, #logmsg h4:first-child, #logmsg h5:first-child, #logmsg h6:first-child { margin-top: 0; }
#logmsg ul, #logmsg ol { padding: 0; list-style-position: inside; margin: 0 0 0 1em; }
#logmsg ul { text-indent: -1em; padding-left: 1em; }#logmsg ol { text-indent: -1.5em; padding-left: 1.5em; }
#logmsg > ul, #logmsg > ol { margin: 0 0 1em 0; }
#logmsg pre { background: #eee; padding: 1em; }
#logmsg blockquote { border: 1px solid #fa0; border-left-width: 10px; padding: 1em 1em 0 1em; background: white;}
#logmsg dl { margin: 0; }
#logmsg dt { font-weight: bold; }
#logmsg dd { margin: 0; padding: 0 0 0.5em 0; }
#logmsg dd:before { content:'\00bb';}
#logmsg table { border-spacing: 0px; border-collapse: collapse; border-top: 4px solid #fa0; border-bottom: 1px solid #fa0; background: #fff; }
#logmsg table th { text-align: left; font-weight: normal; padding: 0.2em 0.5em; border-top: 1px dotted #fa0; }
#logmsg table td { text-align: right; border-top: 1px dotted #fa0; padding: 0.2em 0.5em; }
#logmsg table thead th { text-align: center; border-bottom: 1px solid #fa0; }
#logmsg table th.Corner { text-align: left; }
#logmsg hr { border: none 0; border-top: 2px dashed #fa0; height: 1px; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff {padding:10px 0;}
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
#patch .lines, .info {color:#888;background:#fff;}
--></style>
<div id="msg">
<dl class="meta">
<dt>Revision</dt> <dd><a href="http://trac.macosforge.org/projects/ruby/changeset/2036">2036</a></dd>
<dt>Author</dt> <dd>lsansonetti@apple.com</dd>
<dt>Date</dt> <dd>2009-07-20 13:49:50 -0700 (Mon, 20 Jul 2009)</dd>
</dl>
<h3>Log Message</h3>
<pre>some work on unicode support (still in progress)</pre>
<h3>Modified Paths</h3>
<ul>
<li><a href="#MacRubybranchesexperimentalcompilercpp">MacRuby/branches/experimental/compiler.cpp</a></li>
<li><a href="#MacRubybranchesexperimentalcompilerh">MacRuby/branches/experimental/compiler.h</a></li>
<li><a href="#MacRubybranchesexperimentalioc">MacRuby/branches/experimental/io.c</a></li>
<li><a href="#MacRubybranchesexperimentalparsey">MacRuby/branches/experimental/parse.y</a></li>
<li><a href="#MacRubybranchesexperimentalrec">MacRuby/branches/experimental/re.c</a></li>
<li><a href="#MacRubybranchesexperimentalspecmacrubycorehash_specrb">MacRuby/branches/experimental/spec/macruby/core/hash_spec.rb</a></li>
<li><a href="#MacRubybranchesexperimentalstringc">MacRuby/branches/experimental/string.c</a></li>
</ul>
</div>
<div id="patch">
<h3>Diff</h3>
<a id="MacRubybranchesexperimentalcompilercpp"></a>
<div class="modfile"><h4>Modified: MacRuby/branches/experimental/compiler.cpp (2035 => 2036)</h4>
<pre class="diff"><span>
<span class="info">--- MacRuby/branches/experimental/compiler.cpp        2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/compiler.cpp        2009-07-20 20:49:50 UTC (rev 2036)
</span><span class="lines">@@ -138,7 +138,7 @@
</span><span class="cx"> falseVal = ConstantInt::get(RubyObjTy, Qfalse);
</span><span class="cx"> undefVal = ConstantInt::get(RubyObjTy, Qundef);
</span><span class="cx"> splatArgFollowsVal = ConstantInt::get(RubyObjTy, SPLAT_ARG_FOLLOWS);
</span><del>- cObject = ConstantInt::get(RubyObjTy, (long)rb_cObject);
</del><ins>+ cObject = ConstantInt::get(RubyObjTy, rb_cObject);
</ins><span class="cx"> PtrTy = PointerType::getUnqual(Type::Int8Ty);
</span><span class="cx"> PtrPtrTy = PointerType::getUnqual(PtrTy);
</span><span class="cx"> Int32PtrTy = PointerType::getUnqual(Type::Int32Ty);
</span><span class="lines">@@ -407,21 +407,56 @@
</span><span class="cx"> }
</span><span class="cx">
</span><span class="cx"> GlobalVariable *
</span><ins>+RoxorCompiler::compile_const_global_ustring(const UniChar *str,
+        const size_t len, CFHashCode hash)
+{
+ assert(len > 0);
+
+ std::map&lt;CFHashCode, GlobalVariable *&gt;::iterator iter =
+        static_ustrings.find(hash);
+
+ GlobalVariable *gvar;
+ if (iter == static_ustrings.end()) {
+        const ArrayType *str_type = ArrayType::get(Type::Int16Ty, len);
+
+        std::vector&lt;Constant *&gt; ary_elements;
+        for (unsigned int i = 0; i < len; i++) {
+         ary_elements.push_back(ConstantInt::get(Type::Int16Ty, str[i]));
+        }
+
+        gvar = new GlobalVariable(
+                str_type,
+                true,
+                GlobalValue::InternalLinkage,
+                ConstantArray::get(str_type, ary_elements),
+                "",
+                RoxorCompiler::module);
+
+        static_ustrings[hash] = gvar;
+ }
+ else {
+        gvar = iter->second;
+ }
+
+ return gvar;
+}
+
+GlobalVariable *
</ins><span class="cx"> RoxorCompiler::compile_const_global_string(const char *str,
</span><del>-        const size_t str_len)
</del><ins>+        const size_t len)
</ins><span class="cx"> {
</span><del>- assert(str_len > 0);
</del><ins>+ assert(len > 0);
</ins><span class="cx">
</span><del>- std::string s(str, str_len);
</del><ins>+ std::string s(str, len);
</ins><span class="cx"> std::map&lt;std::string, GlobalVariable *&gt;::iterator iter =
</span><span class="cx">         static_strings.find(s);
</span><span class="cx">
</span><span class="cx"> GlobalVariable *gvar;
</span><span class="cx"> if (iter == static_strings.end()) {
</span><del>-        const ArrayType *str_type = ArrayType::get(Type::Int8Ty, str_len + 1);
</del><ins>+        const ArrayType *str_type = ArrayType::get(Type::Int8Ty, len + 1);
</ins><span class="cx">
</span><span class="cx">         std::vector&lt;Constant *&gt; ary_elements;
</span><del>-        for (unsigned int i = 0; i < str_len; i++) {
</del><ins>+        for (unsigned int i = 0; i < len; i++) {
</ins><span class="cx">          ary_elements.push_back(ConstantInt::get(Type::Int8Ty, str[i]));
</span><span class="cx">         }
</span><span class="cx">         ary_elements.push_back(ConstantInt::get(Type::Int8Ty, 0));
</span><span class="lines">@@ -2492,7 +2527,6 @@
</span><span class="cx">         //
</span><span class="cx">         //        10.times { s = 'foo'; s << 'bar' }
</span><span class="cx">         //
</span><del>-        const char *str = RSTRING_PTR(val);
</del><span class="cx">         const size_t str_len = RSTRING_LEN(val);
</span><span class="cx">         if (str_len == 0) {
</span><span class="cx">          if (newString3Func == NULL) {        
</span><span class="lines">@@ -2503,9 +2537,18 @@
</span><span class="cx">          return CallInst::Create(newString3Func, "", bb);
</span><span class="cx">         }
</span><span class="cx">         else {
</span><del>-         GlobalVariable *str_gvar = compile_const_global_string(str,
-                 str_len);
</del><ins>+         UniChar *buf = (UniChar *)CFStringGetCharactersPtr(
+                 (CFStringRef)val);
</ins><span class="cx">
</span><ins>+         if (buf == NULL) {
+                buf = (UniChar *)alloca(sizeof(UniChar) * str_len);
+                CFStringGetCharacters((CFStringRef)val,
+                        CFRangeMake(0, str_len), buf);
+         }
+
+         GlobalVariable *str_gvar = compile_const_global_ustring(buf,
+                 str_len, CFHash((CFTypeRef)val));
+
</ins><span class="cx">          std::vector<Value *> idxs;
</span><span class="cx">          idxs.push_back(ConstantInt::get(Type::Int32Ty, 0));
</span><span class="cx">          idxs.push_back(ConstantInt::get(Type::Int32Ty, 0));
</span><span class="lines">@@ -2515,8 +2558,9 @@
</span><span class="cx">          if (newString2Func == NULL) {        
</span><span class="cx">                 newString2Func = cast&lt;Function&gt;(
</span><span class="cx">                         module->getOrInsertFunction(
</span><del>-                         "rb_str_new", RubyObjTy, PtrTy, Type::Int32Ty,
-                         NULL));
</del><ins>+                         "rb_unicode_str_new",
+                         RubyObjTy, PointerType::getUnqual(Type::Int16Ty),
+                         Type::Int32Ty, NULL));
</ins><span class="cx">          }
</span><span class="cx">
</span><span class="cx">          std::vector<Value *> params;
</span></span></pre></div>
<a id="MacRubybranchesexperimentalcompilerh"></a>
<div class="modfile"><h4>Modified: MacRuby/branches/experimental/compiler.h (2035 => 2036)</h4>
<pre class="diff"><span>
<span class="info">--- MacRuby/branches/experimental/compiler.h        2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/compiler.h        2009-07-20 20:49:50 UTC (rev 2036)
</span><span class="lines">@@ -66,6 +66,7 @@
</span><span class="cx">         std::vector&lt;ID&gt; dvars;
</span><span class="cx">         std::map&lt;ID, Instruction *&gt; ivar_slots_cache;
</span><span class="cx">         std::map&lt;std::string, GlobalVariable *&gt; static_strings;
</span><ins>+        std::map&lt;CFHashCode, GlobalVariable *&gt; static_ustrings;
</ins><span class="cx">
</span><span class="cx"> #if ROXOR_COMPILER_DEBUG
</span><span class="cx">         int level;
</span><span class="lines">@@ -249,6 +250,9 @@
</span><span class="cx">         GlobalVariable *compile_const_global_string(const char *str) {
</span><span class="cx">          return compile_const_global_string(str, strlen(str));
</span><span class="cx">         }
</span><ins>+        GlobalVariable *compile_const_global_ustring(const UniChar *str,
+                const size_t str_len, CFHashCode hash);
+
</ins><span class="cx">         Value *compile_arity(rb_vm_arity_t &arity);
</span><span class="cx">         Value *compile_literal(VALUE val);
</span><span class="cx">         virtual Value *compile_immutable_literal(VALUE val);
</span></span></pre></div>
<a id="MacRubybranchesexperimentalioc"></a>
<div class="modfile"><h4>Modified: MacRuby/branches/experimental/io.c (2035 => 2036)</h4>
<pre class="diff"><span>
<span class="info">--- MacRuby/branches/experimental/io.c        2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/io.c        2009-07-20 20:49:50 UTC (rev 2036)
</span><span class="lines">@@ -460,22 +460,11 @@
</span><span class="cx"> }
</span><span class="cx"> else {
</span><span class="cx">         buffer = (UInt8 *)RSTRING_PTR(to_write);
</span><del>-        if (buffer != NULL) {
-         length = RSTRING_LEN(to_write);
</del><ins>+        if (buffer == NULL) {
+         rb_raise(rb_eRuntimeError,
+                 "could not extract a string from the read data.");
</ins><span class="cx">         }
</span><del>-        else {
-         const long max = CFStringGetMaximumSizeForEncoding(
-                 CFStringGetLength((CFStringRef)to_write),
-                 kCFStringEncodingUTF8);
-
-         buffer = (UInt8 *)alloca(max + 1);
-         if (!CFStringGetCString((CFStringRef)to_write, (char *)buffer,
-                        max, kCFStringEncodingUTF8)) {
-                rb_raise(rb_eRuntimeError,
-                        "could not extract a string from the read data.");
-         }
-         length = strlen((char *)buffer);
-        }
</del><ins>+        length = strlen((char *)buffer);
</ins><span class="cx"> }
</span><span class="cx">
</span><span class="cx"> if (length == 0) {
</span></span></pre></div>
<a id="MacRubybranchesexperimentalparsey"></a>
<div class="modfile"><h4>Modified: MacRuby/branches/experimental/parse.y (2035 => 2036)</h4>
<pre class="diff"><span>
<span class="info">--- MacRuby/branches/experimental/parse.y        2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/parse.y        2009-07-20 20:49:50 UTC (rev 2036)
</span><span class="lines">@@ -290,14 +290,34 @@
</span><span class="cx">
</span><span class="cx"> #if WITH_OBJC
</span><span class="cx"> # define UTF8_ENC() (NULL)
</span><ins>+static inline VALUE
+__new_tmp_str(const char *ptr, const size_t len)
+{
+ if (ptr != NULL) {
+        CFStringRef str = CFStringCreateWithBytes(NULL, (UInt8 *)ptr, len,
+                kCFStringEncodingUTF8, false);
+        if (str != NULL) {
+         CFMutableStringRef str2 =
+                CFStringCreateMutableCopy(NULL, 0, str);
+         assert(str2 != NULL);
+         CFRelease(str);
+         return (VALUE)CFMakeCollectable(str2);
+        }
+ }
+ return rb_usascii_str_new(ptr, len);
+}
+# define STR_NEW(p,n) __new_tmp_str(p, n)
+# define STR_NEW0() __new_tmp_str(0, 0)
+# define STR_NEW2(p) __new_tmp_str(p, strlen(p))
+# define STR_NEW3(p,n,e,func) __new_tmp_str(p, n)
</ins><span class="cx"> #else
</span><span class="cx"> # define UTF8_ENC() (parser->utf8 ? parser->utf8 : \
</span><span class="cx">                  (parser->utf8 = rb_utf8_encoding()))
</span><ins>+# define STR_NEW(p,n) rb_enc_str_new((p),(n),parser->enc)
+# define STR_NEW0() rb_usascii_str_new(0,0)
+# define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc)
+# define STR_NEW3(p,n,e,func) parser_str_new((p),(n),(e),(func),parser->enc)
</ins><span class="cx"> #endif
</span><del>-#define STR_NEW(p,n) rb_enc_str_new((p),(n),parser->enc)
-#define STR_NEW0() rb_usascii_str_new(0,0)
-#define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc)
-#define STR_NEW3(p,n,e,func) parser_str_new((p),(n),(e),(func),parser->enc)
</del><span class="cx"> #if WITH_OBJC
</span><span class="cx"> # define STR_ENC(m) (parser->enc)
</span><span class="cx"> # define ENC_SINGLE(cr) (1)
</span><span class="lines">@@ -5238,6 +5258,7 @@
</span><span class="cx"> str_dsym = (STR_FUNC_SYMBOL|STR_FUNC_EXPAND),
</span><span class="cx"> };
</span><span class="cx">
</span><ins>+#if 0
</ins><span class="cx"> static VALUE
</span><span class="cx"> parser_str_new(const char *p, long n, rb_encoding *enc, int func, rb_encoding *enc0)
</span><span class="cx"> {
</span><span class="lines">@@ -5266,6 +5287,7 @@
</span><span class="cx">
</span><span class="cx"> return str;
</span><span class="cx"> }
</span><ins>+#endif
</ins><span class="cx">
</span><span class="cx"> #define lex_goto_eol(parser) (parser->parser_lex_p = parser->parser_lex_pend)
</span><span class="cx">
</span></span></pre></div>
<a id="MacRubybranchesexperimentalrec"></a>
<div class="modfile"><h4>Modified: MacRuby/branches/experimental/re.c (2035 => 2036)</h4>
<pre class="diff"><span>
<span class="info">--- MacRuby/branches/experimental/re.c        2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/re.c        2009-07-20 20:49:50 UTC (rev 2036)
</span><span class="lines">@@ -9,6 +9,7 @@
</span><span class="cx">
</span><span class="cx"> **********************************************************************/
</span><span class="cx">
</span><ins>+#include "oniguruma.h"
</ins><span class="cx"> #include "ruby/ruby.h"
</span><span class="cx"> #include "ruby/re.h"
</span><span class="cx"> #include "ruby/encoding.h"
</span><span class="lines">@@ -449,10 +450,13 @@
</span><span class="cx"> rb_reg_check(re);
</span><span class="cx"> cstr = RREGEXP(re)->str;
</span><span class="cx"> clen = RREGEXP(re)->len;
</span><del>- if (clen == 0)
</del><ins>+ if (clen == 0) {
</ins><span class="cx">         cstr = NULL;
</span><ins>+ }
</ins><span class="cx"> str = rb_enc_str_new(cstr, clen, rb_enc_get(re));
</span><del>- if (OBJ_TAINTED(re)) OBJ_TAINT(str);
</del><ins>+ if (OBJ_TAINTED(re)) {
+        OBJ_TAINT(str);
+ }
</ins><span class="cx"> return str;
</span><span class="cx"> }
</span><span class="cx">
</span><span class="lines">@@ -772,7 +776,8 @@
</span><span class="cx"> }
</span><span class="cx">
</span><span class="cx"> static Regexp*
</span><del>-make_regexp(const char *s, long len, rb_encoding *enc, int flags, onig_errmsg_buffer err)
</del><ins>+make_regexp(const char *s, long len, rb_encoding *enc, int flags,
+        onig_errmsg_buffer err)
</ins><span class="cx"> {
</span><span class="cx"> Regexp *rp;
</span><span class="cx"> int r;
</span><span class="lines">@@ -1228,9 +1233,88 @@
</span><span class="cx"> }
</span><span class="cx"> #endif
</span><span class="cx">
</span><ins>+static void
+get_cstring(VALUE str, CFStringEncoding enc, char **pcstr, size_t *pcharsize,
+        bool *should_free)
+{
+ if (pcstr != NULL && pcharsize != NULL && should_free != NULL) {
+        char *p = (char *)CFStringGetCStringPtr((CFStringRef)str, enc);
+        if (p != NULL) {
+         *should_free = false;
+        }
+        else {
+         const size_t s = CFStringGetMaximumSizeForEncoding(
+                 CFStringGetLength((CFStringRef)str), enc);
+         p = (char *)malloc(s + 1);
+         assert(CFStringGetCString((CFStringRef)str, p, s + 1, enc));
+         *should_free = true;
+        }
+        *pcstr = p;
+        *pcharsize = sizeof(char);
+ }
+}
+
+static void
+get_unistring(VALUE str, CFStringEncoding enc, char **pcstr, size_t *pcharsize,
+        bool *should_free)
+{
+ if (pcstr != NULL && pcharsize != NULL && should_free != NULL) {
+        UniChar *p = (UniChar *)CFStringGetCharactersPtr((CFStringRef)str);
+        const size_t str_len = CFStringGetLength((CFStringRef)str);
+        if (p != NULL) {
+         *should_free = false;
+        }
+        else {
+         const size_t s = CFStringGetMaximumSizeForEncoding(
+                 str_len, enc);
+         p = (UniChar *)malloc(s);
+         CFStringGetCharacters((CFStringRef)str,
+                 CFRangeMake(0, str_len),
+                 p);
+         *should_free = true;
+        }
+        *pcstr = (char *)p;
+        *pcharsize = sizeof(UniChar);
+ }
+}
+
+static inline bool
+multibyte_encoding(rb_encoding *enc)
+{
+ return enc == (rb_encoding *)ONIG_ENCODING_UTF16_BE
+        || enc == (rb_encoding *)ONIG_ENCODING_UTF16_LE
+        || enc == (rb_encoding *)ONIG_ENCODING_UTF32_BE
+        || enc == (rb_encoding *)ONIG_ENCODING_UTF32_LE;
+}
+
</ins><span class="cx"> static rb_encoding*
</span><del>-rb_reg_prepare_enc(VALUE re, VALUE str, int warn)
</del><ins>+rb_reg_prepare_enc(VALUE re, VALUE str, char **pcstr, size_t *pcharsize,
+        bool *should_free)
</ins><span class="cx"> {
</span><ins>+ CFStringEncoding enc = CFStringGetFastestEncoding((CFStringRef)str);
+ switch (enc) {
+        case kCFStringEncodingMacRoman:
+        case kCFStringEncodingWindowsLatin1:
+        case kCFStringEncodingISOLatin1:
+        case kCFStringEncodingNextStepLatin:
+        case kCFStringEncodingASCII:
+        case kCFStringEncodingNonLossyASCII:
+         get_cstring(str, enc, pcstr, pcharsize, should_free);
+         return (rb_encoding *)ONIG_ENCODING_ASCII;
+
+        case kCFStringEncodingUTF8:
+        case kCFStringEncodingUTF16:
+        case kCFStringEncodingUTF16BE:
+        case kCFStringEncodingUTF16LE:
+        case kCFStringEncodingUTF32:
+        case kCFStringEncodingUTF32BE:
+        case kCFStringEncodingUTF32LE:
+         get_unistring(str, enc, pcstr, pcharsize, should_free);
+         return (rb_encoding *)ONIG_ENCODING_UTF16_LE;
+ }
+
+ rb_raise(rb_eArgError, "given string has unrecognized encoding");
+#if 0
</ins><span class="cx"> rb_encoding *enc = 0;
</span><span class="cx">
</span><span class="cx"> #if !WITH_OBJC
</span><span class="lines">@@ -1265,10 +1349,12 @@
</span><span class="cx"> }
</span><span class="cx"> #endif
</span><span class="cx"> return enc;
</span><ins>+#endif
</ins><span class="cx"> }
</span><span class="cx">
</span><del>-regex_t *
-rb_reg_prepare_re(VALUE re, VALUE str)
</del><ins>+static regex_t *
+rb_reg_prepare_re(VALUE re, VALUE str, char **pcstr, size_t *pcharsize,
+        bool *should_free)
</ins><span class="cx"> {
</span><span class="cx"> regex_t *reg = RREGEXP(re)->ptr;
</span><span class="cx"> onig_errmsg_buffer err = "";
</span><span class="lines">@@ -1277,38 +1363,49 @@
</span><span class="cx"> const char *pattern;
</span><span class="cx"> VALUE unescaped;
</span><span class="cx"> rb_encoding *fixed_enc = 0;
</span><del>- rb_encoding *enc = rb_reg_prepare_enc(re, str, 1);
</del><ins>+ rb_encoding *enc = rb_reg_prepare_enc(re, str, pcstr, pcharsize,
+         should_free);
</ins><span class="cx">
</span><del>-#if !WITH_OBJC
- if (reg->enc == enc) return reg;
-#endif
</del><ins>+ if ((rb_encoding *)reg->enc == enc) {
+        return reg;
+ }
</ins><span class="cx">
</span><span class="cx"> rb_reg_check(re);
</span><span class="cx"> reg = RREGEXP(re)->ptr;
</span><span class="cx"> pattern = RREGEXP(re)->str;
</span><span class="cx">
</span><del>- unescaped = rb_reg_preprocess(
-        pattern, pattern + RREGEXP(re)->len, enc,
</del><ins>+ unescaped = rb_reg_preprocess(pattern, pattern + RREGEXP(re)->len, enc,
</ins><span class="cx">         &fixed_enc, err);
</span><span class="cx">
</span><span class="cx"> if (unescaped == Qnil) {
</span><span class="cx">         rb_raise(rb_eArgError, "regexp preprocess failed: %s", err);
</span><span class="cx"> }
</span><span class="cx">
</span><del>-#if WITH_OBJC
- enc = (rb_encoding *)ONIG_ENCODING_ASCII;
-#endif
</del><ins>+ UChar *begin, *end;
+ if (multibyte_encoding(enc)) {
+        UniChar *chars = (UniChar *)CFStringGetCharactersPtr(
+                (CFStringRef)unescaped);
+        const long len = RSTRING_LEN(unescaped);
+        if (chars == NULL) {
+         chars = (UniChar *)alloca(sizeof(UniChar) * len);
+         CFStringGetCharacters((CFStringRef)unescaped,
+                 CFRangeMake(0, len), chars);
+        }
+        begin = (UChar *)chars;
+        end = (UChar *)chars + (sizeof(UniChar) * len);
+ }
+ else {
+        begin = (UChar *)RSTRING_PTR(unescaped);
+        end = begin + RSTRING_LEN(unescaped);
+ }
</ins><span class="cx">
</span><del>- r = onig_new(&amp;reg, (UChar* )RSTRING_PTR(unescaped),
-                 (UChar* )(RSTRING_PTR(unescaped) + RSTRING_LEN(unescaped)),
-                 reg->options, (OnigEncoding)enc,
-                 OnigDefaultSyntax, &einfo);
- if (r) {
</del><ins>+ r = onig_new(&amp;reg, begin, end, reg->options, (OnigEncoding)enc,
+         OnigDefaultSyntax, &einfo);
+ if (r != 0) {
</ins><span class="cx">         onig_error_code_to_str((UChar*)err, r, &einfo);
</span><span class="cx">         rb_reg_raise(pattern, RREGEXP(re)->len, err, re);
</span><span class="cx"> }
</span><span class="cx">
</span><del>- RB_GC_GUARD(unescaped);
</del><span class="cx"> return reg;
</span><span class="cx"> }
</span><span class="cx">
</span><span class="lines">@@ -1321,7 +1418,7 @@
</span><span class="cx"> UChar *p, *string;
</span><span class="cx"> #endif
</span><span class="cx">
</span><del>- enc = rb_reg_prepare_enc(re, str, 0);
</del><ins>+ enc = rb_reg_prepare_enc(re, str, NULL, NULL, NULL);
</ins><span class="cx">
</span><span class="cx"> if (reverse) {
</span><span class="cx">         range = -pos;
</span><span class="lines">@@ -1350,47 +1447,45 @@
</span><span class="cx"> int
</span><span class="cx"> rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
</span><span class="cx"> {
</span><del>- int result;
- VALUE match;
- struct re_registers *pregs;
- const char *cstr, *range;
- long clen;
</del><span class="cx"> regex_t *reg0 = RREGEXP(re)->ptr, *reg;
</span><span class="cx"> int busy = FL_TEST(re, REG_BUSY);
</span><span class="cx">
</span><del>- cstr = range = RSTRING_PTR(str);
- clen = RSTRING_LEN(str);
-#if WITH_OBJC
</del><span class="cx"> static struct re_registers *regs = NULL;
</span><span class="cx"> if (regs == NULL) {
</span><span class="cx">         regs = xmalloc(sizeof(struct re_registers));
</span><span class="cx">         rb_objc_root(&amp;regs);
</span><span class="cx"> }
</span><del>- pregs = regs;
-#else
- static struct re_registers regs;
- pregs = &amp;regs;
-#endif
</del><ins>+ struct re_registers *pregs = regs;
</ins><span class="cx">
</span><ins>+ const size_t clen = RSTRING_LEN(str);
</ins><span class="cx"> if (pos > clen || pos < 0) {
</span><span class="cx">         rb_backref_set(Qnil);
</span><span class="cx">         return -1;
</span><span class="cx"> }
</span><span class="cx">
</span><del>- reg = rb_reg_prepare_re(re, str);
</del><ins>+ char *cstr = NULL;
+ size_t charsize = 0;
+ bool should_free = false;
+ reg = rb_reg_prepare_re(re, str, &cstr, &charsize, &should_free);
</ins><span class="cx">
</span><ins>+ char *range = cstr;
</ins><span class="cx"> FL_SET(re, REG_BUSY);
</span><span class="cx"> if (!reverse) {
</span><del>-        range += RSTRING_LEN(str);
</del><ins>+        range += (clen * charsize);
</ins><span class="cx"> }
</span><span class="cx"> MEMZERO(pregs, struct re_registers, 1);
</span><del>- result = onig_search(RREGEXP(re)->ptr,
-                         (UChar*)cstr,
-                         ((UChar*)cstr + clen),
-                         ((UChar*)cstr + pos),
-                         ((UChar*)range),
-                         pregs, ONIG_OPTION_NONE);
</del><ins>+ int result = onig_search(reg,
+         (UChar*)cstr,
+         ((UChar*)cstr + (clen * charsize)),
+         ((UChar*)cstr + (pos * charsize)),
+         ((UChar*)range),
+         pregs, ONIG_OPTION_NONE);
</ins><span class="cx">
</span><ins>+ if (should_free) {
+        free(cstr);
+        cstr = NULL;
+ }
+
</ins><span class="cx"> if (RREGEXP(re)->ptr != reg) {
</span><span class="cx">         if (busy) {
</span><span class="cx">          onig_free(reg);
</span><span class="lines">@@ -1400,7 +1495,9 @@
</span><span class="cx">          RREGEXP(re)->ptr = reg;
</span><span class="cx">         }
</span><span class="cx"> }
</span><del>- if (!busy) FL_UNSET(re, REG_BUSY);
</del><ins>+ if (!busy) {
+        FL_UNSET(re, REG_BUSY);
+ }
</ins><span class="cx"> if (result < 0) {
</span><span class="cx">         onig_region_free(pregs, 0);
</span><span class="cx">         if (result == ONIG_MISMATCH) {
</span><span class="lines">@@ -1414,10 +1511,26 @@
</span><span class="cx">         }
</span><span class="cx"> }
</span><span class="cx">
</span><ins>+ if (charsize > 1) {
+        int i;
+        for (i = 0; i < pregs->num_regs; i++) {
+         if (pregs->beg[i] > 0) {
+                assert((pregs->beg[i] % charsize) == 0);
+                pregs->beg[i] /= charsize;
+         }
+         if (pregs->end[i] > 0) {
+                assert((pregs->end[i] % charsize) == 0);
+                pregs->end[i] /= charsize;
+         }
+        }
+        assert((result % charsize) == 0);
+        result /= charsize;
+ }
+
</ins><span class="cx"> #if WITH_OBJC
</span><del>- match = match_alloc(rb_cMatch, 0);
</del><ins>+ VALUE match = match_alloc(rb_cMatch, 0);
</ins><span class="cx"> #else
</span><del>- match = rb_backref_get();
</del><ins>+ VALUE match = rb_backref_get();
</ins><span class="cx"> if (NIL_P(match) || FL_TEST(match, MATCH_BUSY)) {
</span><span class="cx">         match = match_alloc(rb_cMatch);
</span><span class="cx"> }
</span><span class="lines">@@ -1467,7 +1580,9 @@
</span><span class="cx"> long start, end, len;
</span><span class="cx"> struct re_registers *regs;
</span><span class="cx">
</span><del>- if (NIL_P(match)) return Qnil;
</del><ins>+ if (NIL_P(match)) {
+        return Qnil;
+ }
</ins><span class="cx"> match_check(match);
</span><span class="cx"> regs = RMATCH_REGS(match);
</span><span class="cx"> if (nth >= regs->num_regs) {
</span><span class="lines">@@ -1475,14 +1590,17 @@
</span><span class="cx"> }
</span><span class="cx"> if (nth < 0) {
</span><span class="cx">         nth += regs->num_regs;
</span><del>-        if (nth <= 0) return Qnil;
</del><ins>+        if (nth <= 0) {
+         return Qnil;
+        }
</ins><span class="cx"> }
</span><span class="cx"> start = BEG(nth);
</span><del>- if (start == -1) return Qnil;
</del><ins>+ if (start == -1) {
+        return Qnil;
+ }
</ins><span class="cx"> end = END(nth);
</span><span class="cx"> len = end - start;
</span><span class="cx"> str = rb_str_subseq(RMATCH(match)->str, start, len);
</span><del>- OBJ_INFECT(str, match);
</del><span class="cx"> return str;
</span><span class="cx"> }
</span><span class="cx">
</span><span class="lines">@@ -1794,9 +1912,15 @@
</span><span class="cx"> VALUE str = rb_reg_last_match(match);
</span><span class="cx">
</span><span class="cx"> match_check(match);
</span><del>- if (NIL_P(str)) str = rb_str_new(0,0);
- if (OBJ_TAINTED(match)) OBJ_TAINT(str);
- if (OBJ_TAINTED(RMATCH(match)->str)) OBJ_TAINT(str);
</del><ins>+ if (NIL_P(str)) {
+        str = rb_str_new(0,0);
+ }
+ if (OBJ_TAINTED(match)) {
+        OBJ_TAINT(str);
+ }
+ else if (OBJ_TAINTED(RMATCH(match)->str)) {
+        OBJ_TAINT(str);
+ }
</ins><span class="cx"> return str;
</span><span class="cx"> }
</span><span class="cx">
</span><span class="lines">@@ -1886,9 +2010,10 @@
</span><span class="cx"> for (i = 0; i < num_regs; i++) {
</span><span class="cx"> VALUE v;
</span><span class="cx"> rb_str_buf_cat2(str, " ");
</span><del>- if (0 < i) {
- if (names[i].name)
</del><ins>+ if (i > 0) {
+ if (names[i].name) {
</ins><span class="cx"> rb_str_buf_cat(str, (const char *)names[i].name, names[i].len);
</span><ins>+         }
</ins><span class="cx"> else {
</span><span class="cx"> char buf[sizeof(i)*3+1];
</span><span class="cx"> snprintf(buf, sizeof(buf), "%d", i);
</span><span class="lines">@@ -1897,10 +2022,12 @@
</span><span class="cx"> rb_str_buf_cat2(str, ":");
</span><span class="cx"> }
</span><span class="cx"> v = rb_reg_nth_match(i, match);
</span><del>- if (v == Qnil)
</del><ins>+ if (v == Qnil) {
</ins><span class="cx"> rb_str_buf_cat2(str, "nil");
</span><del>- else
</del><ins>+        }
+ else {
</ins><span class="cx"> rb_str_buf_append(str, rb_str_inspect(v, 0));
</span><ins>+        }
</ins><span class="cx"> }
</span><span class="cx"> rb_str_buf_cat2(str, ">");
</span><span class="cx">
</span><span class="lines">@@ -2324,7 +2451,6 @@
</span><span class="cx"> enc = rb_enc_get(str);
</span><span class="cx">
</span><span class="cx"> buf = rb_reg_preprocess(p, end, enc, &fixed_enc, err);
</span><del>- RB_GC_GUARD(str);
</del><span class="cx">
</span><span class="cx"> if (buf == Qnil) {
</span><span class="cx">         return rb_reg_error_desc(str, 0, err);
</span><span class="lines">@@ -2404,15 +2530,21 @@
</span><span class="cx"> rb_encoding *a_enc = rb_ascii8bit_encoding();
</span><span class="cx"> #endif
</span><span class="cx">
</span><del>- if (!OBJ_TAINTED(obj) && rb_safe_level() >= 4)
</del><ins>+ if (!OBJ_TAINTED(obj) && rb_safe_level() >= 4) {
</ins><span class="cx">         rb_raise(rb_eSecurityError, "Insecure: can't modify regexp");
</span><ins>+ }
</ins><span class="cx"> rb_check_frozen(obj);
</span><del>- if (FL_TEST(obj, REG_LITERAL))
</del><ins>+ if (FL_TEST(obj, REG_LITERAL)) {
</ins><span class="cx">         rb_raise(rb_eSecurityError, "can't modify literal regexp");
</span><del>- if (re->ptr) onig_free(re->ptr);
- if (re->str) xfree(re->str);
- re->ptr = 0;
- re->str = 0;
</del><ins>+ }
+ if (re->ptr != NULL) {
+        onig_free(re->ptr);
+ }
+ if (re->str != NULL) {
+        xfree(re->str);
+ }
+ re->ptr = NULL;
+ re->str = NULL;
</ins><span class="cx">
</span><span class="cx"> unescaped = rb_reg_preprocess(s, s+len, enc, &amp;fixed_enc, err);
</span><span class="cx"> if (unescaped == Qnil)
</span><span class="lines">@@ -2444,23 +2576,24 @@
</span><span class="cx"> if (options &amp; ARG_ENCODING_NONE) {
</span><span class="cx"> re->basic.flags |= REG_ENCODING_NONE;
</span><span class="cx"> }
</span><del>-
- GC_WB(&amp;re->ptr, make_regexp(RSTRING_PTR(unescaped),
-                                RSTRING_LEN(unescaped), enc,
- options &amp; ARG_REG_OPTION_MASK, err));
- if (!re->ptr) return -1;
</del><ins>+
+ Regexp *reg = make_regexp(RSTRING_PTR(unescaped),
+         RSTRING_LEN(unescaped), enc,
+         options &amp; ARG_REG_OPTION_MASK, err);
+ if (reg == NULL) {
+        return -1;
+ }
+ GC_WB(&amp;re->ptr, reg);
</ins><span class="cx"> GC_WB(&amp;re->str, ALLOC_N(char, len+1));
</span><span class="cx"> memcpy(re->str, s, len);
</span><span class="cx"> re->str[len] = '\0';
</span><span class="cx"> re->len = len;
</span><del>- RB_GC_GUARD(unescaped);
</del><span class="cx"> return 0;
</span><span class="cx"> }
</span><span class="cx">
</span><span class="cx"> static int
</span><span class="cx"> rb_reg_initialize_str(VALUE obj, VALUE str, int options, onig_errmsg_buffer err)
</span><span class="cx"> {
</span><del>- int ret;
</del><span class="cx"> rb_encoding *enc = rb_enc_get(str);
</span><span class="cx"> if (options &amp; ARG_ENCODING_NONE) {
</span><span class="cx"> #if !WITH_OBJC
</span><span class="lines">@@ -2475,10 +2608,8 @@
</span><span class="cx"> }
</span><span class="cx"> #endif
</span><span class="cx"> }
</span><del>- ret = rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), enc,
-                         options, err);
- RB_GC_GUARD(str);
- return ret;
</del><ins>+ return rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), enc,
+         options, err);
</ins><span class="cx"> }
</span><span class="cx">
</span><span class="cx"> static VALUE
</span></span></pre></div>
<a id="MacRubybranchesexperimentalspecmacrubycorehash_specrb"></a>
<div class="modfile"><h4>Modified: MacRuby/branches/experimental/spec/macruby/core/hash_spec.rb (2035 => 2036)</h4>
<pre class="diff"><span>
<span class="info">--- MacRuby/branches/experimental/spec/macruby/core/hash_spec.rb        2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/spec/macruby/core/hash_spec.rb        2009-07-20 20:49:50 UTC (rev 2036)
</span><span class="lines">@@ -62,7 +62,7 @@
</span><span class="cx"> end
</span><span class="cx">
</span><span class="cx"> it "can have a singleton class" do
</span><del>- a = NSDictionary.array
</del><ins>+ a = NSDictionary.dictionary
</ins><span class="cx"> def a.foo; 42; end
</span><span class="cx"> a.foo.should == 42
</span><span class="cx"> lambda { a[42] = 123 }.should raise_error(RuntimeError)
</span></span></pre></div>
<a id="MacRubybranchesexperimentalstringc"></a>
<div class="modfile"><h4>Modified: MacRuby/branches/experimental/string.c (2035 => 2036)</h4>
<pre class="diff"><span>
<span class="info">--- MacRuby/branches/experimental/string.c        2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/string.c        2009-07-20 20:49:50 UTC (rev 2036)
</span><span class="lines">@@ -157,6 +157,15 @@
</span><span class="cx"> }
</span><span class="cx">
</span><span class="cx"> VALUE
</span><ins>+rb_unicode_str_new(const UniChar *ptr, const size_t len)
+{
+ VALUE str = str_alloc(rb_cString);
+ CFStringAppendCharacters((CFMutableStringRef)str,
+         ptr, len);
+ return str;
+}
+
+VALUE
</ins><span class="cx"> rb_str_new(const char *ptr, long len)
</span><span class="cx"> {
</span><span class="cx"> return str_new(rb_cString, ptr, len);
</span><span class="lines">@@ -165,17 +174,13 @@
</span><span class="cx"> VALUE
</span><span class="cx"> rb_usascii_str_new(const char *ptr, long len)
</span><span class="cx"> {
</span><del>- VALUE str = str_new(rb_cString, ptr, len);
-
- return str;
</del><ins>+ return str_new(rb_cString, ptr, len);
</ins><span class="cx"> }
</span><span class="cx">
</span><span class="cx"> VALUE
</span><span class="cx"> rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
</span><span class="cx"> {
</span><del>- VALUE str = str_new(rb_cString, ptr, len);
-
- return str;
</del><ins>+ return str_new(rb_cString, ptr, len);
</ins><span class="cx"> }
</span><span class="cx">
</span><span class="cx"> VALUE
</span><span class="lines">@@ -592,8 +597,12 @@
</span><span class="cx">          kCFStringEncodingUTF8);
</span><span class="cx">
</span><span class="cx"> cptr = (char *)xmalloc(max + 1);
</span><del>- assert(CFStringGetCString((CFStringRef)ptr, cptr,
-                max, kCFStringEncodingUTF8));
</del><ins>+ if (!CFStringGetCString((CFStringRef)ptr, cptr,
+                max + 1, kCFStringEncodingUTF8)) {
+        // Probably a UTF16 string...
+        xfree(cptr);
+        return NULL;
+ }
</ins><span class="cx">
</span><span class="cx"> return cptr;
</span><span class="cx"> }
</span><span class="lines">@@ -817,6 +826,9 @@
</span><span class="cx">         Check_Type(str2, T_STRING);
</span><span class="cx"> }
</span><span class="cx">
</span><ins>+ CFStringAppend((CFMutableStringRef)str, (CFStringRef)str2);
+
+#if 0
</ins><span class="cx"> const char *ptr;
</span><span class="cx"> long len;
</span><span class="cx">
</span><span class="lines">@@ -824,6 +836,7 @@
</span><span class="cx"> len = RSTRING_LEN(str2);
</span><span class="cx">
</span><span class="cx"> rb_objc_str_cat(str, ptr, len, kCFStringEncodingASCII);
</span><ins>+#endif
</ins><span class="cx">
</span><span class="cx"> return str;
</span><span class="cx"> }
</span><span class="lines">@@ -1892,13 +1905,12 @@
</span><span class="cx"> static VALUE
</span><span class="cx"> rb_str_sub_bang(VALUE str, SEL sel, int argc, VALUE *argv)
</span><span class="cx"> {
</span><del>- VALUE pat, repl, match, hash = Qnil;
- struct re_registers *regs;
- int iter = 0;
- int tainted = 0;
</del><ins>+ VALUE repl, hash = Qnil;
+ bool iter = false;
+ bool tainted = false;
</ins><span class="cx">
</span><span class="cx"> if (argc == 1 && rb_block_given_p()) {
</span><del>-        iter = 1;
</del><ins>+        iter = true;
</ins><span class="cx"> }
</span><span class="cx"> else if (argc == 2) {
</span><span class="cx">         repl = argv[1];
</span><span class="lines">@@ -1906,30 +1918,33 @@
</span><span class="cx">         if (NIL_P(hash)) {
</span><span class="cx">          StringValue(repl);
</span><span class="cx">         }
</span><del>-        if (OBJ_TAINTED(repl)) tainted = 1;
</del><ins>+        if (OBJ_TAINTED(repl)) {
+         tainted = true;
+        }
</ins><span class="cx"> }
</span><span class="cx"> else {
</span><span class="cx">         rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
</span><span class="cx"> }
</span><span class="cx">
</span><del>- pat = get_pat(argv[0], 1);
</del><ins>+ VALUE pat = get_pat(argv[0], 1);
</ins><span class="cx"> if (rb_reg_search(pat, str, 0, 0) >= 0) {
</span><ins>+        VALUE match = rb_backref_get();
+        struct re_registers *regs = RMATCH_REGS(match);
</ins><span class="cx">
</span><del>-        match = rb_backref_get();
-        regs = RMATCH_REGS(match);
-
</del><span class="cx">         if (iter || !NIL_P(hash)) {
</span><del>-
</del><span class="cx"> if (iter) {
</span><span class="cx"> rb_match_busy(match);
</span><span class="cx"> repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
</span><span class="cx"> }
</span><span class="cx"> else {
</span><del>- repl = rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0)));
</del><ins>+ repl = rb_hash_aref(hash, rb_str_subseq(str, BEG(0),
+                         END(0) - BEG(0)));
</ins><span class="cx"> repl = rb_obj_as_string(repl);
</span><span class="cx"> }
</span><span class="cx">          str_frozen_check(str);
</span><del>-         if (iter) rb_backref_set(match);
</del><ins>+         if (iter) {
+                rb_backref_set(match);
+         }
</ins><span class="cx">         }
</span><span class="cx">         else {
</span><span class="cx">          repl = rb_reg_regsub(repl, str, regs, pat);
</span><span class="lines">@@ -1937,16 +1952,18 @@
</span><span class="cx">
</span><span class="cx">         rb_str_modify(str);
</span><span class="cx">         rb_str_splice_0(str, BEG(0), END(0) - BEG(0), repl);
</span><del>-        if (OBJ_TAINTED(repl)) tainted = 1;
</del><ins>+        if (OBJ_TAINTED(repl)) {
+         tainted = true;
+        }
</ins><span class="cx">
</span><del>-        if (tainted) OBJ_TAINT(str);
-
</del><ins>+        if (tainted) {
+         OBJ_TAINT(str);
+        }
</ins><span class="cx">         return str;
</span><span class="cx"> }
</span><span class="cx"> return Qnil;
</span><span class="cx"> }
</span><span class="cx">
</span><del>-
</del><span class="cx"> /*
</span><span class="cx"> * call-seq:
</span><span class="cx"> * str.sub(pattern, replacement) => new_str
</span><span class="lines">@@ -1989,37 +2006,35 @@
</span><span class="cx"> static VALUE
</span><span class="cx"> str_gsub(SEL sel, int argc, VALUE *argv, VALUE str, int bang)
</span><span class="cx"> {
</span><del>- VALUE pat, val, repl, match, dest, hash = Qnil;
- struct re_registers *regs;
- long beg, n;
- long offset, slen, len;
- int iter = 0;
- const char *sp, *cp;
- int tainted = 0;
- rb_encoding *str_enc;
-
</del><ins>+ bool iter = false;
+ bool tainted = false;
+ VALUE hash = Qnil, repl = Qnil;
+
</ins><span class="cx"> switch (argc) {
</span><del>- case 1:
-        RETURN_ENUMERATOR(str, argc, argv);
-        iter = 1;
-        break;
- case 2:
-        repl = argv[1];
-        hash = rb_check_convert_type(argv[1], T_HASH, "Hash", "to_hash");
-        if (NIL_P(hash)) {
-         StringValue(repl);
-        }
-        if (OBJ_TAINTED(repl)) {
-         tainted = 1;
-        }
-        break;
- default:
-        rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
</del><ins>+        case 1:
+         RETURN_ENUMERATOR(str, argc, argv);
+         iter = true;
+         break;
+
+        case 2:
+         repl = argv[1];
+         hash = rb_check_convert_type(argv[1], T_HASH, "Hash", "to_hash");
+         if (NIL_P(hash)) {
+                StringValue(repl);
+         }
+         if (OBJ_TAINTED(repl)) {
+                tainted = true;
+         }
+         break;
+
+        default:
+         rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)",
+                 argc);
</ins><span class="cx"> }
</span><span class="cx">
</span><del>- pat = get_pat(argv[0], 1);
- offset=0; n=0;
- beg = rb_reg_search(pat, str, 0, 0);
</del><ins>+ VALUE pat = get_pat(argv[0], 1);
+ long offset = 0;
+ long beg = rb_reg_search(pat, str, 0, 0);
</ins><span class="cx"> if (beg &lt; 0) {
</span><span class="cx">         if (bang) {
</span><span class="cx">          return Qnil;        /* no match, no substitution */
</span><span class="lines">@@ -2027,23 +2042,23 @@
</span><span class="cx">         return rb_str_new3(str);
</span><span class="cx"> }
</span><span class="cx">
</span><del>- dest = rb_str_new5(str, NULL, 0);
- slen = RSTRING_LEN(str);
- sp = RSTRING_PTR(str);
- cp = sp;
- str_enc = NULL;
</del><ins>+ VALUE dest = rb_str_new5(str, NULL, 0);
+ long slen = RSTRING_LEN(str);
+ VALUE match;
</ins><span class="cx">
</span><span class="cx"> do {
</span><del>-        n++;
</del><span class="cx">         match = rb_backref_get();
</span><del>-        regs = RMATCH_REGS(match);
</del><ins>+        struct re_registers *regs = RMATCH_REGS(match);
+ VALUE val;
+
</ins><span class="cx">         if (iter || !NIL_P(hash)) {
</span><span class="cx"> if (iter) {
</span><span class="cx"> rb_match_busy(match);
</span><span class="cx"> val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
</span><span class="cx"> }
</span><span class="cx"> else {
</span><del>- val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0)));
</del><ins>+ val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0),
+                         END(0) - BEG(0)));
</ins><span class="cx"> val = rb_obj_as_string(val);
</span><span class="cx"> }
</span><span class="cx">          str_mod_check(str, sp, slen);
</span><span class="lines">@@ -2062,13 +2077,15 @@
</span><span class="cx">          val = rb_reg_regsub(repl, str, regs, pat);
</span><span class="cx">         }
</span><span class="cx">
</span><ins>+
</ins><span class="cx">         if (OBJ_TAINTED(val)) {
</span><del>-         tainted = 1;
</del><ins>+         tainted = true;
</ins><span class="cx">         }
</span><span class="cx">
</span><del>-        len = beg - offset;        /* copy pre-match substr */
- if (len) {
-         rb_enc_str_buf_cat(dest, cp, len, str_enc);
</del><ins>+        long len = beg - offset; /* copy pre-match substr */
+ if (len > 0) {
+         rb_str_buf_append(dest, rb_str_subseq(str, offset, len));
+         //rb_enc_str_buf_cat(dest, cp, len, str_enc);
</ins><span class="cx"> }
</span><span class="cx">
</span><span class="cx"> rb_str_buf_append(dest, val);
</span><span class="lines">@@ -2079,18 +2096,24 @@
</span><span class="cx">          * Always consume at least one character of the input string
</span><span class="cx">          * in order to prevent infinite loops.
</span><span class="cx">          */
</span><del>-         if (slen &lt;= END(0)) break;
</del><ins>+         if (slen &lt;= END(0)) {
+                break;
+         }
</ins><span class="cx">          len = 1;
</span><del>- rb_enc_str_buf_cat(dest, sp+END(0), len, str_enc);
</del><ins>+         rb_str_buf_append(dest, rb_str_subseq(str, END(0), len));
+ //rb_enc_str_buf_cat(dest, sp+END(0), len, str_enc);
</ins><span class="cx">          offset = END(0) + len;
</span><span class="cx">         }
</span><del>-        cp = sp + offset;
-        if (offset > slen) break;
</del><ins>+        if (offset > slen) {
+         break;
+        }
</ins><span class="cx">         beg = rb_reg_search(pat, str, offset, 0);
</span><del>- }
</del><ins>+ }
</ins><span class="cx"> while (beg >= 0);
</span><ins>+
</ins><span class="cx"> if (slen > offset) {
</span><del>- rb_enc_str_buf_cat(dest, cp, slen - offset, str_enc);
</del><ins>+        rb_str_buf_append(dest, rb_str_subseq(str, offset, slen - offset));
+ //rb_enc_str_buf_cat(dest, cp, slen - offset, str_enc);
</ins><span class="cx"> }
</span><span class="cx"> rb_backref_set(match);
</span><span class="cx"> if (bang) {
</span><span class="lines">@@ -2099,7 +2122,7 @@
</span><span class="cx"> }
</span><span class="cx"> else {
</span><span class="cx">         if (!tainted &amp;&amp; OBJ_TAINTED(str)) {
</span><del>-         tainted = 1;
</del><ins>+         tainted = true;
</ins><span class="cx">         }
</span><span class="cx">         str = dest;
</span><span class="cx"> }
</span><span class="lines">@@ -2443,6 +2466,7 @@
</span><span class="cx"> return str;
</span><span class="cx"> }
</span><span class="cx">
</span><ins>+#if 0
</ins><span class="cx"> static void
</span><span class="cx"> str_cat_char(VALUE str, int c, rb_encoding *enc)
</span><span class="cx"> {
</span><span class="lines">@@ -2458,6 +2482,7 @@
</span><span class="cx"> str_cat_char(str, '\\', enc);
</span><span class="cx"> str_cat_char(str, c, enc);
</span><span class="cx"> }
</span><ins>+#endif
</ins><span class="cx">
</span><span class="cx"> /*
</span><span class="cx"> * call-seq:
</span><span class="lines">@@ -2471,13 +2496,37 @@
</span><span class="cx"> * str.inspect #=> "\"hel\\bo\""
</span><span class="cx"> */
</span><span class="cx">
</span><ins>+static inline void
+__append(CFMutableStringRef out, UniChar c, bool prefix)
+{
+ CFStringAppendCharacters(out, &amp;c, 1);
+}
+
</ins><span class="cx"> VALUE
</span><span class="cx"> rb_str_inspect(VALUE str, SEL sel)
</span><span class="cx"> {
</span><del>- rb_encoding *enc = STR_ENC_GET(str);
</del><ins>+ const long len = CFStringGetLength((CFStringRef)str);
+ CFStringInlineBuffer buf;
+ CFStringInitInlineBuffer((CFStringRef)str, &amp;buf, CFRangeMake(0, len));
+
+ CFMutableStringRef out = CFStringCreateMutable(NULL, 0);
+ __append(out, '"', false);
+
+ long i;
+ for (i = 0; i &lt; len; i++) {
+        UniChar c = CFStringGetCharacterFromInlineBuffer(&amp;buf, i);
+        __append(out, c, false);
+ }
+ __append(out, '"', false);
+
+ return (VALUE)CFMakeCollectable(out);
+
+#if 0
</ins><span class="cx"> const char *p, *pend;
</span><span class="cx"> VALUE result;
</span><span class="cx">
</span><ins>+
+
</ins><span class="cx"> p = RSTRING_PTR(str);
</span><span class="cx"> pend = p + RSTRING_LEN(str);
</span><span class="cx"> if (p == NULL) {
</span><span class="lines">@@ -2545,6 +2594,7 @@
</span><span class="cx"> str_cat_char(result, '"', enc);
</span><span class="cx">
</span><span class="cx"> return result;
</span><ins>+#endif
</ins><span class="cx"> }
</span><span class="cx">
</span><span class="cx"> #define IS_EVSTR(p,e) ((p) &lt; (e) &amp;&amp; (*(p) == '$' || *(p) == '@' || *(p) == '{'))
</span></span></pre>
</div>
</div>
</body>
</html>