<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<!-- xml:lang declares the language of the page text (headings and log message are English) for assistive technology -->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>[2036] MacRuby/branches/experimental</title>
</head>
<body>

<style type="text/css"><!--
/* Commit summary: metadata box, path lists and shared typography */
#msg dl.meta { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dl.meta dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer, #logmsg { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt;  }
#msg dl a { font-weight: bold}
/* Link states are ordered link, visited, active. The three selectors have
   equal specificity, so the last matching rule wins; :active has to follow
   :visited or the pressed colour is never shown on an already visited link. */
#msg dl a:link    { color:#fc3; }
#msg dl a:visited { color:#cc6; }
#msg dl a:active  { color:#ff0; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
#msg pre { overflow: auto; background: #ffc; border: 1px #fa0 solid; padding: 6px; }
/* Rich log message container and the elements it may hold */
#logmsg { background: #ffc; border: 1px #fa0 solid; padding: 1em 1em 0 1em; }
#logmsg p, #logmsg pre, #logmsg blockquote { margin: 0 0 1em 0; }
#logmsg p, #logmsg li, #logmsg dt, #logmsg dd { line-height: 14pt; }
#logmsg h1, #logmsg h2, #logmsg h3, #logmsg h4, #logmsg h5, #logmsg h6 { margin: .5em 0; }
#logmsg h1:first-child, #logmsg h2:first-child, #logmsg h3:first-child, #logmsg h4:first-child, #logmsg h5:first-child, #logmsg h6:first-child { margin-top: 0; }
#logmsg ul, #logmsg ol { padding: 0; list-style-position: inside; margin: 0 0 0 1em; }
#logmsg ul { text-indent: -1em; padding-left: 1em; }
#logmsg ol { text-indent: -1.5em; padding-left: 1.5em; }
#logmsg > ul, #logmsg > ol { margin: 0 0 1em 0; }
#logmsg pre { background: #eee; padding: 1em; }
#logmsg blockquote { border: 1px solid #fa0; border-left-width: 10px; padding: 1em 1em 0 1em; background: white;}
#logmsg dl { margin: 0; }
#logmsg dt { font-weight: bold; }
#logmsg dd { margin: 0; padding: 0 0 0.5em 0; }
#logmsg dd:before { content:'\00bb';}
#logmsg table { border-spacing: 0px; border-collapse: collapse; border-top: 4px solid #fa0; border-bottom: 1px solid #fa0; background: #fff; }
#logmsg table th { text-align: left; font-weight: normal; padding: 0.2em 0.5em; border-top: 1px dotted #fa0; }
#logmsg table td { text-align: right; border-top: 1px dotted #fa0; padding: 0.2em 0.5em; }
#logmsg table thead th { text-align: center; border-bottom: 1px solid #fa0; }
#logmsg table th.Corner { text-align: left; }
#logmsg hr { border: none 0; border-top: 2px dashed #fa0; height: 1px; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
/* Diff section: one bordered box per file, added lines green, removed lines red */
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff  {padding:10px 0;}
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
/* .info is scoped to the patch container like every other diff rule, so it cannot restyle unrelated elements */
#patch .lines, #patch .info {color:#888;background:#fff;}
--></style>
<div id="msg">
  <!-- Commit metadata: changeset link, committer and commit timestamp -->
  <dl class="meta">
    <dt>Revision</dt>
    <dd><a href="http://trac.macosforge.org/projects/ruby/changeset/2036">2036</a></dd>
    <dt>Author</dt>
    <dd>lsansonetti@apple.com</dd>
    <dt>Date</dt>
    <dd>2009-07-20 13:49:50 -0700 (Mon, 20 Jul 2009)</dd>
  </dl>

  <h3>Log Message</h3>
  <pre>some work on unicode support (still in progress)</pre>

  <!-- Fragment links pointing at the per-file anchors of the diff section -->
  <h3>Modified Paths</h3>
  <ul>
    <li><a href="#MacRubybranchesexperimentalcompilercpp">MacRuby/branches/experimental/compiler.cpp</a></li>
    <li><a href="#MacRubybranchesexperimentalcompilerh">MacRuby/branches/experimental/compiler.h</a></li>
    <li><a href="#MacRubybranchesexperimentalioc">MacRuby/branches/experimental/io.c</a></li>
    <li><a href="#MacRubybranchesexperimentalparsey">MacRuby/branches/experimental/parse.y</a></li>
    <li><a href="#MacRubybranchesexperimentalrec">MacRuby/branches/experimental/re.c</a></li>
    <li><a href="#MacRubybranchesexperimentalspecmacrubycorehash_specrb">MacRuby/branches/experimental/spec/macruby/core/hash_spec.rb</a></li>
    <li><a href="#MacRubybranchesexperimentalstringc">MacRuby/branches/experimental/string.c</a></li>
  </ul>
</div>
<div id="patch">
<h3>Diff</h3>
<a id="MacRubybranchesexperimentalcompilercpp"></a>
<div class="modfile"><h4>Modified: MacRuby/branches/experimental/compiler.cpp (2035 => 2036)</h4>
<pre class="diff"><span>
<span class="info">--- MacRuby/branches/experimental/compiler.cpp        2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/compiler.cpp        2009-07-20 20:49:50 UTC (rev 2036)
</span><span class="lines">@@ -138,7 +138,7 @@
</span><span class="cx">     falseVal = ConstantInt::get(RubyObjTy, Qfalse);
</span><span class="cx">     undefVal = ConstantInt::get(RubyObjTy, Qundef);
</span><span class="cx">     splatArgFollowsVal = ConstantInt::get(RubyObjTy, SPLAT_ARG_FOLLOWS);
</span><del>-    cObject = ConstantInt::get(RubyObjTy, (long)rb_cObject);
</del><ins>+    cObject = ConstantInt::get(RubyObjTy, rb_cObject);
</ins><span class="cx">     PtrTy = PointerType::getUnqual(Type::Int8Ty);
</span><span class="cx">     PtrPtrTy = PointerType::getUnqual(PtrTy);
</span><span class="cx">     Int32PtrTy = PointerType::getUnqual(Type::Int32Ty);
</span><span class="lines">@@ -407,21 +407,56 @@
</span><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> GlobalVariable *
</span><ins>+RoxorCompiler::compile_const_global_ustring(const UniChar *str,
+        const size_t len, CFHashCode hash)
+{
+    assert(len &gt; 0);
+
+    std::map&lt;CFHashCode, GlobalVariable *&gt;::iterator iter =
+        static_ustrings.find(hash);
+
+    GlobalVariable *gvar;
+    if (iter == static_ustrings.end()) {
+        const ArrayType *str_type = ArrayType::get(Type::Int16Ty, len);
+
+        std::vector&lt;Constant *&gt; ary_elements;
+        for (unsigned int i = 0; i &lt; len; i++) {
+            ary_elements.push_back(ConstantInt::get(Type::Int16Ty, str[i]));
+        }
+
+        gvar = new GlobalVariable(
+                str_type,
+                true,
+                GlobalValue::InternalLinkage,
+                ConstantArray::get(str_type, ary_elements),
+                &quot;&quot;,
+                RoxorCompiler::module);
+
+        static_ustrings[hash] = gvar;
+    }
+    else {
+        gvar = iter-&gt;second;
+    }
+
+    return gvar;
+}
+
+GlobalVariable *
</ins><span class="cx"> RoxorCompiler::compile_const_global_string(const char *str,
</span><del>-        const size_t str_len)
</del><ins>+        const size_t len)
</ins><span class="cx"> {
</span><del>-    assert(str_len &gt; 0);
</del><ins>+    assert(len &gt; 0);
</ins><span class="cx"> 
</span><del>-    std::string s(str, str_len);
</del><ins>+    std::string s(str, len);
</ins><span class="cx">     std::map&lt;std::string, GlobalVariable *&gt;::iterator iter =
</span><span class="cx">         static_strings.find(s);
</span><span class="cx"> 
</span><span class="cx">     GlobalVariable *gvar;
</span><span class="cx">     if (iter == static_strings.end()) {
</span><del>-        const ArrayType *str_type = ArrayType::get(Type::Int8Ty, str_len + 1);
</del><ins>+        const ArrayType *str_type = ArrayType::get(Type::Int8Ty, len + 1);
</ins><span class="cx"> 
</span><span class="cx">         std::vector&lt;Constant *&gt; ary_elements;
</span><del>-        for (unsigned int i = 0; i &lt; str_len; i++) {
</del><ins>+        for (unsigned int i = 0; i &lt; len; i++) {
</ins><span class="cx">             ary_elements.push_back(ConstantInt::get(Type::Int8Ty, str[i]));
</span><span class="cx">         }
</span><span class="cx">         ary_elements.push_back(ConstantInt::get(Type::Int8Ty, 0));
</span><span class="lines">@@ -2492,7 +2527,6 @@
</span><span class="cx">         //
</span><span class="cx">         //        10.times { s = 'foo'; s &lt;&lt; 'bar' }
</span><span class="cx">         //
</span><del>-        const char *str = RSTRING_PTR(val);
</del><span class="cx">         const size_t str_len = RSTRING_LEN(val);
</span><span class="cx">         if (str_len == 0) {
</span><span class="cx">             if (newString3Func == NULL) {        
</span><span class="lines">@@ -2503,9 +2537,18 @@
</span><span class="cx">             return CallInst::Create(newString3Func, &quot;&quot;, bb);
</span><span class="cx">         }
</span><span class="cx">         else {
</span><del>-            GlobalVariable *str_gvar = compile_const_global_string(str,
-                    str_len);
</del><ins>+            UniChar *buf = (UniChar *)CFStringGetCharactersPtr(
+                    (CFStringRef)val);
</ins><span class="cx"> 
</span><ins>+            if (buf == NULL) {
+                buf = (UniChar *)alloca(sizeof(UniChar) * str_len);
+                CFStringGetCharacters((CFStringRef)val,
+                        CFRangeMake(0, str_len), buf);
+            }
+
+            GlobalVariable *str_gvar = compile_const_global_ustring(buf,
+                    str_len, CFHash((CFTypeRef)val));
+
</ins><span class="cx">             std::vector&lt;Value *&gt; idxs;
</span><span class="cx">             idxs.push_back(ConstantInt::get(Type::Int32Ty, 0));
</span><span class="cx">             idxs.push_back(ConstantInt::get(Type::Int32Ty, 0));
</span><span class="lines">@@ -2515,8 +2558,9 @@
</span><span class="cx">             if (newString2Func == NULL) {        
</span><span class="cx">                 newString2Func = cast&lt;Function&gt;(
</span><span class="cx">                         module-&gt;getOrInsertFunction(
</span><del>-                            &quot;rb_str_new&quot;, RubyObjTy, PtrTy, Type::Int32Ty,
-                            NULL));
</del><ins>+                            &quot;rb_unicode_str_new&quot;,
+                            RubyObjTy, PointerType::getUnqual(Type::Int16Ty),
+                            Type::Int32Ty, NULL));
</ins><span class="cx">             }
</span><span class="cx"> 
</span><span class="cx">             std::vector&lt;Value *&gt; params;
</span></span></pre></div>
<a id="MacRubybranchesexperimentalcompilerh"></a>
<div class="modfile"><h4>Modified: MacRuby/branches/experimental/compiler.h (2035 => 2036)</h4>
<pre class="diff"><span>
<span class="info">--- MacRuby/branches/experimental/compiler.h        2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/compiler.h        2009-07-20 20:49:50 UTC (rev 2036)
</span><span class="lines">@@ -66,6 +66,7 @@
</span><span class="cx">         std::vector&lt;ID&gt; dvars;
</span><span class="cx">         std::map&lt;ID, Instruction *&gt; ivar_slots_cache;
</span><span class="cx">         std::map&lt;std::string, GlobalVariable *&gt; static_strings;
</span><ins>+        std::map&lt;CFHashCode, GlobalVariable *&gt; static_ustrings;
</ins><span class="cx"> 
</span><span class="cx"> #if ROXOR_COMPILER_DEBUG
</span><span class="cx">         int level;
</span><span class="lines">@@ -249,6 +250,9 @@
</span><span class="cx">         GlobalVariable *compile_const_global_string(const char *str) {
</span><span class="cx">             return compile_const_global_string(str, strlen(str));
</span><span class="cx">         }
</span><ins>+        GlobalVariable *compile_const_global_ustring(const UniChar *str,
+                const size_t str_len, CFHashCode hash);
+
</ins><span class="cx">         Value *compile_arity(rb_vm_arity_t &amp;arity);
</span><span class="cx">         Value *compile_literal(VALUE val);
</span><span class="cx">         virtual Value *compile_immutable_literal(VALUE val);
</span></span></pre></div>
<a id="MacRubybranchesexperimentalioc"></a>
<div class="modfile"><h4>Modified: MacRuby/branches/experimental/io.c (2035 => 2036)</h4>
<pre class="diff"><span>
<span class="info">--- MacRuby/branches/experimental/io.c        2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/io.c        2009-07-20 20:49:50 UTC (rev 2036)
</span><span class="lines">@@ -460,22 +460,11 @@
</span><span class="cx">     }
</span><span class="cx">     else {
</span><span class="cx">         buffer = (UInt8 *)RSTRING_PTR(to_write);
</span><del>-        if (buffer != NULL) {
-            length = RSTRING_LEN(to_write);
</del><ins>+        if (buffer == NULL) {
+            rb_raise(rb_eRuntimeError,
+                    &quot;could not extract a string from the read data.&quot;);
</ins><span class="cx">         }
</span><del>-        else {
-            const long max = CFStringGetMaximumSizeForEncoding(
-                    CFStringGetLength((CFStringRef)to_write),
-                    kCFStringEncodingUTF8);
-
-            buffer = (UInt8 *)alloca(max + 1);
-            if (!CFStringGetCString((CFStringRef)to_write, (char *)buffer, 
-                        max, kCFStringEncodingUTF8)) {
-                rb_raise(rb_eRuntimeError,
-                        &quot;could not extract a string from the read data.&quot;);
-            }
-            length = strlen((char *)buffer);
-        }
</del><ins>+        length = strlen((char *)buffer);
</ins><span class="cx">     }
</span><span class="cx"> 
</span><span class="cx">     if (length == 0) {
</span></span></pre></div>
<a id="MacRubybranchesexperimentalparsey"></a>
<div class="modfile"><h4>Modified: MacRuby/branches/experimental/parse.y (2035 => 2036)</h4>
<pre class="diff"><span>
<span class="info">--- MacRuby/branches/experimental/parse.y        2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/parse.y        2009-07-20 20:49:50 UTC (rev 2036)
</span><span class="lines">@@ -290,14 +290,34 @@
</span><span class="cx"> 
</span><span class="cx"> #if WITH_OBJC
</span><span class="cx"> # define UTF8_ENC() (NULL)
</span><ins>+static inline VALUE
+__new_tmp_str(const char *ptr, const size_t len)
+{
+    if (ptr != NULL) {
+        CFStringRef str = CFStringCreateWithBytes(NULL, (UInt8 *)ptr, len,
+                kCFStringEncodingUTF8, false);
+        if (str != NULL) {
+            CFMutableStringRef str2 =
+                CFStringCreateMutableCopy(NULL, 0, str);
+            assert(str2 != NULL);
+            CFRelease(str);
+            return (VALUE)CFMakeCollectable(str2);
+        }
+    }
+    return rb_usascii_str_new(ptr, len);
+}
+# define STR_NEW(p,n) __new_tmp_str(p, n)
+# define STR_NEW0() __new_tmp_str(0, 0)
+# define STR_NEW2(p) __new_tmp_str(p, strlen(p))
+# define STR_NEW3(p,n,e,func) __new_tmp_str(p, n)
</ins><span class="cx"> #else
</span><span class="cx"> # define UTF8_ENC() (parser-&gt;utf8 ? parser-&gt;utf8 : \
</span><span class="cx">                     (parser-&gt;utf8 = rb_utf8_encoding()))
</span><ins>+# define STR_NEW(p,n) rb_enc_str_new((p),(n),parser-&gt;enc)
+# define STR_NEW0() rb_usascii_str_new(0,0)
+# define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser-&gt;enc)
+# define STR_NEW3(p,n,e,func) parser_str_new((p),(n),(e),(func),parser-&gt;enc)
</ins><span class="cx"> #endif
</span><del>-#define STR_NEW(p,n) rb_enc_str_new((p),(n),parser-&gt;enc)
-#define STR_NEW0() rb_usascii_str_new(0,0)
-#define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser-&gt;enc)
-#define STR_NEW3(p,n,e,func) parser_str_new((p),(n),(e),(func),parser-&gt;enc)
</del><span class="cx"> #if WITH_OBJC
</span><span class="cx"> # define STR_ENC(m) (parser-&gt;enc)
</span><span class="cx"> # define ENC_SINGLE(cr) (1)
</span><span class="lines">@@ -5238,6 +5258,7 @@
</span><span class="cx">     str_dsym   = (STR_FUNC_SYMBOL|STR_FUNC_EXPAND),
</span><span class="cx"> };
</span><span class="cx"> 
</span><ins>+#if 0
</ins><span class="cx"> static VALUE
</span><span class="cx"> parser_str_new(const char *p, long n, rb_encoding *enc, int func, rb_encoding *enc0)
</span><span class="cx"> {
</span><span class="lines">@@ -5266,6 +5287,7 @@
</span><span class="cx"> 
</span><span class="cx">     return str;
</span><span class="cx"> }
</span><ins>+#endif
</ins><span class="cx"> 
</span><span class="cx"> #define lex_goto_eol(parser) (parser-&gt;parser_lex_p = parser-&gt;parser_lex_pend)
</span><span class="cx"> 
</span></span></pre></div>
<a id="MacRubybranchesexperimentalrec"></a>
<div class="modfile"><h4>Modified: MacRuby/branches/experimental/re.c (2035 => 2036)</h4>
<pre class="diff"><span>
<span class="info">--- MacRuby/branches/experimental/re.c        2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/re.c        2009-07-20 20:49:50 UTC (rev 2036)
</span><span class="lines">@@ -9,6 +9,7 @@
</span><span class="cx"> 
</span><span class="cx"> **********************************************************************/
</span><span class="cx"> 
</span><ins>+#include &quot;oniguruma.h&quot;
</ins><span class="cx"> #include &quot;ruby/ruby.h&quot;
</span><span class="cx"> #include &quot;ruby/re.h&quot;
</span><span class="cx"> #include &quot;ruby/encoding.h&quot;
</span><span class="lines">@@ -449,10 +450,13 @@
</span><span class="cx">     rb_reg_check(re);
</span><span class="cx">     cstr = RREGEXP(re)-&gt;str;
</span><span class="cx">     clen = RREGEXP(re)-&gt;len;
</span><del>-    if (clen == 0)
</del><ins>+    if (clen == 0) {
</ins><span class="cx">         cstr = NULL;
</span><ins>+    }
</ins><span class="cx">     str = rb_enc_str_new(cstr, clen, rb_enc_get(re));
</span><del>-    if (OBJ_TAINTED(re)) OBJ_TAINT(str);
</del><ins>+    if (OBJ_TAINTED(re)) {
+        OBJ_TAINT(str);
+    }
</ins><span class="cx">     return str;
</span><span class="cx"> }
</span><span class="cx"> 
</span><span class="lines">@@ -772,7 +776,8 @@
</span><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> static Regexp*
</span><del>-make_regexp(const char *s, long len, rb_encoding *enc, int flags, onig_errmsg_buffer err)
</del><ins>+make_regexp(const char *s, long len, rb_encoding *enc, int flags,
+        onig_errmsg_buffer err)
</ins><span class="cx"> {
</span><span class="cx">     Regexp *rp;
</span><span class="cx">     int r;
</span><span class="lines">@@ -1228,9 +1233,88 @@
</span><span class="cx"> }
</span><span class="cx"> #endif
</span><span class="cx"> 
</span><ins>+static void
+get_cstring(VALUE str, CFStringEncoding enc, char **pcstr, size_t *pcharsize,
+        bool *should_free)
+{
+    if (pcstr != NULL &amp;&amp; pcharsize != NULL &amp;&amp; should_free != NULL) {
+        char *p = (char *)CFStringGetCStringPtr((CFStringRef)str, enc);
+        if (p != NULL) {
+            *should_free = false;
+        }
+        else {
+            const size_t s = CFStringGetMaximumSizeForEncoding(
+                    CFStringGetLength((CFStringRef)str), enc);
+            p = (char *)malloc(s + 1);
+            assert(CFStringGetCString((CFStringRef)str, p, s + 1, enc));
+            *should_free = true;
+        }
+        *pcstr = p;
+        *pcharsize = sizeof(char);
+    }
+}
+
+static void
+get_unistring(VALUE str, CFStringEncoding enc, char **pcstr, size_t *pcharsize,
+        bool *should_free)
+{
+    if (pcstr != NULL &amp;&amp; pcharsize != NULL &amp;&amp; should_free != NULL) {
+        UniChar *p = (UniChar *)CFStringGetCharactersPtr((CFStringRef)str);
+        const size_t str_len = CFStringGetLength((CFStringRef)str);
+        if (p != NULL) {
+            *should_free = false;
+        }
+        else {
+            const size_t s = CFStringGetMaximumSizeForEncoding(
+                    str_len, enc);
+            p = (UniChar *)malloc(s);
+            CFStringGetCharacters((CFStringRef)str,
+                    CFRangeMake(0, str_len),
+                    p);
+            *should_free = true;
+        }
+        *pcstr = (char *)p;
+        *pcharsize = sizeof(UniChar);
+    }
+}
+
+static inline bool
+multibyte_encoding(rb_encoding *enc)
+{
+    return enc == (rb_encoding *)ONIG_ENCODING_UTF16_BE
+        || enc == (rb_encoding *)ONIG_ENCODING_UTF16_LE
+        || enc == (rb_encoding *)ONIG_ENCODING_UTF32_BE
+        || enc == (rb_encoding *)ONIG_ENCODING_UTF32_LE;
+}
+
</ins><span class="cx"> static rb_encoding*
</span><del>-rb_reg_prepare_enc(VALUE re, VALUE str, int warn)
</del><ins>+rb_reg_prepare_enc(VALUE re, VALUE str, char **pcstr, size_t *pcharsize,
+        bool *should_free)
</ins><span class="cx"> {
</span><ins>+    CFStringEncoding enc = CFStringGetFastestEncoding((CFStringRef)str);
+    switch (enc) {
+        case kCFStringEncodingMacRoman:
+        case kCFStringEncodingWindowsLatin1:
+        case kCFStringEncodingISOLatin1:
+        case kCFStringEncodingNextStepLatin:
+        case kCFStringEncodingASCII:
+        case kCFStringEncodingNonLossyASCII:
+            get_cstring(str, enc, pcstr, pcharsize, should_free);
+            return (rb_encoding *)ONIG_ENCODING_ASCII;
+
+        case kCFStringEncodingUTF8:
+        case kCFStringEncodingUTF16:
+        case kCFStringEncodingUTF16BE:
+        case kCFStringEncodingUTF16LE:
+        case kCFStringEncodingUTF32:
+        case kCFStringEncodingUTF32BE:
+        case kCFStringEncodingUTF32LE:
+            get_unistring(str, enc, pcstr, pcharsize, should_free);
+            return (rb_encoding *)ONIG_ENCODING_UTF16_LE;
+    }
+
+    rb_raise(rb_eArgError, &quot;given string has unrecognized encoding&quot;);
+#if 0
</ins><span class="cx">     rb_encoding *enc = 0;
</span><span class="cx"> 
</span><span class="cx"> #if !WITH_OBJC
</span><span class="lines">@@ -1265,10 +1349,12 @@
</span><span class="cx">     }
</span><span class="cx"> #endif
</span><span class="cx">     return enc;
</span><ins>+#endif
</ins><span class="cx"> }
</span><span class="cx"> 
</span><del>-regex_t *
-rb_reg_prepare_re(VALUE re, VALUE str)
</del><ins>+static regex_t *
+rb_reg_prepare_re(VALUE re, VALUE str, char **pcstr, size_t *pcharsize,
+        bool *should_free)
</ins><span class="cx"> {
</span><span class="cx">     regex_t *reg = RREGEXP(re)-&gt;ptr;
</span><span class="cx">     onig_errmsg_buffer err = &quot;&quot;;
</span><span class="lines">@@ -1277,38 +1363,49 @@
</span><span class="cx">     const char *pattern;
</span><span class="cx">     VALUE unescaped;
</span><span class="cx">     rb_encoding *fixed_enc = 0;
</span><del>-    rb_encoding *enc = rb_reg_prepare_enc(re, str, 1);
</del><ins>+    rb_encoding *enc = rb_reg_prepare_enc(re, str, pcstr, pcharsize,
+            should_free);
</ins><span class="cx"> 
</span><del>-#if !WITH_OBJC
-    if (reg-&gt;enc == enc) return reg;
-#endif
</del><ins>+    if ((rb_encoding *)reg-&gt;enc == enc) {
+        return reg;
+    }
</ins><span class="cx"> 
</span><span class="cx">     rb_reg_check(re);
</span><span class="cx">     reg = RREGEXP(re)-&gt;ptr;
</span><span class="cx">     pattern = RREGEXP(re)-&gt;str;
</span><span class="cx"> 
</span><del>-    unescaped = rb_reg_preprocess(
-        pattern, pattern + RREGEXP(re)-&gt;len, enc,
</del><ins>+    unescaped = rb_reg_preprocess(pattern, pattern + RREGEXP(re)-&gt;len, enc,
</ins><span class="cx">         &amp;fixed_enc, err);
</span><span class="cx"> 
</span><span class="cx">     if (unescaped == Qnil) {
</span><span class="cx">         rb_raise(rb_eArgError, &quot;regexp preprocess failed: %s&quot;, err);
</span><span class="cx">     }
</span><span class="cx"> 
</span><del>-#if WITH_OBJC
-    enc = (rb_encoding *)ONIG_ENCODING_ASCII;
-#endif
</del><ins>+    UChar *begin, *end;
+    if (multibyte_encoding(enc)) {
+        UniChar *chars = (UniChar *)CFStringGetCharactersPtr(
+                (CFStringRef)unescaped);
+        const long len = RSTRING_LEN(unescaped);
+        if (chars == NULL) {
+            chars = (UniChar *)alloca(sizeof(UniChar) * len);
+            CFStringGetCharacters((CFStringRef)unescaped,
+                    CFRangeMake(0, len), chars);
+        }
+        begin = (UChar *)chars;
+        end = (UChar *)chars + (sizeof(UniChar) * len);
+    }
+    else {
+        begin = (UChar *)RSTRING_PTR(unescaped);
+        end = begin + RSTRING_LEN(unescaped);
+    }
</ins><span class="cx"> 
</span><del>-    r = onig_new(&amp;reg, (UChar* )RSTRING_PTR(unescaped),
-                 (UChar* )(RSTRING_PTR(unescaped) + RSTRING_LEN(unescaped)),
-                 reg-&gt;options, (OnigEncoding)enc,
-                 OnigDefaultSyntax, &amp;einfo);
-    if (r) {
</del><ins>+    r = onig_new(&amp;reg, begin, end, reg-&gt;options, (OnigEncoding)enc,
+            OnigDefaultSyntax, &amp;einfo);
+    if (r != 0) {
</ins><span class="cx">         onig_error_code_to_str((UChar*)err, r, &amp;einfo);
</span><span class="cx">         rb_reg_raise(pattern, RREGEXP(re)-&gt;len, err, re);
</span><span class="cx">     }
</span><span class="cx"> 
</span><del>-    RB_GC_GUARD(unescaped);
</del><span class="cx">     return reg;
</span><span class="cx"> }
</span><span class="cx"> 
</span><span class="lines">@@ -1321,7 +1418,7 @@
</span><span class="cx">     UChar *p, *string;
</span><span class="cx"> #endif
</span><span class="cx"> 
</span><del>-    enc = rb_reg_prepare_enc(re, str, 0);
</del><ins>+    enc = rb_reg_prepare_enc(re, str, NULL, NULL, NULL);
</ins><span class="cx"> 
</span><span class="cx">     if (reverse) {
</span><span class="cx">         range = -pos;
</span><span class="lines">@@ -1350,47 +1447,45 @@
</span><span class="cx"> int
</span><span class="cx"> rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
</span><span class="cx"> {
</span><del>-    int result;
-    VALUE match;
-    struct re_registers *pregs;
-    const char *cstr, *range;
-    long clen;
</del><span class="cx">     regex_t *reg0 = RREGEXP(re)-&gt;ptr, *reg;
</span><span class="cx">     int busy = FL_TEST(re, REG_BUSY);
</span><span class="cx"> 
</span><del>-    cstr = range = RSTRING_PTR(str);
-    clen = RSTRING_LEN(str);
-#if WITH_OBJC
</del><span class="cx">     static struct re_registers *regs = NULL;
</span><span class="cx">     if (regs == NULL) {
</span><span class="cx">         regs = xmalloc(sizeof(struct re_registers));
</span><span class="cx">         rb_objc_root(&amp;regs);
</span><span class="cx">     }
</span><del>-    pregs = regs;
-#else
-    static struct re_registers regs;
-    pregs = &amp;regs;
-#endif
</del><ins>+    struct re_registers *pregs = regs;
</ins><span class="cx"> 
</span><ins>+    const size_t clen = RSTRING_LEN(str);
</ins><span class="cx">     if (pos &gt; clen || pos &lt; 0) {
</span><span class="cx">         rb_backref_set(Qnil);
</span><span class="cx">         return -1;
</span><span class="cx">     }
</span><span class="cx"> 
</span><del>-    reg = rb_reg_prepare_re(re, str);
</del><ins>+    char *cstr = NULL;
+    size_t charsize = 0;
+    bool should_free = false;
+    reg = rb_reg_prepare_re(re, str, &amp;cstr, &amp;charsize, &amp;should_free);
</ins><span class="cx"> 
</span><ins>+    char *range = cstr;
</ins><span class="cx">     FL_SET(re, REG_BUSY);
</span><span class="cx">     if (!reverse) {
</span><del>-        range += RSTRING_LEN(str);
</del><ins>+        range += (clen * charsize);
</ins><span class="cx">     }
</span><span class="cx">     MEMZERO(pregs, struct re_registers, 1);
</span><del>-    result = onig_search(RREGEXP(re)-&gt;ptr,
-                         (UChar*)cstr,
-                         ((UChar*)cstr + clen),
-                         ((UChar*)cstr + pos),
-                         ((UChar*)range),
-                         pregs, ONIG_OPTION_NONE);
</del><ins>+    int result = onig_search(reg,
+            (UChar*)cstr,
+            ((UChar*)cstr + (clen * charsize)),
+            ((UChar*)cstr + (pos * charsize)),
+            ((UChar*)range),
+            pregs, ONIG_OPTION_NONE);
</ins><span class="cx"> 
</span><ins>+    if (should_free) {
+        free(cstr);
+        cstr = NULL;
+    }
+
</ins><span class="cx">     if (RREGEXP(re)-&gt;ptr != reg) {
</span><span class="cx">         if (busy) {
</span><span class="cx">             onig_free(reg);
</span><span class="lines">@@ -1400,7 +1495,9 @@
</span><span class="cx">             RREGEXP(re)-&gt;ptr = reg;
</span><span class="cx">         }
</span><span class="cx">     }
</span><del>-    if (!busy) FL_UNSET(re, REG_BUSY);
</del><ins>+    if (!busy) {
+        FL_UNSET(re, REG_BUSY);
+    }
</ins><span class="cx">     if (result &lt; 0) {
</span><span class="cx">         onig_region_free(pregs, 0);
</span><span class="cx">         if (result == ONIG_MISMATCH) {
</span><span class="lines">@@ -1414,10 +1511,26 @@
</span><span class="cx">         }
</span><span class="cx">     }
</span><span class="cx"> 
</span><ins>+    if (charsize &gt; 1) {
+        int i;
+        for (i = 0; i &lt; pregs-&gt;num_regs; i++) {
+            if (pregs-&gt;beg[i] &gt; 0) {
+                assert((pregs-&gt;beg[i] % charsize) == 0);
+                pregs-&gt;beg[i] /= charsize;
+            }
+            if (pregs-&gt;end[i] &gt; 0) {
+                assert((pregs-&gt;end[i] % charsize) == 0);
+                pregs-&gt;end[i] /= charsize;
+            }
+        }
+        assert((result % charsize) == 0);
+        result /= charsize;
+    }
+
</ins><span class="cx"> #if WITH_OBJC
</span><del>-    match = match_alloc(rb_cMatch, 0);
</del><ins>+    VALUE match = match_alloc(rb_cMatch, 0);
</ins><span class="cx"> #else
</span><del>-    match = rb_backref_get();
</del><ins>+    VALUE match = rb_backref_get();
</ins><span class="cx">     if (NIL_P(match) || FL_TEST(match, MATCH_BUSY)) {
</span><span class="cx">         match = match_alloc(rb_cMatch);
</span><span class="cx">     }
</span><span class="lines">@@ -1467,7 +1580,9 @@
</span><span class="cx">     long start, end, len;
</span><span class="cx">     struct re_registers *regs;
</span><span class="cx"> 
</span><del>-    if (NIL_P(match)) return Qnil;
</del><ins>+    if (NIL_P(match)) {
+        return Qnil;
+    }
</ins><span class="cx">     match_check(match);
</span><span class="cx">     regs = RMATCH_REGS(match);
</span><span class="cx">     if (nth &gt;= regs-&gt;num_regs) {
</span><span class="lines">@@ -1475,14 +1590,17 @@
</span><span class="cx">     }
</span><span class="cx">     if (nth &lt; 0) {
</span><span class="cx">         nth += regs-&gt;num_regs;
</span><del>-        if (nth &lt;= 0) return Qnil;
</del><ins>+        if (nth &lt;= 0) {
+            return Qnil;
+        }
</ins><span class="cx">     }
</span><span class="cx">     start = BEG(nth);
</span><del>-    if (start == -1) return Qnil;
</del><ins>+    if (start == -1) {
+        return Qnil;
+    }
</ins><span class="cx">     end = END(nth);
</span><span class="cx">     len = end - start;
</span><span class="cx">     str = rb_str_subseq(RMATCH(match)-&gt;str, start, len);
</span><del>-    OBJ_INFECT(str, match);
</del><span class="cx">     return str;
</span><span class="cx"> }
</span><span class="cx"> 
</span><span class="lines">@@ -1794,9 +1912,15 @@
</span><span class="cx">     VALUE str = rb_reg_last_match(match);
</span><span class="cx"> 
</span><span class="cx">     match_check(match);
</span><del>-    if (NIL_P(str)) str = rb_str_new(0,0);
-    if (OBJ_TAINTED(match)) OBJ_TAINT(str);
-    if (OBJ_TAINTED(RMATCH(match)-&gt;str)) OBJ_TAINT(str);
</del><ins>+    if (NIL_P(str)) {
+        str = rb_str_new(0,0);
+    }
+    if (OBJ_TAINTED(match)) {
+        OBJ_TAINT(str);
+    }
+    else if (OBJ_TAINTED(RMATCH(match)-&gt;str)) {
+        OBJ_TAINT(str);
+    }
</ins><span class="cx">     return str;
</span><span class="cx"> }
</span><span class="cx"> 
</span><span class="lines">@@ -1886,9 +2010,10 @@
</span><span class="cx">     for (i = 0; i &lt; num_regs; i++) {
</span><span class="cx">         VALUE v;
</span><span class="cx">         rb_str_buf_cat2(str, &quot; &quot;);
</span><del>-        if (0 &lt; i) {
-            if (names[i].name)
</del><ins>+        if (i &gt; 0) {
+            if (names[i].name) { 
</ins><span class="cx">                 rb_str_buf_cat(str, (const char *)names[i].name, names[i].len);
</span><ins>+            }
</ins><span class="cx">             else {
</span><span class="cx">                 char buf[sizeof(i)*3+1];
</span><span class="cx">                 snprintf(buf, sizeof(buf), &quot;%d&quot;, i);
</span><span class="lines">@@ -1897,10 +2022,12 @@
</span><span class="cx">             rb_str_buf_cat2(str, &quot;:&quot;);
</span><span class="cx">         }
</span><span class="cx">         v = rb_reg_nth_match(i, match);
</span><del>-        if (v == Qnil)
</del><ins>+        if (v == Qnil) {
</ins><span class="cx">             rb_str_buf_cat2(str, &quot;nil&quot;);
</span><del>-        else
</del><ins>+        }
+        else {
</ins><span class="cx">             rb_str_buf_append(str, rb_str_inspect(v, 0));
</span><ins>+        }
</ins><span class="cx">     }
</span><span class="cx">     rb_str_buf_cat2(str, &quot;&gt;&quot;);
</span><span class="cx"> 
</span><span class="lines">@@ -2324,7 +2451,6 @@
</span><span class="cx">     enc = rb_enc_get(str);
</span><span class="cx"> 
</span><span class="cx">     buf = rb_reg_preprocess(p, end, enc, &amp;fixed_enc, err);
</span><del>-    RB_GC_GUARD(str);
</del><span class="cx"> 
</span><span class="cx">     if (buf == Qnil) {
</span><span class="cx">         return rb_reg_error_desc(str, 0, err);
</span><span class="lines">@@ -2404,15 +2530,21 @@
</span><span class="cx">     rb_encoding *a_enc = rb_ascii8bit_encoding();
</span><span class="cx"> #endif
</span><span class="cx"> 
</span><del>-    if (!OBJ_TAINTED(obj) &amp;&amp; rb_safe_level() &gt;= 4)
</del><ins>+    if (!OBJ_TAINTED(obj) &amp;&amp; rb_safe_level() &gt;= 4) {
</ins><span class="cx">         rb_raise(rb_eSecurityError, &quot;Insecure: can't modify regexp&quot;);
</span><ins>+    }
</ins><span class="cx">     rb_check_frozen(obj);
</span><del>-    if (FL_TEST(obj, REG_LITERAL))
</del><ins>+    if (FL_TEST(obj, REG_LITERAL)) {
</ins><span class="cx">         rb_raise(rb_eSecurityError, &quot;can't modify literal regexp&quot;);
</span><del>-    if (re-&gt;ptr) onig_free(re-&gt;ptr);
-    if (re-&gt;str) xfree(re-&gt;str);
-    re-&gt;ptr = 0;
-    re-&gt;str = 0;
</del><ins>+    }
+    if (re-&gt;ptr != NULL) {
+        onig_free(re-&gt;ptr);
+    }
+    if (re-&gt;str != NULL) {
+        xfree(re-&gt;str);
+    }
+    re-&gt;ptr = NULL;
+    re-&gt;str = NULL;
</ins><span class="cx"> 
</span><span class="cx">     unescaped = rb_reg_preprocess(s, s+len, enc, &amp;fixed_enc, err);
</span><span class="cx">     if (unescaped == Qnil)
</span><span class="lines">@@ -2444,23 +2576,24 @@
</span><span class="cx">     if (options &amp; ARG_ENCODING_NONE) {
</span><span class="cx">         re-&gt;basic.flags |= REG_ENCODING_NONE;
</span><span class="cx">     }
</span><del>-    
-    GC_WB(&amp;re-&gt;ptr, make_regexp(RSTRING_PTR(unescaped), 
-                                RSTRING_LEN(unescaped), enc,
-                                options &amp; ARG_REG_OPTION_MASK, err));
-    if (!re-&gt;ptr) return -1;
</del><ins>+   
+    Regexp *reg = make_regexp(RSTRING_PTR(unescaped), 
+            RSTRING_LEN(unescaped), enc,
+            options &amp; ARG_REG_OPTION_MASK, err);
+    if (reg == NULL) {
+        return -1;
+    }
+    GC_WB(&amp;re-&gt;ptr, reg);
</ins><span class="cx">     GC_WB(&amp;re-&gt;str, ALLOC_N(char, len+1));
</span><span class="cx">     memcpy(re-&gt;str, s, len);
</span><span class="cx">     re-&gt;str[len] = '\0';
</span><span class="cx">     re-&gt;len = len;
</span><del>-    RB_GC_GUARD(unescaped);
</del><span class="cx">     return 0;
</span><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> static int
</span><span class="cx"> rb_reg_initialize_str(VALUE obj, VALUE str, int options, onig_errmsg_buffer err)
</span><span class="cx"> {
</span><del>-    int ret;
</del><span class="cx">     rb_encoding *enc = rb_enc_get(str);
</span><span class="cx">     if (options &amp; ARG_ENCODING_NONE) {
</span><span class="cx"> #if !WITH_OBJC
</span><span class="lines">@@ -2475,10 +2608,8 @@
</span><span class="cx">         }
</span><span class="cx"> #endif
</span><span class="cx">     }
</span><del>-    ret = rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), enc,
-                            options, err);
-    RB_GC_GUARD(str);
-    return ret;
</del><ins>+    return rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), enc,
+            options, err);
</ins><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> static VALUE
</span></span></pre></div>
<a id="MacRubybranchesexperimentalspecmacrubycorehash_specrb"></a>
<div class="modfile"><h4>Modified: MacRuby/branches/experimental/spec/macruby/core/hash_spec.rb (2035 => 2036)</h4>
<pre class="diff"><span>
<span class="info">--- MacRuby/branches/experimental/spec/macruby/core/hash_spec.rb        2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/spec/macruby/core/hash_spec.rb        2009-07-20 20:49:50 UTC (rev 2036)
</span><span class="lines">@@ -62,7 +62,7 @@
</span><span class="cx">   end
</span><span class="cx"> 
</span><span class="cx">   it &quot;can have a singleton class&quot; do
</span><del>-    a = NSDictionary.array
</del><ins>+    a = NSDictionary.dictionary
</ins><span class="cx">     def a.foo; 42; end
</span><span class="cx">     a.foo.should == 42
</span><span class="cx">     lambda { a[42] = 123 }.should raise_error(RuntimeError)
</span></span></pre></div>
<a id="MacRubybranchesexperimentalstringc"></a>
<div class="modfile"><h4>Modified: MacRuby/branches/experimental/string.c (2035 => 2036)</h4>
<pre class="diff"><span>
<span class="info">--- MacRuby/branches/experimental/string.c        2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/string.c        2009-07-20 20:49:50 UTC (rev 2036)
</span><span class="lines">@@ -157,6 +157,15 @@
</span><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> VALUE
</span><ins>+rb_unicode_str_new(const UniChar *ptr, const size_t len)
+{
+    VALUE str = str_alloc(rb_cString);
+    CFStringAppendCharacters((CFMutableStringRef)str,
+            ptr, len);
+    return str;
+}
+
+VALUE
</ins><span class="cx"> rb_str_new(const char *ptr, long len)
</span><span class="cx"> {
</span><span class="cx">     return str_new(rb_cString, ptr, len);
</span><span class="lines">@@ -165,17 +174,13 @@
</span><span class="cx"> VALUE
</span><span class="cx"> rb_usascii_str_new(const char *ptr, long len)
</span><span class="cx"> {
</span><del>-    VALUE str = str_new(rb_cString, ptr, len);
-
-    return str;
</del><ins>+    return str_new(rb_cString, ptr, len);
</ins><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> VALUE
</span><span class="cx"> rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
</span><span class="cx"> {
</span><del>-    VALUE str = str_new(rb_cString, ptr, len);
-
-    return str;
</del><ins>+    return str_new(rb_cString, ptr, len);
</ins><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> VALUE
</span><span class="lines">@@ -592,8 +597,12 @@
</span><span class="cx">             kCFStringEncodingUTF8);
</span><span class="cx"> 
</span><span class="cx">     cptr = (char *)xmalloc(max + 1);
</span><del>-    assert(CFStringGetCString((CFStringRef)ptr, cptr,
-                max, kCFStringEncodingUTF8));
</del><ins>+    if (!CFStringGetCString((CFStringRef)ptr, cptr,
+                max + 1, kCFStringEncodingUTF8)) {
+        // Probably a UTF16 string...
+        xfree(cptr);
+        return NULL;
+    }
</ins><span class="cx"> 
</span><span class="cx">     return cptr;
</span><span class="cx"> }
</span><span class="lines">@@ -817,6 +826,9 @@
</span><span class="cx">         Check_Type(str2, T_STRING);
</span><span class="cx">     }
</span><span class="cx"> 
</span><ins>+    CFStringAppend((CFMutableStringRef)str, (CFStringRef)str2);
+
+#if 0
</ins><span class="cx">     const char *ptr;
</span><span class="cx">     long len;
</span><span class="cx"> 
</span><span class="lines">@@ -824,6 +836,7 @@
</span><span class="cx">     len = RSTRING_LEN(str2);
</span><span class="cx"> 
</span><span class="cx">     rb_objc_str_cat(str, ptr, len, kCFStringEncodingASCII);
</span><ins>+#endif
</ins><span class="cx"> 
</span><span class="cx">     return str;
</span><span class="cx"> }
</span><span class="lines">@@ -1892,13 +1905,12 @@
</span><span class="cx"> static VALUE
</span><span class="cx"> rb_str_sub_bang(VALUE str, SEL sel, int argc, VALUE *argv)
</span><span class="cx"> {
</span><del>-    VALUE pat, repl, match, hash = Qnil;
-    struct re_registers *regs;
-    int iter = 0;
-    int tainted = 0;
</del><ins>+    VALUE repl, hash = Qnil;
+    bool iter = false;
+    bool tainted = false;
</ins><span class="cx"> 
</span><span class="cx">     if (argc == 1 &amp;&amp; rb_block_given_p()) {
</span><del>-        iter = 1;
</del><ins>+        iter = true;
</ins><span class="cx">     }
</span><span class="cx">     else if (argc == 2) {
</span><span class="cx">         repl = argv[1];
</span><span class="lines">@@ -1906,30 +1918,33 @@
</span><span class="cx">         if (NIL_P(hash)) {
</span><span class="cx">             StringValue(repl);
</span><span class="cx">         }
</span><del>-        if (OBJ_TAINTED(repl)) tainted = 1;
</del><ins>+        if (OBJ_TAINTED(repl)) {
+            tainted = true;
+        }
</ins><span class="cx">     }
</span><span class="cx">     else {
</span><span class="cx">         rb_raise(rb_eArgError, &quot;wrong number of arguments (%d for 2)&quot;, argc);
</span><span class="cx">     }
</span><span class="cx"> 
</span><del>-    pat = get_pat(argv[0], 1);
</del><ins>+    VALUE pat = get_pat(argv[0], 1);
</ins><span class="cx">     if (rb_reg_search(pat, str, 0, 0) &gt;= 0) {
</span><ins>+        VALUE match = rb_backref_get();
+        struct re_registers *regs = RMATCH_REGS(match);
</ins><span class="cx"> 
</span><del>-        match = rb_backref_get();
-        regs = RMATCH_REGS(match);
-
</del><span class="cx">         if (iter || !NIL_P(hash)) {
</span><del>-
</del><span class="cx">             if (iter) {
</span><span class="cx">                 rb_match_busy(match);
</span><span class="cx">                 repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
</span><span class="cx">             }
</span><span class="cx">             else {
</span><del>-                repl = rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0)));
</del><ins>+                repl = rb_hash_aref(hash, rb_str_subseq(str, BEG(0),
+                            END(0) - BEG(0)));
</ins><span class="cx">                 repl = rb_obj_as_string(repl);
</span><span class="cx">             }
</span><span class="cx">             str_frozen_check(str);
</span><del>-            if (iter) rb_backref_set(match);
</del><ins>+            if (iter) {
+                rb_backref_set(match);
+            }
</ins><span class="cx">         }
</span><span class="cx">         else {
</span><span class="cx">             repl = rb_reg_regsub(repl, str, regs, pat);
</span><span class="lines">@@ -1937,16 +1952,18 @@
</span><span class="cx"> 
</span><span class="cx">         rb_str_modify(str);
</span><span class="cx">         rb_str_splice_0(str, BEG(0), END(0) - BEG(0), repl);
</span><del>-        if (OBJ_TAINTED(repl)) tainted = 1;
</del><ins>+        if (OBJ_TAINTED(repl)) {
+            tainted = true;
+        }
</ins><span class="cx"> 
</span><del>-        if (tainted) OBJ_TAINT(str);
-
</del><ins>+        if (tainted) {
+            OBJ_TAINT(str);
+        }
</ins><span class="cx">         return str;
</span><span class="cx">     }
</span><span class="cx">     return Qnil;
</span><span class="cx"> }
</span><span class="cx"> 
</span><del>-
</del><span class="cx"> /*
</span><span class="cx">  *  call-seq:
</span><span class="cx">  *     str.sub(pattern, replacement)         =&gt; new_str
</span><span class="lines">@@ -1989,37 +2006,35 @@
</span><span class="cx"> static VALUE
</span><span class="cx"> str_gsub(SEL sel, int argc, VALUE *argv, VALUE str, int bang)
</span><span class="cx"> {
</span><del>-    VALUE pat, val, repl, match, dest, hash = Qnil;
-    struct re_registers *regs;
-    long beg, n;
-    long offset, slen, len;
-    int iter = 0;
-    const char *sp, *cp;
-    int tainted = 0;
-    rb_encoding *str_enc;
-    
</del><ins>+    bool iter = false;
+    bool tainted = false;
+    VALUE hash = Qnil, repl = Qnil;
+
</ins><span class="cx">     switch (argc) {
</span><del>-      case 1:
-        RETURN_ENUMERATOR(str, argc, argv);
-        iter = 1;
-        break;
-      case 2:
-        repl = argv[1];
-        hash = rb_check_convert_type(argv[1], T_HASH, &quot;Hash&quot;, &quot;to_hash&quot;);
-        if (NIL_P(hash)) {
-            StringValue(repl);
-        }
-        if (OBJ_TAINTED(repl)) {
-            tainted = 1;
-        }
-        break;
-      default:
-        rb_raise(rb_eArgError, &quot;wrong number of arguments (%d for 2)&quot;, argc);
</del><ins>+        case 1:
+            RETURN_ENUMERATOR(str, argc, argv);
+            iter = true;
+            break;
+
+        case 2:
+            repl = argv[1];
+            hash = rb_check_convert_type(argv[1], T_HASH, &quot;Hash&quot;, &quot;to_hash&quot;);
+            if (NIL_P(hash)) {
+                StringValue(repl);
+            }
+            if (OBJ_TAINTED(repl)) {
+                tainted = true;
+            }
+            break;
+
+        default:
+            rb_raise(rb_eArgError, &quot;wrong number of arguments (%d for 2)&quot;,
+                    argc);
</ins><span class="cx">     }
</span><span class="cx"> 
</span><del>-    pat = get_pat(argv[0], 1);
-    offset=0; n=0;
-    beg = rb_reg_search(pat, str, 0, 0);
</del><ins>+    VALUE pat = get_pat(argv[0], 1);
+    long offset = 0;
+    long beg = rb_reg_search(pat, str, 0, 0);
</ins><span class="cx">     if (beg &lt; 0) {
</span><span class="cx">         if (bang) {
</span><span class="cx">             return Qnil;        /* no match, no substitution */
</span><span class="lines">@@ -2027,23 +2042,23 @@
</span><span class="cx">         return rb_str_new3(str);
</span><span class="cx">     }
</span><span class="cx"> 
</span><del>-    dest = rb_str_new5(str, NULL, 0);
-    slen = RSTRING_LEN(str);
-    sp = RSTRING_PTR(str);
-    cp = sp;
-    str_enc = NULL;
</del><ins>+    VALUE dest = rb_str_new5(str, NULL, 0);
+    long slen = RSTRING_LEN(str);
+    VALUE match;
</ins><span class="cx"> 
</span><span class="cx">     do {
</span><del>-        n++;
</del><span class="cx">         match = rb_backref_get();
</span><del>-        regs = RMATCH_REGS(match);
</del><ins>+        struct re_registers *regs = RMATCH_REGS(match);
+        VALUE val;
+
</ins><span class="cx">         if (iter || !NIL_P(hash)) {
</span><span class="cx">             if (iter) {
</span><span class="cx">                 rb_match_busy(match);
</span><span class="cx">                 val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
</span><span class="cx">             }
</span><span class="cx">             else {
</span><del>-                val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0)));
</del><ins>+                val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0),
+                            END(0) - BEG(0)));
</ins><span class="cx">                 val = rb_obj_as_string(val);
</span><span class="cx">             }
</span><span class="cx">             str_mod_check(str, sp, slen);
</span><span class="lines">@@ -2062,13 +2077,15 @@
</span><span class="cx">             val = rb_reg_regsub(repl, str, regs, pat);
</span><span class="cx">         }
</span><span class="cx"> 
</span><ins>+
</ins><span class="cx">         if (OBJ_TAINTED(val)) {
</span><del>-            tainted = 1;
</del><ins>+            tainted = true;
</ins><span class="cx">         }
</span><span class="cx"> 
</span><del>-        len = beg - offset;        /* copy pre-match substr */
-        if (len) {
-            rb_enc_str_buf_cat(dest, cp, len, str_enc);
</del><ins>+        long len = beg - offset;  /* copy pre-match substr */
+        if (len &gt; 0) {
+            rb_str_buf_append(dest, rb_str_subseq(str, offset, len));
+            //rb_enc_str_buf_cat(dest, cp, len, str_enc);
</ins><span class="cx">         }
</span><span class="cx"> 
</span><span class="cx">         rb_str_buf_append(dest, val);
</span><span class="lines">@@ -2079,18 +2096,24 @@
</span><span class="cx">              * Always consume at least one character of the input string
</span><span class="cx">              * in order to prevent infinite loops.
</span><span class="cx">              */
</span><del>-            if (slen &lt;= END(0)) break;
</del><ins>+            if (slen &lt;= END(0)) {
+                break;
+            }
</ins><span class="cx">             len = 1;
</span><del>-            rb_enc_str_buf_cat(dest, sp+END(0), len, str_enc);
</del><ins>+            rb_str_buf_append(dest, rb_str_subseq(str, END(0), len));
+            //rb_enc_str_buf_cat(dest, sp+END(0), len, str_enc);
</ins><span class="cx">             offset = END(0) + len;
</span><span class="cx">         }
</span><del>-        cp = sp + offset;
-        if (offset &gt; slen) break;
</del><ins>+        if (offset &gt; slen) {
+            break;
+        }
</ins><span class="cx">         beg = rb_reg_search(pat, str, offset, 0);
</span><del>-    } 
</del><ins>+    }
</ins><span class="cx">     while (beg &gt;= 0);
</span><ins>+
</ins><span class="cx">     if (slen &gt; offset) {
</span><del>-        rb_enc_str_buf_cat(dest, cp, slen - offset, str_enc);
</del><ins>+        rb_str_buf_append(dest, rb_str_subseq(str, offset, slen - offset));
+        //rb_enc_str_buf_cat(dest, cp, slen - offset, str_enc);
</ins><span class="cx">     }
</span><span class="cx">     rb_backref_set(match);
</span><span class="cx">     if (bang) {
</span><span class="lines">@@ -2099,7 +2122,7 @@
</span><span class="cx">     }
</span><span class="cx">     else {
</span><span class="cx">             if (!tainted &amp;&amp; OBJ_TAINTED(str)) {
</span><del>-            tainted = 1;
</del><ins>+            tainted = true;
</ins><span class="cx">         }
</span><span class="cx">         str = dest;
</span><span class="cx">     }
</span><span class="lines">@@ -2443,6 +2466,7 @@
</span><span class="cx">     return str;
</span><span class="cx"> }
</span><span class="cx"> 
</span><ins>+#if 0
</ins><span class="cx"> static void
</span><span class="cx"> str_cat_char(VALUE str, int c, rb_encoding *enc)
</span><span class="cx"> {
</span><span class="lines">@@ -2458,6 +2482,7 @@
</span><span class="cx">     str_cat_char(str, '\\', enc);
</span><span class="cx">     str_cat_char(str, c, enc);
</span><span class="cx"> }
</span><ins>+#endif
</ins><span class="cx"> 
</span><span class="cx"> /*
</span><span class="cx">  * call-seq:
</span><span class="lines">@@ -2471,13 +2496,37 @@
</span><span class="cx">  *    str.inspect       #=&gt; &quot;\&quot;hel\\bo\&quot;&quot;
</span><span class="cx">  */
</span><span class="cx"> 
</span><ins>+static inline void
+__append(CFMutableStringRef out, UniChar c, bool prefix)
+{
+    CFStringAppendCharacters(out, &amp;c, 1);
+}
+
</ins><span class="cx"> VALUE
</span><span class="cx"> rb_str_inspect(VALUE str, SEL sel)
</span><span class="cx"> {
</span><del>-    rb_encoding *enc = STR_ENC_GET(str);
</del><ins>+    const long len = CFStringGetLength((CFStringRef)str);
+    CFStringInlineBuffer buf; 
+    CFStringInitInlineBuffer((CFStringRef)str, &amp;buf, CFRangeMake(0, len));
+
+    CFMutableStringRef out = CFStringCreateMutable(NULL, 0);
+    __append(out, '&quot;', false);
+
+    long i;
+    for (i = 0; i &lt; len; i++) {
+        UniChar c = CFStringGetCharacterFromInlineBuffer(&amp;buf, i);
+        __append(out, c, false);
+    }
+    __append(out, '&quot;', false);
+
+    return (VALUE)CFMakeCollectable(out);
+
+#if 0
</ins><span class="cx">     const char *p, *pend;
</span><span class="cx">     VALUE result;
</span><span class="cx"> 
</span><ins>+
+
</ins><span class="cx">     p = RSTRING_PTR(str); 
</span><span class="cx">     pend = p + RSTRING_LEN(str);
</span><span class="cx">     if (p == NULL) {
</span><span class="lines">@@ -2545,6 +2594,7 @@
</span><span class="cx">     str_cat_char(result, '&quot;', enc);
</span><span class="cx"> 
</span><span class="cx">     return result;
</span><ins>+#endif
</ins><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> #define IS_EVSTR(p,e) ((p) &lt; (e) &amp;&amp; (*(p) == '$' || *(p) == '@' || *(p) == '{'))
</span></span></pre>
</div>
</div>

</body>
</html>