[macruby-changes] [2036] MacRuby/branches/experimental

source_changes at macosforge.org source_changes at macosforge.org
Mon Jul 20 13:49:50 PDT 2009


Revision: 2036
          http://trac.macosforge.org/projects/ruby/changeset/2036
Author:   lsansonetti at apple.com
Date:     2009-07-20 13:49:50 -0700 (Mon, 20 Jul 2009)
Log Message:
-----------
some work on unicode support (still in progress)

Modified Paths:
--------------
    MacRuby/branches/experimental/compiler.cpp
    MacRuby/branches/experimental/compiler.h
    MacRuby/branches/experimental/io.c
    MacRuby/branches/experimental/parse.y
    MacRuby/branches/experimental/re.c
    MacRuby/branches/experimental/spec/macruby/core/hash_spec.rb
    MacRuby/branches/experimental/string.c

Modified: MacRuby/branches/experimental/compiler.cpp
===================================================================
--- MacRuby/branches/experimental/compiler.cpp	2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/compiler.cpp	2009-07-20 20:49:50 UTC (rev 2036)
@@ -138,7 +138,7 @@
     falseVal = ConstantInt::get(RubyObjTy, Qfalse);
     undefVal = ConstantInt::get(RubyObjTy, Qundef);
     splatArgFollowsVal = ConstantInt::get(RubyObjTy, SPLAT_ARG_FOLLOWS);
-    cObject = ConstantInt::get(RubyObjTy, (long)rb_cObject);
+    cObject = ConstantInt::get(RubyObjTy, rb_cObject);
     PtrTy = PointerType::getUnqual(Type::Int8Ty);
     PtrPtrTy = PointerType::getUnqual(PtrTy);
     Int32PtrTy = PointerType::getUnqual(Type::Int32Ty);
@@ -407,21 +407,56 @@
 }
 
 GlobalVariable *
+RoxorCompiler::compile_const_global_ustring(const UniChar *str,
+	const size_t len, CFHashCode hash)
+{
+    assert(len > 0);
+    // Emits (or reuses) a constant global of `len' 16-bit units from `str'.
+    std::map<CFHashCode, GlobalVariable *>::iterator iter =
+	static_ustrings.find(hash);
+    // NOTE(review): keyed by `hash' only; colliding strings share one global.
+    GlobalVariable *gvar;
+    if (iter == static_ustrings.end()) {
+	const ArrayType *str_type = ArrayType::get(Type::Int16Ty, len);
+	// Exactly `len' i16 elements; no terminator is appended.
+	std::vector<Constant *> ary_elements;
+	for (unsigned int i = 0; i < len; i++) {
+	    ary_elements.push_back(ConstantInt::get(Type::Int16Ty, str[i]));
+	}
+
+	gvar = new GlobalVariable(
+		str_type,
+		true,
+		GlobalValue::InternalLinkage,
+		ConstantArray::get(str_type, ary_elements),
+		"",
+		RoxorCompiler::module);
+
+	static_ustrings[hash] = gvar;
+    }
+    else {
+	gvar = iter->second;
+    }
+
+    return gvar;
+}
+
+GlobalVariable *
 RoxorCompiler::compile_const_global_string(const char *str,
-	const size_t str_len)
+	const size_t len)
 {
-    assert(str_len > 0);
+    assert(len > 0);
 
-    std::string s(str, str_len);
+    std::string s(str, len);
     std::map<std::string, GlobalVariable *>::iterator iter =
 	static_strings.find(s);
 
     GlobalVariable *gvar;
     if (iter == static_strings.end()) {
-	const ArrayType *str_type = ArrayType::get(Type::Int8Ty, str_len + 1);
+	const ArrayType *str_type = ArrayType::get(Type::Int8Ty, len + 1);
 
 	std::vector<Constant *> ary_elements;
-	for (unsigned int i = 0; i < str_len; i++) {
+	for (unsigned int i = 0; i < len; i++) {
 	    ary_elements.push_back(ConstantInt::get(Type::Int8Ty, str[i]));
 	}
 	ary_elements.push_back(ConstantInt::get(Type::Int8Ty, 0));
@@ -2492,7 +2527,6 @@
 	//
 	//	10.times { s = 'foo'; s << 'bar' }
 	//
-	const char *str = RSTRING_PTR(val);
 	const size_t str_len = RSTRING_LEN(val);
 	if (str_len == 0) {
 	    if (newString3Func == NULL) {	
@@ -2503,9 +2537,18 @@
 	    return CallInst::Create(newString3Func, "", bb);
 	}
 	else {
-	    GlobalVariable *str_gvar = compile_const_global_string(str,
-		    str_len);
+	    UniChar *buf = (UniChar *)CFStringGetCharactersPtr(
+		    (CFStringRef)val);
 
+	    if (buf == NULL) {
+		buf = (UniChar *)alloca(sizeof(UniChar) * str_len);
+		CFStringGetCharacters((CFStringRef)val,
+			CFRangeMake(0, str_len), buf);
+	    }
+
+	    GlobalVariable *str_gvar = compile_const_global_ustring(buf,
+		    str_len, CFHash((CFTypeRef)val));
+
 	    std::vector<Value *> idxs;
 	    idxs.push_back(ConstantInt::get(Type::Int32Ty, 0));
 	    idxs.push_back(ConstantInt::get(Type::Int32Ty, 0));
@@ -2515,8 +2558,9 @@
 	    if (newString2Func == NULL) {	
 		newString2Func = cast<Function>(
 			module->getOrInsertFunction(
-			    "rb_str_new", RubyObjTy, PtrTy, Type::Int32Ty,
-			    NULL));
+			    "rb_unicode_str_new",
+			    RubyObjTy, PointerType::getUnqual(Type::Int16Ty),
+			    Type::Int32Ty, NULL));
 	    }
 
 	    std::vector<Value *> params;

Modified: MacRuby/branches/experimental/compiler.h
===================================================================
--- MacRuby/branches/experimental/compiler.h	2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/compiler.h	2009-07-20 20:49:50 UTC (rev 2036)
@@ -66,6 +66,7 @@
 	std::vector<ID> dvars;
 	std::map<ID, Instruction *> ivar_slots_cache;
 	std::map<std::string, GlobalVariable *> static_strings;
+	std::map<CFHashCode, GlobalVariable *> static_ustrings;
 
 #if ROXOR_COMPILER_DEBUG
 	int level;
@@ -249,6 +250,9 @@
 	GlobalVariable *compile_const_global_string(const char *str) {
 	    return compile_const_global_string(str, strlen(str));
 	}
+	GlobalVariable *compile_const_global_ustring(const UniChar *str,
+		const size_t str_len, CFHashCode hash);
+
 	Value *compile_arity(rb_vm_arity_t &arity);
 	Value *compile_literal(VALUE val);
 	virtual Value *compile_immutable_literal(VALUE val);

Modified: MacRuby/branches/experimental/io.c
===================================================================
--- MacRuby/branches/experimental/io.c	2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/io.c	2009-07-20 20:49:50 UTC (rev 2036)
@@ -460,22 +460,11 @@
     }
     else {
 	buffer = (UInt8 *)RSTRING_PTR(to_write);
-	if (buffer != NULL) {
-	    length = RSTRING_LEN(to_write);
+	if (buffer == NULL) {
+	    rb_raise(rb_eRuntimeError,
+		    "could not extract a string from the read data.");
 	}
-	else {
-	    const long max = CFStringGetMaximumSizeForEncoding(
-		    CFStringGetLength((CFStringRef)to_write),
-		    kCFStringEncodingUTF8);
-
-	    buffer = (UInt8 *)alloca(max + 1);
-	    if (!CFStringGetCString((CFStringRef)to_write, (char *)buffer, 
-			max, kCFStringEncodingUTF8)) {
-		rb_raise(rb_eRuntimeError,
-			"could not extract a string from the read data.");
-	    }
-	    length = strlen((char *)buffer);
-	}
+	length = strlen((char *)buffer);
     }
 
     if (length == 0) {

Modified: MacRuby/branches/experimental/parse.y
===================================================================
--- MacRuby/branches/experimental/parse.y	2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/parse.y	2009-07-20 20:49:50 UTC (rev 2036)
@@ -290,14 +290,34 @@
 
 #if WITH_OBJC
 # define UTF8_ENC() (NULL)
+static inline VALUE
+__new_tmp_str(const char *ptr, const size_t len)
+{
+    /* Build the string from `len' bytes decoded as UTF-8.  A NULL `ptr' or
+     * a NULL result from CoreFoundation falls back to the plain creator. */
+    CFStringRef decoded = ptr == NULL ? NULL : CFStringCreateWithBytes(NULL,
+	    (UInt8 *)ptr, len, kCFStringEncodingUTF8, false);
+    if (decoded == NULL) {
+	return rb_usascii_str_new(ptr, len);
+    }
+    /* Return a mutable copy and drop the immutable intermediate. */
+    CFMutableStringRef copy = CFStringCreateMutableCopy(NULL, 0, decoded);
+    assert(copy != NULL);
+    CFRelease(decoded);
+    return (VALUE)CFMakeCollectable(copy);
+}
+# define STR_NEW(p,n) __new_tmp_str(p, n)
+# define STR_NEW0() __new_tmp_str(0, 0)
+# define STR_NEW2(p) __new_tmp_str(p, strlen(p))
+# define STR_NEW3(p,n,e,func) __new_tmp_str(p, n)
 #else
 # define UTF8_ENC() (parser->utf8 ? parser->utf8 : \
 		    (parser->utf8 = rb_utf8_encoding()))
+# define STR_NEW(p,n) rb_enc_str_new((p),(n),parser->enc)
+# define STR_NEW0() rb_usascii_str_new(0,0)
+# define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc)
+# define STR_NEW3(p,n,e,func) parser_str_new((p),(n),(e),(func),parser->enc)
 #endif
-#define STR_NEW(p,n) rb_enc_str_new((p),(n),parser->enc)
-#define STR_NEW0() rb_usascii_str_new(0,0)
-#define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc)
-#define STR_NEW3(p,n,e,func) parser_str_new((p),(n),(e),(func),parser->enc)
 #if WITH_OBJC
 # define STR_ENC(m) (parser->enc)
 # define ENC_SINGLE(cr) (1)
@@ -5238,6 +5258,7 @@
     str_dsym   = (STR_FUNC_SYMBOL|STR_FUNC_EXPAND),
 };
 
+#if 0
 static VALUE
 parser_str_new(const char *p, long n, rb_encoding *enc, int func, rb_encoding *enc0)
 {
@@ -5266,6 +5287,7 @@
 
     return str;
 }
+#endif
 
 #define lex_goto_eol(parser) (parser->parser_lex_p = parser->parser_lex_pend)
 

Modified: MacRuby/branches/experimental/re.c
===================================================================
--- MacRuby/branches/experimental/re.c	2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/re.c	2009-07-20 20:49:50 UTC (rev 2036)
@@ -9,6 +9,7 @@
 
 **********************************************************************/
 
+#include "oniguruma.h"
 #include "ruby/ruby.h"
 #include "ruby/re.h"
 #include "ruby/encoding.h"
@@ -449,10 +450,13 @@
     rb_reg_check(re);
     cstr = RREGEXP(re)->str;
     clen = RREGEXP(re)->len;
-    if (clen == 0)
+    if (clen == 0) {
 	cstr = NULL;
+    }
     str = rb_enc_str_new(cstr, clen, rb_enc_get(re));
-    if (OBJ_TAINTED(re)) OBJ_TAINT(str);
+    if (OBJ_TAINTED(re)) {
+	OBJ_TAINT(str);
+    }
     return str;
 }
 
@@ -772,7 +776,8 @@
 }
 
 static Regexp*
-make_regexp(const char *s, long len, rb_encoding *enc, int flags, onig_errmsg_buffer err)
+make_regexp(const char *s, long len, rb_encoding *enc, int flags,
+	onig_errmsg_buffer err)
 {
     Regexp *rp;
     int r;
@@ -1228,9 +1233,88 @@
 }
 #endif
 
+static void
+get_cstring(VALUE str, CFStringEncoding enc, char **pcstr, size_t *pcharsize,
+	bool *should_free)
+{
+    if (pcstr != NULL && pcharsize != NULL && should_free != NULL) {
+	char *p = (char *)CFStringGetCStringPtr((CFStringRef)str, enc);
+	*should_free = (p == NULL);
+	if (p == NULL) {
+	    /* Convert outside assert() so NDEBUG builds still fill `p'. */
+	    const size_t s = CFStringGetMaximumSizeForEncoding(
+		    CFStringGetLength((CFStringRef)str), enc);
+	    p = (char *)malloc(s + 1);
+	    if (!CFStringGetCString((CFStringRef)str, p, s + 1, enc)) {
+		assert(!"CFStringGetCString failed");
+	    }
+	}
+	*pcstr = p;
+	*pcharsize = sizeof(char);
+    }
+}
+
+static void
+get_unistring(VALUE str, CFStringEncoding enc, char **pcstr, size_t *pcharsize,
+	bool *should_free)
+{
+    if (pcstr != NULL && pcharsize != NULL && should_free != NULL) {
+	/* Hand back UniChars: CF's own buffer, or a malloc'ed copy. */
+	UniChar *p = (UniChar *)CFStringGetCharactersPtr((CFStringRef)str);
+	const size_t str_len = CFStringGetLength((CFStringRef)str);
+	if (p != NULL) {
+	    *should_free = false;
+	}
+	else {
+	    /* CFStringGetCharacters writes str_len UniChars whatever `enc'. */
+	    const size_t s = sizeof(UniChar) * str_len;
+	    p = (UniChar *)malloc(s);
+	    CFStringGetCharacters((CFStringRef)str, CFRangeMake(0, str_len),
+		    p);
+	    *should_free = true;
+	}
+	*pcstr = (char *)p;
+	*pcharsize = sizeof(UniChar);
+    }
+}
+
+static inline bool
+multibyte_encoding(rb_encoding *enc)
+{
+    /* Identity test against the UTF-16/UTF-32 Oniguruma encodings. */
+    const OnigEncoding e = (OnigEncoding)enc;
+    return e == ONIG_ENCODING_UTF16_BE || e == ONIG_ENCODING_UTF16_LE
+	|| e == ONIG_ENCODING_UTF32_BE || e == ONIG_ENCODING_UTF32_LE;
+}
+
 static rb_encoding*
-rb_reg_prepare_enc(VALUE re, VALUE str, int warn)
+rb_reg_prepare_enc(VALUE re, VALUE str, char **pcstr, size_t *pcharsize,
+	bool *should_free)
 {
+    CFStringEncoding enc = CFStringGetFastestEncoding((CFStringRef)str);
+    switch (enc) {
+	case kCFStringEncodingMacRoman:
+	case kCFStringEncodingWindowsLatin1:
+	case kCFStringEncodingISOLatin1:
+	case kCFStringEncodingNextStepLatin:
+	case kCFStringEncodingASCII:
+	case kCFStringEncodingNonLossyASCII:
+	    get_cstring(str, enc, pcstr, pcharsize, should_free);
+	    return (rb_encoding *)ONIG_ENCODING_ASCII;
+
+	case kCFStringEncodingUTF8:
+	case kCFStringEncodingUTF16:
+	case kCFStringEncodingUTF16BE:
+	case kCFStringEncodingUTF16LE:
+	case kCFStringEncodingUTF32:
+	case kCFStringEncodingUTF32BE:
+	case kCFStringEncodingUTF32LE:
+	    get_unistring(str, enc, pcstr, pcharsize, should_free);
+	    return (rb_encoding *)ONIG_ENCODING_UTF16_LE;
+    }
+
+    rb_raise(rb_eArgError, "given string has unrecognized encoding");
+#if 0
     rb_encoding *enc = 0;
 
 #if !WITH_OBJC
@@ -1265,10 +1349,12 @@
     }
 #endif
     return enc;
+#endif
 }
 
-regex_t *
-rb_reg_prepare_re(VALUE re, VALUE str)
+static regex_t *
+rb_reg_prepare_re(VALUE re, VALUE str, char **pcstr, size_t *pcharsize,
+	bool *should_free)
 {
     regex_t *reg = RREGEXP(re)->ptr;
     onig_errmsg_buffer err = "";
@@ -1277,38 +1363,49 @@
     const char *pattern;
     VALUE unescaped;
     rb_encoding *fixed_enc = 0;
-    rb_encoding *enc = rb_reg_prepare_enc(re, str, 1);
+    rb_encoding *enc = rb_reg_prepare_enc(re, str, pcstr, pcharsize,
+	    should_free);
 
-#if !WITH_OBJC
-    if (reg->enc == enc) return reg;
-#endif
+    if ((rb_encoding *)reg->enc == enc) {
+	return reg;
+    }
 
     rb_reg_check(re);
     reg = RREGEXP(re)->ptr;
     pattern = RREGEXP(re)->str;
 
-    unescaped = rb_reg_preprocess(
-	pattern, pattern + RREGEXP(re)->len, enc,
+    unescaped = rb_reg_preprocess(pattern, pattern + RREGEXP(re)->len, enc,
 	&fixed_enc, err);
 
     if (unescaped == Qnil) {
 	rb_raise(rb_eArgError, "regexp preprocess failed: %s", err);
     }
 
-#if WITH_OBJC
-    enc = (rb_encoding *)ONIG_ENCODING_ASCII;
-#endif
+    UChar *begin, *end;
+    if (multibyte_encoding(enc)) {
+	UniChar *chars = (UniChar *)CFStringGetCharactersPtr(
+		(CFStringRef)unescaped);
+	const long len = RSTRING_LEN(unescaped);
+	if (chars == NULL) {
+	    chars = (UniChar *)alloca(sizeof(UniChar) * len);
+	    CFStringGetCharacters((CFStringRef)unescaped,
+		    CFRangeMake(0, len), chars);
+	}
+	begin = (UChar *)chars;
+	end = (UChar *)chars + (sizeof(UniChar) * len);
+    }
+    else {
+	begin = (UChar *)RSTRING_PTR(unescaped);
+	end = begin + RSTRING_LEN(unescaped);
+    }
 
-    r = onig_new(&reg, (UChar* )RSTRING_PTR(unescaped),
-		 (UChar* )(RSTRING_PTR(unescaped) + RSTRING_LEN(unescaped)),
-		 reg->options, (OnigEncoding)enc,
-		 OnigDefaultSyntax, &einfo);
-    if (r) {
+    r = onig_new(&reg, begin, end, reg->options, (OnigEncoding)enc,
+	    OnigDefaultSyntax, &einfo);
+    if (r != 0) {
 	onig_error_code_to_str((UChar*)err, r, &einfo);
 	rb_reg_raise(pattern, RREGEXP(re)->len, err, re);
     }
 
-    RB_GC_GUARD(unescaped);
     return reg;
 }
 
@@ -1321,7 +1418,7 @@
     UChar *p, *string;
 #endif
 
-    enc = rb_reg_prepare_enc(re, str, 0);
+    enc = rb_reg_prepare_enc(re, str, NULL, NULL, NULL);
 
     if (reverse) {
 	range = -pos;
@@ -1350,47 +1447,45 @@
 int
 rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
 {
-    int result;
-    VALUE match;
-    struct re_registers *pregs;
-    const char *cstr, *range;
-    long clen;
     regex_t *reg0 = RREGEXP(re)->ptr, *reg;
     int busy = FL_TEST(re, REG_BUSY);
 
-    cstr = range = RSTRING_PTR(str);
-    clen = RSTRING_LEN(str);
-#if WITH_OBJC
     static struct re_registers *regs = NULL;
     if (regs == NULL) {
 	regs = xmalloc(sizeof(struct re_registers));
 	rb_objc_root(&regs);
     }
-    pregs = regs;
-#else
-    static struct re_registers regs;
-    pregs = &regs;
-#endif
+    struct re_registers *pregs = regs;
 
+    const size_t clen = RSTRING_LEN(str);
     if (pos > clen || pos < 0) {
 	rb_backref_set(Qnil);
 	return -1;
     }
 
-    reg = rb_reg_prepare_re(re, str);
+    char *cstr = NULL;
+    size_t charsize = 0;
+    bool should_free = false;
+    reg = rb_reg_prepare_re(re, str, &cstr, &charsize, &should_free);
 
+    char *range = cstr;
     FL_SET(re, REG_BUSY);
     if (!reverse) {
-	range += RSTRING_LEN(str);
+	range += (clen * charsize);
     }
     MEMZERO(pregs, struct re_registers, 1);
-    result = onig_search(RREGEXP(re)->ptr,
-			 (UChar*)cstr,
-			 ((UChar*)cstr + clen),
-			 ((UChar*)cstr + pos),
-			 ((UChar*)range),
-			 pregs, ONIG_OPTION_NONE);
+    int result = onig_search(reg,
+	    (UChar*)cstr,
+	    ((UChar*)cstr + (clen * charsize)),
+	    ((UChar*)cstr + (pos * charsize)),
+	    ((UChar*)range),
+	    pregs, ONIG_OPTION_NONE);
 
+    if (should_free) {
+	free(cstr);
+	cstr = NULL;
+    }
+
     if (RREGEXP(re)->ptr != reg) {
 	if (busy) {
 	    onig_free(reg);
@@ -1400,7 +1495,9 @@
 	    RREGEXP(re)->ptr = reg;
 	}
     }
-    if (!busy) FL_UNSET(re, REG_BUSY);
+    if (!busy) {
+	FL_UNSET(re, REG_BUSY);
+    }
     if (result < 0) {
 	onig_region_free(pregs, 0);
 	if (result == ONIG_MISMATCH) {
@@ -1414,10 +1511,26 @@
 	}
     }
 
+    if (charsize > 1) {
+	int i;
+	for (i = 0; i < pregs->num_regs; i++) {
+	    if (pregs->beg[i] > 0) {
+		assert((pregs->beg[i] % charsize) == 0);
+		pregs->beg[i] /= charsize;
+	    }
+	    if (pregs->end[i] > 0) {
+		assert((pregs->end[i] % charsize) == 0);
+		pregs->end[i] /= charsize;
+	    }
+	}
+	assert((result % charsize) == 0);
+	result /= charsize;
+    }
+
 #if WITH_OBJC
-    match = match_alloc(rb_cMatch, 0);
+    VALUE match = match_alloc(rb_cMatch, 0);
 #else
-    match = rb_backref_get();
+    VALUE match = rb_backref_get();
     if (NIL_P(match) || FL_TEST(match, MATCH_BUSY)) {
 	match = match_alloc(rb_cMatch);
     }
@@ -1467,7 +1580,9 @@
     long start, end, len;
     struct re_registers *regs;
 
-    if (NIL_P(match)) return Qnil;
+    if (NIL_P(match)) {
+	return Qnil;
+    }
     match_check(match);
     regs = RMATCH_REGS(match);
     if (nth >= regs->num_regs) {
@@ -1475,14 +1590,17 @@
     }
     if (nth < 0) {
 	nth += regs->num_regs;
-	if (nth <= 0) return Qnil;
+	if (nth <= 0) {
+	    return Qnil;
+	}
     }
     start = BEG(nth);
-    if (start == -1) return Qnil;
+    if (start == -1) {
+	return Qnil;
+    }
     end = END(nth);
     len = end - start;
     str = rb_str_subseq(RMATCH(match)->str, start, len);
-    OBJ_INFECT(str, match);
     return str;
 }
 
@@ -1794,9 +1912,15 @@
     VALUE str = rb_reg_last_match(match);
 
     match_check(match);
-    if (NIL_P(str)) str = rb_str_new(0,0);
-    if (OBJ_TAINTED(match)) OBJ_TAINT(str);
-    if (OBJ_TAINTED(RMATCH(match)->str)) OBJ_TAINT(str);
+    if (NIL_P(str)) {
+	str = rb_str_new(0,0);
+    }
+    if (OBJ_TAINTED(match)) {
+	OBJ_TAINT(str);
+    }
+    else if (OBJ_TAINTED(RMATCH(match)->str)) {
+	OBJ_TAINT(str);
+    }
     return str;
 }
 
@@ -1886,9 +2010,10 @@
     for (i = 0; i < num_regs; i++) {
         VALUE v;
         rb_str_buf_cat2(str, " ");
-        if (0 < i) {
-            if (names[i].name)
+        if (i > 0) {
+            if (names[i].name) { 
                 rb_str_buf_cat(str, (const char *)names[i].name, names[i].len);
+	    }
             else {
                 char buf[sizeof(i)*3+1];
                 snprintf(buf, sizeof(buf), "%d", i);
@@ -1897,10 +2022,12 @@
             rb_str_buf_cat2(str, ":");
         }
         v = rb_reg_nth_match(i, match);
-        if (v == Qnil)
+        if (v == Qnil) {
             rb_str_buf_cat2(str, "nil");
-        else
+	}
+        else {
             rb_str_buf_append(str, rb_str_inspect(v, 0));
+	}
     }
     rb_str_buf_cat2(str, ">");
 
@@ -2324,7 +2451,6 @@
     enc = rb_enc_get(str);
 
     buf = rb_reg_preprocess(p, end, enc, &fixed_enc, err);
-    RB_GC_GUARD(str);
 
     if (buf == Qnil) {
 	return rb_reg_error_desc(str, 0, err);
@@ -2404,15 +2530,21 @@
     rb_encoding *a_enc = rb_ascii8bit_encoding();
 #endif
 
-    if (!OBJ_TAINTED(obj) && rb_safe_level() >= 4)
+    if (!OBJ_TAINTED(obj) && rb_safe_level() >= 4) {
 	rb_raise(rb_eSecurityError, "Insecure: can't modify regexp");
+    }
     rb_check_frozen(obj);
-    if (FL_TEST(obj, REG_LITERAL))
+    if (FL_TEST(obj, REG_LITERAL)) {
 	rb_raise(rb_eSecurityError, "can't modify literal regexp");
-    if (re->ptr) onig_free(re->ptr);
-    if (re->str) xfree(re->str);
-    re->ptr = 0;
-    re->str = 0;
+    }
+    if (re->ptr != NULL) {
+	onig_free(re->ptr);
+    }
+    if (re->str != NULL) {
+	xfree(re->str);
+    }
+    re->ptr = NULL;
+    re->str = NULL;
 
     unescaped = rb_reg_preprocess(s, s+len, enc, &fixed_enc, err);
     if (unescaped == Qnil)
@@ -2444,23 +2576,24 @@
     if (options & ARG_ENCODING_NONE) {
         re->basic.flags |= REG_ENCODING_NONE;
     }
-    
-    GC_WB(&re->ptr, make_regexp(RSTRING_PTR(unescaped), 
-				RSTRING_LEN(unescaped), enc,
-                                options & ARG_REG_OPTION_MASK, err));
-    if (!re->ptr) return -1;
+   
+    Regexp *reg = make_regexp(RSTRING_PTR(unescaped), 
+	    RSTRING_LEN(unescaped), enc,
+	    options & ARG_REG_OPTION_MASK, err);
+    if (reg == NULL) {
+	return -1;
+    }
+    GC_WB(&re->ptr, reg);
     GC_WB(&re->str, ALLOC_N(char, len+1));
     memcpy(re->str, s, len);
     re->str[len] = '\0';
     re->len = len;
-    RB_GC_GUARD(unescaped);
     return 0;
 }
 
 static int
 rb_reg_initialize_str(VALUE obj, VALUE str, int options, onig_errmsg_buffer err)
 {
-    int ret;
     rb_encoding *enc = rb_enc_get(str);
     if (options & ARG_ENCODING_NONE) {
 #if !WITH_OBJC
@@ -2475,10 +2608,8 @@
         }
 #endif
     }
-    ret = rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), enc,
-			    options, err);
-    RB_GC_GUARD(str);
-    return ret;
+    return rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), enc,
+	    options, err);
 }
 
 static VALUE

Modified: MacRuby/branches/experimental/spec/macruby/core/hash_spec.rb
===================================================================
--- MacRuby/branches/experimental/spec/macruby/core/hash_spec.rb	2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/spec/macruby/core/hash_spec.rb	2009-07-20 20:49:50 UTC (rev 2036)
@@ -62,7 +62,7 @@
   end
 
   it "can have a singleton class" do
-    a = NSDictionary.array
+    a = NSDictionary.dictionary
     def a.foo; 42; end
     a.foo.should == 42
     lambda { a[42] = 123 }.should raise_error(RuntimeError)

Modified: MacRuby/branches/experimental/string.c
===================================================================
--- MacRuby/branches/experimental/string.c	2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/string.c	2009-07-20 20:49:50 UTC (rev 2036)
@@ -157,6 +157,15 @@
 }
 
 VALUE
+rb_unicode_str_new(const UniChar *ptr, const size_t len)
+{
+    /* str_alloc a String, then append `len' UniChars read from `ptr'. */
+    const VALUE result = str_alloc(rb_cString);
+    CFStringAppendCharacters((CFMutableStringRef)result, ptr, len);
+    return result;
+}
+
+VALUE
 rb_str_new(const char *ptr, long len)
 {
     return str_new(rb_cString, ptr, len);
@@ -165,17 +174,13 @@
 VALUE
 rb_usascii_str_new(const char *ptr, long len)
 {
-    VALUE str = str_new(rb_cString, ptr, len);
-
-    return str;
+    return str_new(rb_cString, ptr, len);
 }
 
 VALUE
 rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
 {
-    VALUE str = str_new(rb_cString, ptr, len);
-
-    return str;
+    return str_new(rb_cString, ptr, len);
 }
 
 VALUE
@@ -592,8 +597,12 @@
 	    kCFStringEncodingUTF8);
 
     cptr = (char *)xmalloc(max + 1);
-    assert(CFStringGetCString((CFStringRef)ptr, cptr,
-		max, kCFStringEncodingUTF8));
+    if (!CFStringGetCString((CFStringRef)ptr, cptr,
+		max + 1, kCFStringEncodingUTF8)) {
+	// Probably a UTF16 string...
+	xfree(cptr);
+	return NULL;
+    }
 
     return cptr;
 }
@@ -817,6 +826,9 @@
 	Check_Type(str2, T_STRING);
     }
 
+    CFStringAppend((CFMutableStringRef)str, (CFStringRef)str2);
+
+#if 0
     const char *ptr;
     long len;
 
@@ -824,6 +836,7 @@
     len = RSTRING_LEN(str2);
 
     rb_objc_str_cat(str, ptr, len, kCFStringEncodingASCII);
+#endif
 
     return str;
 }
@@ -1892,13 +1905,12 @@
 static VALUE
 rb_str_sub_bang(VALUE str, SEL sel, int argc, VALUE *argv)
 {
-    VALUE pat, repl, match, hash = Qnil;
-    struct re_registers *regs;
-    int iter = 0;
-    int tainted = 0;
+    VALUE repl, hash = Qnil;
+    bool iter = false;
+    bool tainted = false;
 
     if (argc == 1 && rb_block_given_p()) {
-	iter = 1;
+	iter = true;
     }
     else if (argc == 2) {
 	repl = argv[1];
@@ -1906,30 +1918,33 @@
 	if (NIL_P(hash)) {
 	    StringValue(repl);
 	}
-	if (OBJ_TAINTED(repl)) tainted = 1;
+	if (OBJ_TAINTED(repl)) {
+	    tainted = true;
+	}
     }
     else {
 	rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
     }
 
-    pat = get_pat(argv[0], 1);
+    VALUE pat = get_pat(argv[0], 1);
     if (rb_reg_search(pat, str, 0, 0) >= 0) {
+	VALUE match = rb_backref_get();
+	struct re_registers *regs = RMATCH_REGS(match);
 
-	match = rb_backref_get();
-	regs = RMATCH_REGS(match);
-
 	if (iter || !NIL_P(hash)) {
-
             if (iter) {
                 rb_match_busy(match);
                 repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
             }
             else {
-                repl = rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0)));
+                repl = rb_hash_aref(hash, rb_str_subseq(str, BEG(0),
+			    END(0) - BEG(0)));
                 repl = rb_obj_as_string(repl);
             }
 	    str_frozen_check(str);
-	    if (iter) rb_backref_set(match);
+	    if (iter) {
+		rb_backref_set(match);
+	    }
 	}
 	else {
 	    repl = rb_reg_regsub(repl, str, regs, pat);
@@ -1937,16 +1952,18 @@
 
 	rb_str_modify(str);
 	rb_str_splice_0(str, BEG(0), END(0) - BEG(0), repl);
-	if (OBJ_TAINTED(repl)) tainted = 1;
+	if (OBJ_TAINTED(repl)) {
+	    tainted = true;
+	}
 
-	if (tainted) OBJ_TAINT(str);
-
+	if (tainted) {
+	    OBJ_TAINT(str);
+	}
 	return str;
     }
     return Qnil;
 }
 
-
 /*
  *  call-seq:
  *     str.sub(pattern, replacement)         => new_str
@@ -1989,37 +2006,35 @@
 static VALUE
 str_gsub(SEL sel, int argc, VALUE *argv, VALUE str, int bang)
 {
-    VALUE pat, val, repl, match, dest, hash = Qnil;
-    struct re_registers *regs;
-    long beg, n;
-    long offset, slen, len;
-    int iter = 0;
-    const char *sp, *cp;
-    int tainted = 0;
-    rb_encoding *str_enc;
-    
+    bool iter = false;
+    bool tainted = false;
+    VALUE hash = Qnil, repl = Qnil;
+ 
     switch (argc) {
-      case 1:
-	RETURN_ENUMERATOR(str, argc, argv);
-	iter = 1;
-	break;
-      case 2:
-	repl = argv[1];
-	hash = rb_check_convert_type(argv[1], T_HASH, "Hash", "to_hash");
-	if (NIL_P(hash)) {
-	    StringValue(repl);
-	}
-	if (OBJ_TAINTED(repl)) {
-	    tainted = 1;
-	}
-	break;
-      default:
-	rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
+	case 1:
+	    RETURN_ENUMERATOR(str, argc, argv);
+	    iter = true;
+	    break;
+
+	case 2:
+	    repl = argv[1];
+	    hash = rb_check_convert_type(argv[1], T_HASH, "Hash", "to_hash");
+	    if (NIL_P(hash)) {
+		StringValue(repl);
+	    }
+	    if (OBJ_TAINTED(repl)) {
+		tainted = true;
+	    }
+	    break;
+
+	default:
+	    rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)",
+		    argc);
     }
 
-    pat = get_pat(argv[0], 1);
-    offset=0; n=0;
-    beg = rb_reg_search(pat, str, 0, 0);
+    VALUE pat = get_pat(argv[0], 1);
+    long offset = 0;
+    long beg = rb_reg_search(pat, str, 0, 0);
     if (beg < 0) {
 	if (bang) {
 	    return Qnil;	/* no match, no substitution */
@@ -2027,23 +2042,23 @@
 	return rb_str_new3(str);
     }
 
-    dest = rb_str_new5(str, NULL, 0);
-    slen = RSTRING_LEN(str);
-    sp = RSTRING_PTR(str);
-    cp = sp;
-    str_enc = NULL;
+    VALUE dest = rb_str_new5(str, NULL, 0);
+    long slen = RSTRING_LEN(str);
+    VALUE match;
 
     do {
-	n++;
 	match = rb_backref_get();
-	regs = RMATCH_REGS(match);
+	struct re_registers *regs = RMATCH_REGS(match);
+        VALUE val;
+
 	if (iter || !NIL_P(hash)) {
             if (iter) {
                 rb_match_busy(match);
                 val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
             }
             else {
-                val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0)));
+                val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0),
+			    END(0) - BEG(0)));
                 val = rb_obj_as_string(val);
             }
 	    str_mod_check(str, sp, slen);
@@ -2062,13 +2077,15 @@
 	    val = rb_reg_regsub(repl, str, regs, pat);
 	}
 
+
 	if (OBJ_TAINTED(val)) {
-	    tainted = 1;
+	    tainted = true;
 	}
 
-	len = beg - offset;	/* copy pre-match substr */
-        if (len) {
-	    rb_enc_str_buf_cat(dest, cp, len, str_enc);
+	long len = beg - offset;  /* copy pre-match substr */
+        if (len > 0) {
+	    rb_str_buf_append(dest, rb_str_subseq(str, offset, len));
+	    //rb_enc_str_buf_cat(dest, cp, len, str_enc);
         }
 
         rb_str_buf_append(dest, val);
@@ -2079,18 +2096,24 @@
 	     * Always consume at least one character of the input string
 	     * in order to prevent infinite loops.
 	     */
-	    if (slen <= END(0)) break;
+	    if (slen <= END(0)) {
+		break;
+	    }
 	    len = 1;
-            rb_enc_str_buf_cat(dest, sp+END(0), len, str_enc);
+	    rb_str_buf_append(dest, rb_str_subseq(str, END(0), len));
+            //rb_enc_str_buf_cat(dest, sp+END(0), len, str_enc);
 	    offset = END(0) + len;
 	}
-	cp = sp + offset;
-	if (offset > slen) break;
+	if (offset > slen) {
+	    break;
+	}
 	beg = rb_reg_search(pat, str, offset, 0);
-    } 
+    }
     while (beg >= 0);
+
     if (slen > offset) {
-        rb_enc_str_buf_cat(dest, cp, slen - offset, str_enc);
+	rb_str_buf_append(dest, rb_str_subseq(str, offset, slen - offset));
+        //rb_enc_str_buf_cat(dest, cp, slen - offset, str_enc);
     }
     rb_backref_set(match);
     if (bang) {
@@ -2099,7 +2122,7 @@
     }
     else {
     	if (!tainted && OBJ_TAINTED(str)) {
-	    tainted = 1;
+	    tainted = true;
 	}
 	str = dest;
     }
@@ -2443,6 +2466,7 @@
     return str;
 }
 
+#if 0
 static void
 str_cat_char(VALUE str, int c, rb_encoding *enc)
 {
@@ -2458,6 +2482,7 @@
     str_cat_char(str, '\\', enc);
     str_cat_char(str, c, enc);
 }
+#endif
 
 /*
  * call-seq:
@@ -2471,13 +2496,37 @@
  *    str.inspect       #=> "\"hel\\bo\""
  */
 
+static inline void
+__append(CFMutableStringRef out, UniChar c, bool prefix)
+{
+    CFStringAppendCharacters(out, &c, 1); // appends `c'; `prefix' is unused
+}
+
 VALUE
 rb_str_inspect(VALUE str, SEL sel)
 {
-    rb_encoding *enc = STR_ENC_GET(str);
+    const long len = CFStringGetLength((CFStringRef)str);
+    CFStringInlineBuffer buf; 
+    CFStringInitInlineBuffer((CFStringRef)str, &buf, CFRangeMake(0, len));
+
+    CFMutableStringRef out = CFStringCreateMutable(NULL, 0);
+    __append(out, '"', false);
+
+    long i;
+    for (i = 0; i < len; i++) {
+	UniChar c = CFStringGetCharacterFromInlineBuffer(&buf, i);
+	__append(out, c, false);
+    }
+    __append(out, '"', false);
+
+    return (VALUE)CFMakeCollectable(out);
+
+#if 0
     const char *p, *pend;
     VALUE result;
 
+
+
     p = RSTRING_PTR(str); 
     pend = p + RSTRING_LEN(str);
     if (p == NULL) {
@@ -2545,6 +2594,7 @@
     str_cat_char(result, '"', enc);
 
     return result;
+#endif
 }
 
 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20090720/67a4dbba/attachment-0001.html>


More information about the macruby-changes mailing list