[macruby-changes] [2036] MacRuby/branches/experimental
source_changes at macosforge.org
source_changes at macosforge.org
Mon Jul 20 13:49:50 PDT 2009
Revision: 2036
http://trac.macosforge.org/projects/ruby/changeset/2036
Author: lsansonetti at apple.com
Date: 2009-07-20 13:49:50 -0700 (Mon, 20 Jul 2009)
Log Message:
-----------
some work on unicode support (still in progress)
Modified Paths:
--------------
MacRuby/branches/experimental/compiler.cpp
MacRuby/branches/experimental/compiler.h
MacRuby/branches/experimental/io.c
MacRuby/branches/experimental/parse.y
MacRuby/branches/experimental/re.c
MacRuby/branches/experimental/spec/macruby/core/hash_spec.rb
MacRuby/branches/experimental/string.c
Modified: MacRuby/branches/experimental/compiler.cpp
===================================================================
--- MacRuby/branches/experimental/compiler.cpp 2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/compiler.cpp 2009-07-20 20:49:50 UTC (rev 2036)
@@ -138,7 +138,7 @@
falseVal = ConstantInt::get(RubyObjTy, Qfalse);
undefVal = ConstantInt::get(RubyObjTy, Qundef);
splatArgFollowsVal = ConstantInt::get(RubyObjTy, SPLAT_ARG_FOLLOWS);
- cObject = ConstantInt::get(RubyObjTy, (long)rb_cObject);
+ cObject = ConstantInt::get(RubyObjTy, rb_cObject);
PtrTy = PointerType::getUnqual(Type::Int8Ty);
PtrPtrTy = PointerType::getUnqual(PtrTy);
Int32PtrTy = PointerType::getUnqual(Type::Int32Ty);
@@ -407,21 +407,56 @@
}
GlobalVariable *
+RoxorCompiler::compile_const_global_ustring(const UniChar *str,
+ const size_t len, CFHashCode hash)
+{
+ assert(len > 0);
+
+ std::map<CFHashCode, GlobalVariable *>::iterator iter =
+ static_ustrings.find(hash);
+
+ GlobalVariable *gvar;
+ if (iter == static_ustrings.end()) {
+ const ArrayType *str_type = ArrayType::get(Type::Int16Ty, len);
+
+ std::vector<Constant *> ary_elements;
+ for (unsigned int i = 0; i < len; i++) {
+ ary_elements.push_back(ConstantInt::get(Type::Int16Ty, str[i]));
+ }
+
+ gvar = new GlobalVariable(
+ str_type,
+ true,
+ GlobalValue::InternalLinkage,
+ ConstantArray::get(str_type, ary_elements),
+ "",
+ RoxorCompiler::module);
+
+ static_ustrings[hash] = gvar;
+ }
+ else {
+ gvar = iter->second;
+ }
+
+ return gvar;
+}
+
+GlobalVariable *
RoxorCompiler::compile_const_global_string(const char *str,
- const size_t str_len)
+ const size_t len)
{
- assert(str_len > 0);
+ assert(len > 0);
- std::string s(str, str_len);
+ std::string s(str, len);
std::map<std::string, GlobalVariable *>::iterator iter =
static_strings.find(s);
GlobalVariable *gvar;
if (iter == static_strings.end()) {
- const ArrayType *str_type = ArrayType::get(Type::Int8Ty, str_len + 1);
+ const ArrayType *str_type = ArrayType::get(Type::Int8Ty, len + 1);
std::vector<Constant *> ary_elements;
- for (unsigned int i = 0; i < str_len; i++) {
+ for (unsigned int i = 0; i < len; i++) {
ary_elements.push_back(ConstantInt::get(Type::Int8Ty, str[i]));
}
ary_elements.push_back(ConstantInt::get(Type::Int8Ty, 0));
@@ -2492,7 +2527,6 @@
//
// 10.times { s = 'foo'; s << 'bar' }
//
- const char *str = RSTRING_PTR(val);
const size_t str_len = RSTRING_LEN(val);
if (str_len == 0) {
if (newString3Func == NULL) {
@@ -2503,9 +2537,18 @@
return CallInst::Create(newString3Func, "", bb);
}
else {
- GlobalVariable *str_gvar = compile_const_global_string(str,
- str_len);
+ UniChar *buf = (UniChar *)CFStringGetCharactersPtr(
+ (CFStringRef)val);
+ if (buf == NULL) {
+ buf = (UniChar *)alloca(sizeof(UniChar) * str_len);
+ CFStringGetCharacters((CFStringRef)val,
+ CFRangeMake(0, str_len), buf);
+ }
+
+ GlobalVariable *str_gvar = compile_const_global_ustring(buf,
+ str_len, CFHash((CFTypeRef)val));
+
std::vector<Value *> idxs;
idxs.push_back(ConstantInt::get(Type::Int32Ty, 0));
idxs.push_back(ConstantInt::get(Type::Int32Ty, 0));
@@ -2515,8 +2558,9 @@
if (newString2Func == NULL) {
newString2Func = cast<Function>(
module->getOrInsertFunction(
- "rb_str_new", RubyObjTy, PtrTy, Type::Int32Ty,
- NULL));
+ "rb_unicode_str_new",
+ RubyObjTy, PointerType::getUnqual(Type::Int16Ty),
+ Type::Int32Ty, NULL));
}
std::vector<Value *> params;
Modified: MacRuby/branches/experimental/compiler.h
===================================================================
--- MacRuby/branches/experimental/compiler.h 2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/compiler.h 2009-07-20 20:49:50 UTC (rev 2036)
@@ -66,6 +66,7 @@
std::vector<ID> dvars;
std::map<ID, Instruction *> ivar_slots_cache;
std::map<std::string, GlobalVariable *> static_strings;
+ std::map<CFHashCode, GlobalVariable *> static_ustrings;
#if ROXOR_COMPILER_DEBUG
int level;
@@ -249,6 +250,9 @@
GlobalVariable *compile_const_global_string(const char *str) {
return compile_const_global_string(str, strlen(str));
}
+ GlobalVariable *compile_const_global_ustring(const UniChar *str,
+ const size_t str_len, CFHashCode hash);
+
Value *compile_arity(rb_vm_arity_t &arity);
Value *compile_literal(VALUE val);
virtual Value *compile_immutable_literal(VALUE val);
Modified: MacRuby/branches/experimental/io.c
===================================================================
--- MacRuby/branches/experimental/io.c 2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/io.c 2009-07-20 20:49:50 UTC (rev 2036)
@@ -460,22 +460,11 @@
}
else {
buffer = (UInt8 *)RSTRING_PTR(to_write);
- if (buffer != NULL) {
- length = RSTRING_LEN(to_write);
+ if (buffer == NULL) {
+ rb_raise(rb_eRuntimeError,
+ "could not extract a string from the read data.");
}
- else {
- const long max = CFStringGetMaximumSizeForEncoding(
- CFStringGetLength((CFStringRef)to_write),
- kCFStringEncodingUTF8);
-
- buffer = (UInt8 *)alloca(max + 1);
- if (!CFStringGetCString((CFStringRef)to_write, (char *)buffer,
- max, kCFStringEncodingUTF8)) {
- rb_raise(rb_eRuntimeError,
- "could not extract a string from the read data.");
- }
- length = strlen((char *)buffer);
- }
+ length = strlen((char *)buffer);
}
if (length == 0) {
Modified: MacRuby/branches/experimental/parse.y
===================================================================
--- MacRuby/branches/experimental/parse.y 2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/parse.y 2009-07-20 20:49:50 UTC (rev 2036)
@@ -290,14 +290,34 @@
#if WITH_OBJC
# define UTF8_ENC() (NULL)
+static inline VALUE
+__new_tmp_str(const char *ptr, const size_t len)
+{
+ if (ptr != NULL) {
+ CFStringRef str = CFStringCreateWithBytes(NULL, (UInt8 *)ptr, len,
+ kCFStringEncodingUTF8, false);
+ if (str != NULL) {
+ CFMutableStringRef str2 =
+ CFStringCreateMutableCopy(NULL, 0, str);
+ assert(str2 != NULL);
+ CFRelease(str);
+ return (VALUE)CFMakeCollectable(str2);
+ }
+ }
+ return rb_usascii_str_new(ptr, len);
+}
+# define STR_NEW(p,n) __new_tmp_str(p, n)
+# define STR_NEW0() __new_tmp_str(0, 0)
+# define STR_NEW2(p) __new_tmp_str(p, strlen(p))
+# define STR_NEW3(p,n,e,func) __new_tmp_str(p, n)
#else
# define UTF8_ENC() (parser->utf8 ? parser->utf8 : \
(parser->utf8 = rb_utf8_encoding()))
+# define STR_NEW(p,n) rb_enc_str_new((p),(n),parser->enc)
+# define STR_NEW0() rb_usascii_str_new(0,0)
+# define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc)
+# define STR_NEW3(p,n,e,func) parser_str_new((p),(n),(e),(func),parser->enc)
#endif
-#define STR_NEW(p,n) rb_enc_str_new((p),(n),parser->enc)
-#define STR_NEW0() rb_usascii_str_new(0,0)
-#define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc)
-#define STR_NEW3(p,n,e,func) parser_str_new((p),(n),(e),(func),parser->enc)
#if WITH_OBJC
# define STR_ENC(m) (parser->enc)
# define ENC_SINGLE(cr) (1)
@@ -5238,6 +5258,7 @@
str_dsym = (STR_FUNC_SYMBOL|STR_FUNC_EXPAND),
};
+#if 0
static VALUE
parser_str_new(const char *p, long n, rb_encoding *enc, int func, rb_encoding *enc0)
{
@@ -5266,6 +5287,7 @@
return str;
}
+#endif
#define lex_goto_eol(parser) (parser->parser_lex_p = parser->parser_lex_pend)
Modified: MacRuby/branches/experimental/re.c
===================================================================
--- MacRuby/branches/experimental/re.c 2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/re.c 2009-07-20 20:49:50 UTC (rev 2036)
@@ -9,6 +9,7 @@
**********************************************************************/
+#include "oniguruma.h"
#include "ruby/ruby.h"
#include "ruby/re.h"
#include "ruby/encoding.h"
@@ -449,10 +450,13 @@
rb_reg_check(re);
cstr = RREGEXP(re)->str;
clen = RREGEXP(re)->len;
- if (clen == 0)
+ if (clen == 0) {
cstr = NULL;
+ }
str = rb_enc_str_new(cstr, clen, rb_enc_get(re));
- if (OBJ_TAINTED(re)) OBJ_TAINT(str);
+ if (OBJ_TAINTED(re)) {
+ OBJ_TAINT(str);
+ }
return str;
}
@@ -772,7 +776,8 @@
}
static Regexp*
-make_regexp(const char *s, long len, rb_encoding *enc, int flags, onig_errmsg_buffer err)
+make_regexp(const char *s, long len, rb_encoding *enc, int flags,
+ onig_errmsg_buffer err)
{
Regexp *rp;
int r;
@@ -1228,9 +1233,88 @@
}
#endif
+static void
+get_cstring(VALUE str, CFStringEncoding enc, char **pcstr, size_t *pcharsize,
+ bool *should_free)
+{
+ if (pcstr != NULL && pcharsize != NULL && should_free != NULL) {
+ char *p = (char *)CFStringGetCStringPtr((CFStringRef)str, enc);
+ if (p != NULL) {
+ *should_free = false;
+ }
+ else {
+ const size_t s = CFStringGetMaximumSizeForEncoding(
+ CFStringGetLength((CFStringRef)str), enc);
+ p = (char *)malloc(s + 1);
+ assert(CFStringGetCString((CFStringRef)str, p, s + 1, enc));
+ *should_free = true;
+ }
+ *pcstr = p;
+ *pcharsize = sizeof(char);
+ }
+}
+
+static void
+get_unistring(VALUE str, CFStringEncoding enc, char **pcstr, size_t *pcharsize,
+ bool *should_free)
+{
+ if (pcstr != NULL && pcharsize != NULL && should_free != NULL) {
+ UniChar *p = (UniChar *)CFStringGetCharactersPtr((CFStringRef)str);
+ const size_t str_len = CFStringGetLength((CFStringRef)str);
+ if (p != NULL) {
+ *should_free = false;
+ }
+ else {
+ const size_t s = CFStringGetMaximumSizeForEncoding(
+ str_len, enc);
+ p = (UniChar *)malloc(s);
+ CFStringGetCharacters((CFStringRef)str,
+ CFRangeMake(0, str_len),
+ p);
+ *should_free = true;
+ }
+ *pcstr = (char *)p;
+ *pcharsize = sizeof(UniChar);
+ }
+}
+
+static inline bool
+multibyte_encoding(rb_encoding *enc)
+{
+ return enc == (rb_encoding *)ONIG_ENCODING_UTF16_BE
+ || enc == (rb_encoding *)ONIG_ENCODING_UTF16_LE
+ || enc == (rb_encoding *)ONIG_ENCODING_UTF32_BE
+ || enc == (rb_encoding *)ONIG_ENCODING_UTF32_LE;
+}
+
static rb_encoding*
-rb_reg_prepare_enc(VALUE re, VALUE str, int warn)
+rb_reg_prepare_enc(VALUE re, VALUE str, char **pcstr, size_t *pcharsize,
+ bool *should_free)
{
+ CFStringEncoding enc = CFStringGetFastestEncoding((CFStringRef)str);
+ switch (enc) {
+ case kCFStringEncodingMacRoman:
+ case kCFStringEncodingWindowsLatin1:
+ case kCFStringEncodingISOLatin1:
+ case kCFStringEncodingNextStepLatin:
+ case kCFStringEncodingASCII:
+ case kCFStringEncodingNonLossyASCII:
+ get_cstring(str, enc, pcstr, pcharsize, should_free);
+ return (rb_encoding *)ONIG_ENCODING_ASCII;
+
+ case kCFStringEncodingUTF8:
+ case kCFStringEncodingUTF16:
+ case kCFStringEncodingUTF16BE:
+ case kCFStringEncodingUTF16LE:
+ case kCFStringEncodingUTF32:
+ case kCFStringEncodingUTF32BE:
+ case kCFStringEncodingUTF32LE:
+ get_unistring(str, enc, pcstr, pcharsize, should_free);
+ return (rb_encoding *)ONIG_ENCODING_UTF16_LE;
+ }
+
+ rb_raise(rb_eArgError, "given string has unrecognized encoding");
+#if 0
rb_encoding *enc = 0;
#if !WITH_OBJC
@@ -1265,10 +1349,12 @@
}
#endif
return enc;
+#endif
}
-regex_t *
-rb_reg_prepare_re(VALUE re, VALUE str)
+static regex_t *
+rb_reg_prepare_re(VALUE re, VALUE str, char **pcstr, size_t *pcharsize,
+ bool *should_free)
{
regex_t *reg = RREGEXP(re)->ptr;
onig_errmsg_buffer err = "";
@@ -1277,38 +1363,49 @@
const char *pattern;
VALUE unescaped;
rb_encoding *fixed_enc = 0;
- rb_encoding *enc = rb_reg_prepare_enc(re, str, 1);
+ rb_encoding *enc = rb_reg_prepare_enc(re, str, pcstr, pcharsize,
+ should_free);
-#if !WITH_OBJC
- if (reg->enc == enc) return reg;
-#endif
+ if ((rb_encoding *)reg->enc == enc) {
+ return reg;
+ }
rb_reg_check(re);
reg = RREGEXP(re)->ptr;
pattern = RREGEXP(re)->str;
- unescaped = rb_reg_preprocess(
- pattern, pattern + RREGEXP(re)->len, enc,
+ unescaped = rb_reg_preprocess(pattern, pattern + RREGEXP(re)->len, enc,
&fixed_enc, err);
if (unescaped == Qnil) {
rb_raise(rb_eArgError, "regexp preprocess failed: %s", err);
}
-#if WITH_OBJC
- enc = (rb_encoding *)ONIG_ENCODING_ASCII;
-#endif
+ UChar *begin, *end;
+ if (multibyte_encoding(enc)) {
+ UniChar *chars = (UniChar *)CFStringGetCharactersPtr(
+ (CFStringRef)unescaped);
+ const long len = RSTRING_LEN(unescaped);
+ if (chars == NULL) {
+ chars = (UniChar *)alloca(sizeof(UniChar) * len);
+ CFStringGetCharacters((CFStringRef)unescaped,
+ CFRangeMake(0, len), chars);
+ }
+ begin = (UChar *)chars;
+ end = (UChar *)chars + (sizeof(UniChar) * len);
+ }
+ else {
+ begin = (UChar *)RSTRING_PTR(unescaped);
+ end = begin + RSTRING_LEN(unescaped);
+ }
- r = onig_new(&reg, (UChar* )RSTRING_PTR(unescaped),
- (UChar* )(RSTRING_PTR(unescaped) + RSTRING_LEN(unescaped)),
- reg->options, (OnigEncoding)enc,
- OnigDefaultSyntax, &einfo);
- if (r) {
+ r = onig_new(&reg, begin, end, reg->options, (OnigEncoding)enc,
+ OnigDefaultSyntax, &einfo);
+ if (r != 0) {
onig_error_code_to_str((UChar*)err, r, &einfo);
rb_reg_raise(pattern, RREGEXP(re)->len, err, re);
}
- RB_GC_GUARD(unescaped);
return reg;
}
@@ -1321,7 +1418,7 @@
UChar *p, *string;
#endif
- enc = rb_reg_prepare_enc(re, str, 0);
+ enc = rb_reg_prepare_enc(re, str, NULL, NULL, NULL);
if (reverse) {
range = -pos;
@@ -1350,47 +1447,45 @@
int
rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
{
- int result;
- VALUE match;
- struct re_registers *pregs;
- const char *cstr, *range;
- long clen;
regex_t *reg0 = RREGEXP(re)->ptr, *reg;
int busy = FL_TEST(re, REG_BUSY);
- cstr = range = RSTRING_PTR(str);
- clen = RSTRING_LEN(str);
-#if WITH_OBJC
static struct re_registers *regs = NULL;
if (regs == NULL) {
regs = xmalloc(sizeof(struct re_registers));
rb_objc_root(&regs);
}
- pregs = regs;
-#else
- static struct re_registers regs;
- pregs = &regs;
-#endif
+ struct re_registers *pregs = regs;
+ const size_t clen = RSTRING_LEN(str);
if (pos > clen || pos < 0) {
rb_backref_set(Qnil);
return -1;
}
- reg = rb_reg_prepare_re(re, str);
+ char *cstr = NULL;
+ size_t charsize = 0;
+ bool should_free = false;
+ reg = rb_reg_prepare_re(re, str, &cstr, &charsize, &should_free);
+ char *range = cstr;
FL_SET(re, REG_BUSY);
if (!reverse) {
- range += RSTRING_LEN(str);
+ range += (clen * charsize);
}
MEMZERO(pregs, struct re_registers, 1);
- result = onig_search(RREGEXP(re)->ptr,
- (UChar*)cstr,
- ((UChar*)cstr + clen),
- ((UChar*)cstr + pos),
- ((UChar*)range),
- pregs, ONIG_OPTION_NONE);
+ int result = onig_search(reg,
+ (UChar*)cstr,
+ ((UChar*)cstr + (clen * charsize)),
+ ((UChar*)cstr + (pos * charsize)),
+ ((UChar*)range),
+ pregs, ONIG_OPTION_NONE);
+ if (should_free) {
+ free(cstr);
+ cstr = NULL;
+ }
+
if (RREGEXP(re)->ptr != reg) {
if (busy) {
onig_free(reg);
@@ -1400,7 +1495,9 @@
RREGEXP(re)->ptr = reg;
}
}
- if (!busy) FL_UNSET(re, REG_BUSY);
+ if (!busy) {
+ FL_UNSET(re, REG_BUSY);
+ }
if (result < 0) {
onig_region_free(pregs, 0);
if (result == ONIG_MISMATCH) {
@@ -1414,10 +1511,26 @@
}
}
+ if (charsize > 1) {
+ int i;
+ for (i = 0; i < pregs->num_regs; i++) {
+ if (pregs->beg[i] > 0) {
+ assert((pregs->beg[i] % charsize) == 0);
+ pregs->beg[i] /= charsize;
+ }
+ if (pregs->end[i] > 0) {
+ assert((pregs->end[i] % charsize) == 0);
+ pregs->end[i] /= charsize;
+ }
+ }
+ assert((result % charsize) == 0);
+ result /= charsize;
+ }
+
#if WITH_OBJC
- match = match_alloc(rb_cMatch, 0);
+ VALUE match = match_alloc(rb_cMatch, 0);
#else
- match = rb_backref_get();
+ VALUE match = rb_backref_get();
if (NIL_P(match) || FL_TEST(match, MATCH_BUSY)) {
match = match_alloc(rb_cMatch);
}
@@ -1467,7 +1580,9 @@
long start, end, len;
struct re_registers *regs;
- if (NIL_P(match)) return Qnil;
+ if (NIL_P(match)) {
+ return Qnil;
+ }
match_check(match);
regs = RMATCH_REGS(match);
if (nth >= regs->num_regs) {
@@ -1475,14 +1590,17 @@
}
if (nth < 0) {
nth += regs->num_regs;
- if (nth <= 0) return Qnil;
+ if (nth <= 0) {
+ return Qnil;
+ }
}
start = BEG(nth);
- if (start == -1) return Qnil;
+ if (start == -1) {
+ return Qnil;
+ }
end = END(nth);
len = end - start;
str = rb_str_subseq(RMATCH(match)->str, start, len);
- OBJ_INFECT(str, match);
return str;
}
@@ -1794,9 +1912,15 @@
VALUE str = rb_reg_last_match(match);
match_check(match);
- if (NIL_P(str)) str = rb_str_new(0,0);
- if (OBJ_TAINTED(match)) OBJ_TAINT(str);
- if (OBJ_TAINTED(RMATCH(match)->str)) OBJ_TAINT(str);
+ if (NIL_P(str)) {
+ str = rb_str_new(0,0);
+ }
+ if (OBJ_TAINTED(match)) {
+ OBJ_TAINT(str);
+ }
+ else if (OBJ_TAINTED(RMATCH(match)->str)) {
+ OBJ_TAINT(str);
+ }
return str;
}
@@ -1886,9 +2010,10 @@
for (i = 0; i < num_regs; i++) {
VALUE v;
rb_str_buf_cat2(str, " ");
- if (0 < i) {
- if (names[i].name)
+ if (i > 0) {
+ if (names[i].name) {
rb_str_buf_cat(str, (const char *)names[i].name, names[i].len);
+ }
else {
char buf[sizeof(i)*3+1];
snprintf(buf, sizeof(buf), "%d", i);
@@ -1897,10 +2022,12 @@
rb_str_buf_cat2(str, ":");
}
v = rb_reg_nth_match(i, match);
- if (v == Qnil)
+ if (v == Qnil) {
rb_str_buf_cat2(str, "nil");
- else
+ }
+ else {
rb_str_buf_append(str, rb_str_inspect(v, 0));
+ }
}
rb_str_buf_cat2(str, ">");
@@ -2324,7 +2451,6 @@
enc = rb_enc_get(str);
buf = rb_reg_preprocess(p, end, enc, &fixed_enc, err);
- RB_GC_GUARD(str);
if (buf == Qnil) {
return rb_reg_error_desc(str, 0, err);
@@ -2404,15 +2530,21 @@
rb_encoding *a_enc = rb_ascii8bit_encoding();
#endif
- if (!OBJ_TAINTED(obj) && rb_safe_level() >= 4)
+ if (!OBJ_TAINTED(obj) && rb_safe_level() >= 4) {
rb_raise(rb_eSecurityError, "Insecure: can't modify regexp");
+ }
rb_check_frozen(obj);
- if (FL_TEST(obj, REG_LITERAL))
+ if (FL_TEST(obj, REG_LITERAL)) {
rb_raise(rb_eSecurityError, "can't modify literal regexp");
- if (re->ptr) onig_free(re->ptr);
- if (re->str) xfree(re->str);
- re->ptr = 0;
- re->str = 0;
+ }
+ if (re->ptr != NULL) {
+ onig_free(re->ptr);
+ }
+ if (re->str != NULL) {
+ xfree(re->str);
+ }
+ re->ptr = NULL;
+ re->str = NULL;
unescaped = rb_reg_preprocess(s, s+len, enc, &fixed_enc, err);
if (unescaped == Qnil)
@@ -2444,23 +2576,24 @@
if (options & ARG_ENCODING_NONE) {
re->basic.flags |= REG_ENCODING_NONE;
}
-
- GC_WB(&re->ptr, make_regexp(RSTRING_PTR(unescaped),
- RSTRING_LEN(unescaped), enc,
- options & ARG_REG_OPTION_MASK, err));
- if (!re->ptr) return -1;
+
+ Regexp *reg = make_regexp(RSTRING_PTR(unescaped),
+ RSTRING_LEN(unescaped), enc,
+ options & ARG_REG_OPTION_MASK, err);
+ if (reg == NULL) {
+ return -1;
+ }
+ GC_WB(&re->ptr, reg);
GC_WB(&re->str, ALLOC_N(char, len+1));
memcpy(re->str, s, len);
re->str[len] = '\0';
re->len = len;
- RB_GC_GUARD(unescaped);
return 0;
}
static int
rb_reg_initialize_str(VALUE obj, VALUE str, int options, onig_errmsg_buffer err)
{
- int ret;
rb_encoding *enc = rb_enc_get(str);
if (options & ARG_ENCODING_NONE) {
#if !WITH_OBJC
@@ -2475,10 +2608,8 @@
}
#endif
}
- ret = rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), enc,
- options, err);
- RB_GC_GUARD(str);
- return ret;
+ return rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), enc,
+ options, err);
}
static VALUE
Modified: MacRuby/branches/experimental/spec/macruby/core/hash_spec.rb
===================================================================
--- MacRuby/branches/experimental/spec/macruby/core/hash_spec.rb 2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/spec/macruby/core/hash_spec.rb 2009-07-20 20:49:50 UTC (rev 2036)
@@ -62,7 +62,7 @@
end
it "can have a singleton class" do
- a = NSDictionary.array
+ a = NSDictionary.dictionary
def a.foo; 42; end
a.foo.should == 42
lambda { a[42] = 123 }.should raise_error(RuntimeError)
Modified: MacRuby/branches/experimental/string.c
===================================================================
--- MacRuby/branches/experimental/string.c 2009-07-20 20:28:18 UTC (rev 2035)
+++ MacRuby/branches/experimental/string.c 2009-07-20 20:49:50 UTC (rev 2036)
@@ -157,6 +157,15 @@
}
VALUE
+rb_unicode_str_new(const UniChar *ptr, const size_t len)
+{
+ VALUE str = str_alloc(rb_cString);
+ CFStringAppendCharacters((CFMutableStringRef)str,
+ ptr, len);
+ return str;
+}
+
+VALUE
rb_str_new(const char *ptr, long len)
{
return str_new(rb_cString, ptr, len);
@@ -165,17 +174,13 @@
VALUE
rb_usascii_str_new(const char *ptr, long len)
{
- VALUE str = str_new(rb_cString, ptr, len);
-
- return str;
+ return str_new(rb_cString, ptr, len);
}
VALUE
rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
{
- VALUE str = str_new(rb_cString, ptr, len);
-
- return str;
+ return str_new(rb_cString, ptr, len);
}
VALUE
@@ -592,8 +597,12 @@
kCFStringEncodingUTF8);
cptr = (char *)xmalloc(max + 1);
- assert(CFStringGetCString((CFStringRef)ptr, cptr,
- max, kCFStringEncodingUTF8));
+ if (!CFStringGetCString((CFStringRef)ptr, cptr,
+ max + 1, kCFStringEncodingUTF8)) {
+ // Probably a UTF16 string...
+ xfree(cptr);
+ return NULL;
+ }
return cptr;
}
@@ -817,6 +826,9 @@
Check_Type(str2, T_STRING);
}
+ CFStringAppend((CFMutableStringRef)str, (CFStringRef)str2);
+
+#if 0
const char *ptr;
long len;
@@ -824,6 +836,7 @@
len = RSTRING_LEN(str2);
rb_objc_str_cat(str, ptr, len, kCFStringEncodingASCII);
+#endif
return str;
}
@@ -1892,13 +1905,12 @@
static VALUE
rb_str_sub_bang(VALUE str, SEL sel, int argc, VALUE *argv)
{
- VALUE pat, repl, match, hash = Qnil;
- struct re_registers *regs;
- int iter = 0;
- int tainted = 0;
+ VALUE repl, hash = Qnil;
+ bool iter = false;
+ bool tainted = false;
if (argc == 1 && rb_block_given_p()) {
- iter = 1;
+ iter = true;
}
else if (argc == 2) {
repl = argv[1];
@@ -1906,30 +1918,33 @@
if (NIL_P(hash)) {
StringValue(repl);
}
- if (OBJ_TAINTED(repl)) tainted = 1;
+ if (OBJ_TAINTED(repl)) {
+ tainted = true;
+ }
}
else {
rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
}
- pat = get_pat(argv[0], 1);
+ VALUE pat = get_pat(argv[0], 1);
if (rb_reg_search(pat, str, 0, 0) >= 0) {
+ VALUE match = rb_backref_get();
+ struct re_registers *regs = RMATCH_REGS(match);
- match = rb_backref_get();
- regs = RMATCH_REGS(match);
-
if (iter || !NIL_P(hash)) {
-
if (iter) {
rb_match_busy(match);
repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
}
else {
- repl = rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0)));
+ repl = rb_hash_aref(hash, rb_str_subseq(str, BEG(0),
+ END(0) - BEG(0)));
repl = rb_obj_as_string(repl);
}
str_frozen_check(str);
- if (iter) rb_backref_set(match);
+ if (iter) {
+ rb_backref_set(match);
+ }
}
else {
repl = rb_reg_regsub(repl, str, regs, pat);
@@ -1937,16 +1952,18 @@
rb_str_modify(str);
rb_str_splice_0(str, BEG(0), END(0) - BEG(0), repl);
- if (OBJ_TAINTED(repl)) tainted = 1;
+ if (OBJ_TAINTED(repl)) {
+ tainted = true;
+ }
- if (tainted) OBJ_TAINT(str);
-
+ if (tainted) {
+ OBJ_TAINT(str);
+ }
return str;
}
return Qnil;
}
-
/*
* call-seq:
* str.sub(pattern, replacement) => new_str
@@ -1989,37 +2006,35 @@
static VALUE
str_gsub(SEL sel, int argc, VALUE *argv, VALUE str, int bang)
{
- VALUE pat, val, repl, match, dest, hash = Qnil;
- struct re_registers *regs;
- long beg, n;
- long offset, slen, len;
- int iter = 0;
- const char *sp, *cp;
- int tainted = 0;
- rb_encoding *str_enc;
-
+ bool iter = false;
+ bool tainted = false;
+ VALUE hash = Qnil, repl = Qnil;
+
switch (argc) {
- case 1:
- RETURN_ENUMERATOR(str, argc, argv);
- iter = 1;
- break;
- case 2:
- repl = argv[1];
- hash = rb_check_convert_type(argv[1], T_HASH, "Hash", "to_hash");
- if (NIL_P(hash)) {
- StringValue(repl);
- }
- if (OBJ_TAINTED(repl)) {
- tainted = 1;
- }
- break;
- default:
- rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
+ case 1:
+ RETURN_ENUMERATOR(str, argc, argv);
+ iter = true;
+ break;
+
+ case 2:
+ repl = argv[1];
+ hash = rb_check_convert_type(argv[1], T_HASH, "Hash", "to_hash");
+ if (NIL_P(hash)) {
+ StringValue(repl);
+ }
+ if (OBJ_TAINTED(repl)) {
+ tainted = true;
+ }
+ break;
+
+ default:
+ rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)",
+ argc);
}
- pat = get_pat(argv[0], 1);
- offset=0; n=0;
- beg = rb_reg_search(pat, str, 0, 0);
+ VALUE pat = get_pat(argv[0], 1);
+ long offset = 0;
+ long beg = rb_reg_search(pat, str, 0, 0);
if (beg < 0) {
if (bang) {
return Qnil; /* no match, no substitution */
@@ -2027,23 +2042,23 @@
return rb_str_new3(str);
}
- dest = rb_str_new5(str, NULL, 0);
- slen = RSTRING_LEN(str);
- sp = RSTRING_PTR(str);
- cp = sp;
- str_enc = NULL;
+ VALUE dest = rb_str_new5(str, NULL, 0);
+ long slen = RSTRING_LEN(str);
+ VALUE match;
do {
- n++;
match = rb_backref_get();
- regs = RMATCH_REGS(match);
+ struct re_registers *regs = RMATCH_REGS(match);
+ VALUE val;
+
if (iter || !NIL_P(hash)) {
if (iter) {
rb_match_busy(match);
val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
}
else {
- val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0)));
+ val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0),
+ END(0) - BEG(0)));
val = rb_obj_as_string(val);
}
str_mod_check(str, sp, slen);
@@ -2062,13 +2077,15 @@
val = rb_reg_regsub(repl, str, regs, pat);
}
+
if (OBJ_TAINTED(val)) {
- tainted = 1;
+ tainted = true;
}
- len = beg - offset; /* copy pre-match substr */
- if (len) {
- rb_enc_str_buf_cat(dest, cp, len, str_enc);
+ long len = beg - offset; /* copy pre-match substr */
+ if (len > 0) {
+ rb_str_buf_append(dest, rb_str_subseq(str, offset, len));
+ //rb_enc_str_buf_cat(dest, cp, len, str_enc);
}
rb_str_buf_append(dest, val);
@@ -2079,18 +2096,24 @@
* Always consume at least one character of the input string
* in order to prevent infinite loops.
*/
- if (slen <= END(0)) break;
+ if (slen <= END(0)) {
+ break;
+ }
len = 1;
- rb_enc_str_buf_cat(dest, sp+END(0), len, str_enc);
+ rb_str_buf_append(dest, rb_str_subseq(str, END(0), len));
+ //rb_enc_str_buf_cat(dest, sp+END(0), len, str_enc);
offset = END(0) + len;
}
- cp = sp + offset;
- if (offset > slen) break;
+ if (offset > slen) {
+ break;
+ }
beg = rb_reg_search(pat, str, offset, 0);
- }
+ }
while (beg >= 0);
+
if (slen > offset) {
- rb_enc_str_buf_cat(dest, cp, slen - offset, str_enc);
+ rb_str_buf_append(dest, rb_str_subseq(str, offset, slen - offset));
+ //rb_enc_str_buf_cat(dest, cp, slen - offset, str_enc);
}
rb_backref_set(match);
if (bang) {
@@ -2099,7 +2122,7 @@
}
else {
if (!tainted && OBJ_TAINTED(str)) {
- tainted = 1;
+ tainted = true;
}
str = dest;
}
@@ -2443,6 +2466,7 @@
return str;
}
+#if 0
static void
str_cat_char(VALUE str, int c, rb_encoding *enc)
{
@@ -2458,6 +2482,7 @@
str_cat_char(str, '\\', enc);
str_cat_char(str, c, enc);
}
+#endif
/*
* call-seq:
@@ -2471,13 +2496,37 @@
* str.inspect #=> "\"hel\\bo\""
*/
+static inline void
+__append(CFMutableStringRef out, UniChar c, bool prefix)
+{
+ CFStringAppendCharacters(out, &c, 1);
+}
+
VALUE
rb_str_inspect(VALUE str, SEL sel)
{
- rb_encoding *enc = STR_ENC_GET(str);
+ const long len = CFStringGetLength((CFStringRef)str);
+ CFStringInlineBuffer buf;
+ CFStringInitInlineBuffer((CFStringRef)str, &buf, CFRangeMake(0, len));
+
+ CFMutableStringRef out = CFStringCreateMutable(NULL, 0);
+ __append(out, '"', false);
+
+ long i;
+ for (i = 0; i < len; i++) {
+ UniChar c = CFStringGetCharacterFromInlineBuffer(&buf, i);
+ __append(out, c, false);
+ }
+ __append(out, '"', false);
+
+ return (VALUE)CFMakeCollectable(out);
+
+#if 0
const char *p, *pend;
VALUE result;
+
+
p = RSTRING_PTR(str);
pend = p + RSTRING_LEN(str);
if (p == NULL) {
@@ -2545,6 +2594,7 @@
str_cat_char(result, '"', enc);
return result;
+#endif
}
#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20090720/67a4dbba/attachment-0001.html>
More information about the macruby-changes
mailing list