[macruby-changes] [192] MacRuby/trunk
source_changes at macosforge.org
source_changes at macosforge.org
Thu May 15 01:06:23 PDT 2008
Revision: 192
http://trac.macosforge.org/projects/ruby/changeset/192
Author: lsansonetti at apple.com
Date: 2008-05-15 01:06:22 -0700 (Thu, 15 May 2008)
Log Message:
-----------
Remove the upstream encoding implementations and use CFString's encodings instead; also fix miscellaneous bugs and memory leaks.
Modified Paths:
--------------
MacRuby/trunk/bs.c
MacRuby/trunk/debug.c
MacRuby/trunk/encoding.c
MacRuby/trunk/gc.c
MacRuby/trunk/include/ruby/encoding.h
MacRuby/trunk/io.c
MacRuby/trunk/marshal.c
MacRuby/trunk/numeric.c
MacRuby/trunk/objc.m
MacRuby/trunk/parse.y
MacRuby/trunk/re.c
MacRuby/trunk/ruby.c
MacRuby/trunk/string.c
MacRuby/trunk/time.c
MacRuby/trunk/transcode.c
Modified: MacRuby/trunk/bs.c
===================================================================
--- MacRuby/trunk/bs.c 2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/bs.c 2008-05-15 08:06:22 UTC (rev 192)
@@ -186,6 +186,7 @@
break;
}
}
+ free(type_modifier);
}
static inline bool
@@ -921,6 +922,7 @@
ASSERT_ALLOC(bs_informal_method);
bs_informal_method->name = sel_registerName(selector);
+ free(selector);
bs_informal_method->class_method =
get_boolean_attribute(reader, "class_method", false);
bs_informal_method->type = method_type;
Modified: MacRuby/trunk/debug.c
===================================================================
--- MacRuby/trunk/debug.c 2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/debug.c 2008-05-15 08:06:22 UTC (rev 192)
@@ -21,6 +21,7 @@
enum ruby_value_type value_type;
enum node_type node_type;
enum {
+#if !WITH_OBJC
RUBY_ENCODING_INLINE_MAX = ENCODING_INLINE_MAX,
RUBY_ENCODING_SHIFT = ENCODING_SHIFT,
RUBY_ENCODING_MASK = ENCODING_MASK,
@@ -28,7 +29,8 @@
RUBY_ENC_CODERANGE_UNKNOWN = ENC_CODERANGE_UNKNOWN,
RUBY_ENC_CODERANGE_7BIT = ENC_CODERANGE_7BIT,
RUBY_ENC_CODERANGE_VALID = ENC_CODERANGE_VALID,
- RUBY_ENC_CODERANGE_BROKEN = ENC_CODERANGE_BROKEN,
+ RUBY_ENC_CODERANGE_BROKEN = ENC_CODERANGE_BROKEN,
+#endif
RUBY_FL_MARK = FL_MARK,
RUBY_FL_RESERVED = FL_RESERVED,
RUBY_FL_FINALIZE = FL_FINALIZE,
Modified: MacRuby/trunk/encoding.c
===================================================================
--- MacRuby/trunk/encoding.c 2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/encoding.c 2008-05-15 08:06:22 UTC (rev 192)
@@ -20,6 +20,100 @@
static ID id_encoding, id_base_encoding;
static VALUE rb_cEncoding;
+#if WITH_OBJC
+
+static CFMutableDictionaryRef __encodings = NULL;
+
+static VALUE
+enc_new(const CFStringEncoding *enc)
+{
+ return Data_Wrap_Struct(rb_cEncoding, NULL, NULL, (void *)enc);
+}
+
+static void
+enc_init_db(void)
+{
+ const CFStringEncoding *e;
+
+ __encodings = CFDictionaryCreateMutable(NULL, 0, NULL, NULL);
+
+ e = CFStringGetListOfAvailableEncodings();
+ while (e != NULL && *e != kCFStringEncodingInvalidId) {
+ VALUE iana;
+ VALUE encoding;
+
+ encoding = enc_new(e);
+
+ iana = (VALUE)CFStringConvertEncodingToIANACharSetName(*e);
+ if (iana != 0) {
+ const char *name;
+ char *p;
+
+ name = RSTRING_CPTR(iana);
+ p = strchr(name, '-');
+ if ((p = strchr(name, '-')) != NULL
+ || islower(*name)) {
+ char *tmp = alloca(strlen(name));
+ strcpy(tmp, name);
+ if (p != NULL) {
+ p = tmp + (p - name);
+ do {
+ *p = '_';
+ p++;
+ p = strchr(p, '-');
+ }
+ while (p != NULL);
+ }
+ if (islower(*tmp))
+ *tmp = toupper(*tmp);
+ name = tmp;
+ }
+ rb_define_const(rb_cEncoding, name, encoding);
+ }
+ CFDictionarySetValue(__encodings, (const void *)(*e),
+ (const void *)encoding);
+ e++;
+ }
+
+ assert(CFDictionaryGetCount((CFDictionaryRef)__encodings) > 0);
+}
+
+static VALUE
+enc_make(const CFStringEncoding *enc)
+{
+ VALUE v;
+ v = (VALUE)CFDictionaryGetValue( (CFDictionaryRef)__encodings,
+ (const void *)(*enc));
+ assert(v != 0);
+ return v;
+}
+
+VALUE
+rb_enc_from_encoding(rb_encoding *enc)
+{
+ return enc_make(enc);
+}
+
+static inline CFStringEncoding
+rb_enc_to_enc(VALUE v)
+{
+ return *(CFStringEncoding *)DATA_PTR(v);
+}
+
+static inline CFStringEncoding *
+rb_enc_to_enc_ptr(VALUE v)
+{
+ return (CFStringEncoding *)DATA_PTR(v);
+}
+
+rb_encoding *
+rb_to_encoding(VALUE v)
+{
+ return rb_enc_to_enc_ptr(v);
+}
+
+#else
+
struct rb_encoding_entry {
const char *name;
rb_encoding *enc;
@@ -81,9 +175,6 @@
{
VALUE enc = Data_Wrap_Struct(rb_cEncoding, enc_mark, 0, encoding);
encoding->auxiliary_data = (void *)enc;
-#if WITH_OBJC
- rb_objc_retain(enc);
-#endif
return enc;
}
@@ -348,12 +439,17 @@
return index;
}
#endif
+#endif // WITH_OBJC
int
rb_enc_dummy_p(rb_encoding *enc)
{
+#if WITH_OBJC
+ return Qfalse;
+#else
VALUE encoding = rb_enc_from_encoding(enc);
return ENC_DUMMY_P(encoding);
+#endif
}
/*
@@ -375,6 +471,7 @@
return rb_enc_dummy_p(rb_to_encoding(enc)) ? Qtrue : Qfalse;
}
+#if !WITH_OBJC
static int
enc_alias(const char *alias, int idx)
{
@@ -576,6 +673,7 @@
rb_raise(rb_eTypeError, "wrong argument type %s (not encode capable)", etype);
}
}
+#endif
ID
rb_id_encoding(void)
@@ -586,12 +684,10 @@
return id_encoding;
}
+#if !WITH_OBJC
int
rb_enc_internal_get_index(VALUE obj)
{
-#if WITH_OBJC
- return 0;
-#else
int i;
i = ENCODING_GET_INLINED(obj);
@@ -602,13 +698,11 @@
i = NUM2INT(iv);
}
return i;
-#endif
}
void
rb_enc_internal_set_index(VALUE obj, int idx)
{
-#if !WITH_OBJC
if (idx < ENCODING_INLINE_MAX) {
ENCODING_SET_INLINED(obj, idx);
return;
@@ -616,7 +710,6 @@
ENCODING_SET_INLINED(obj, ENCODING_INLINE_MAX);
rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx));
return;
-#endif
}
void
@@ -648,31 +741,31 @@
rb_encoding*
rb_enc_get(VALUE obj)
{
-#if WITH_OBJC
- return rb_ascii8bit_encoding(); /* FIXME */
-#else
return rb_enc_from_index(rb_enc_get_index(obj));
-#endif
}
rb_encoding*
rb_enc_check(VALUE str1, VALUE str2)
{
-#if WITH_OBJC
- return NULL;
-#else
rb_encoding *enc = rb_enc_compatible(str1, str2);
if (!enc)
rb_raise(rb_eArgError, "character encodings differ: %s and %s",
rb_enc_name(rb_enc_get(str1)),
rb_enc_name(rb_enc_get(str2)));
return enc;
+}
#endif
-}
rb_encoding*
rb_enc_compatible(VALUE str1, VALUE str2)
{
+#if WITH_OBJC
+ /* TODO */
+ rb_encoding *enc = rb_enc_get(str1);
+ if (enc == rb_enc_get(str2))
+ return enc;
+ return NULL;
+#else
int idx1, idx2;
rb_encoding *enc1, *enc2;
@@ -724,15 +817,17 @@
return enc2;
}
return 0;
+#endif
}
+#if !WITH_OBJC
void
rb_enc_copy(VALUE obj1, VALUE obj2)
{
rb_enc_associate_index(obj1, rb_enc_get_index(obj2));
}
+#endif
-
/*
* call-seq:
* obj.encoding => encoding
@@ -743,26 +838,18 @@
VALUE
rb_obj_encoding(VALUE obj)
{
-#if WITH_OBJC
- /* TODO */
- return Qnil;
-#else
rb_encoding *enc = rb_enc_get(obj);
if (!enc) {
rb_raise(rb_eTypeError, "unknown encoding");
}
return rb_enc_from_encoding(enc);
-#endif
}
+#if !WITH_OBJC
int
rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
{
-#if WITH_OBJC
- int n = 1;
-#else
int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
-#endif
if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
return MBCLEN_CHARFOUND_LEN(n);
else {
@@ -840,6 +927,7 @@
{
return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_LOWER_CASE(c):(c));
}
+#endif
/*
* call-seq:
@@ -853,11 +941,24 @@
static VALUE
enc_inspect(VALUE self)
{
+#if WITH_OBJC
+ char buffer[512];
+ VALUE enc_name;
+ long n;
+
+ enc_name = (VALUE)CFStringGetNameOfEncoding(rb_enc_to_enc(self));
+
+ n = snprintf(buffer, sizeof buffer, "#<%s:%s>", rb_obj_classname(self),
+ RSTRING_CPTR(enc_name));
+
+ return rb_str_new(buffer, n);
+#else
VALUE str = rb_sprintf("#<%s:%s%s>", rb_obj_classname(self),
rb_enc_name((rb_encoding*)DATA_PTR(self)),
(ENC_DUMMY_P(self) ? " (dummy)" : ""));
ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
return str;
+#endif
}
/*
@@ -871,7 +972,11 @@
static VALUE
enc_name(VALUE self)
{
+#if WITH_OBJC
+ return (VALUE)CFStringConvertEncodingToIANACharSetName(rb_enc_to_enc(self));
+#else
return rb_usascii_str_new2(rb_enc_name((rb_encoding*)DATA_PTR(self)));
+#endif
}
static VALUE
@@ -898,9 +1003,21 @@
* #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
*
*/
+
static VALUE
enc_list(VALUE klass)
{
+#if WITH_OBJC
+ VALUE ary;
+ const CFStringEncoding *e;
+
+ ary = rb_ary_new();
+ e = CFStringGetListOfAvailableEncodings();
+ while (e != NULL && *e != kCFStringEncodingInvalidId) {
+ rb_ary_push(ary, enc_make(e));
+ e++;
+ }
+#else
VALUE ary = rb_ary_new2(enc_table.count);
int i;
for (i = 0; i < enc_table.count; ++i) {
@@ -909,6 +1026,7 @@
rb_ary_push(ary, rb_enc_from_encoding(enc));
}
}
+#endif
return ary;
}
@@ -925,8 +1043,25 @@
*
*/
static VALUE
+enc_find2(VALUE enc)
+{
+ CFStringEncoding e;
+
+ e = CFStringConvertIANACharSetNameToEncoding((CFStringRef)StringValue(enc));
+ if (e == kCFStringEncodingInvalidId)
+ return Qnil;
+ return enc_make(&e);
+}
+
+static VALUE
enc_find(VALUE klass, VALUE enc)
{
+#if WITH_OBJC
+ VALUE e = enc_find2(enc);
+ if (e == Qnil)
+ rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc));
+ return e;
+#else
int idx;
StringValue(enc);
@@ -938,6 +1073,7 @@
rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc));
}
return rb_enc_from_encoding(rb_enc_from_index(idx));
+#endif
}
/*
@@ -983,6 +1119,7 @@
return enc_find(klass, str);
}
+#if !WITH_OBJC
rb_encoding *
rb_ascii8bit_encoding(void)
{
@@ -1045,7 +1182,24 @@
{
return rb_enc_from_encoding(rb_default_external_encoding());
}
+#endif
+#if WITH_OBJC
+static rb_encoding *default_external;
+
+rb_encoding *
+rb_default_external_encoding(void)
+{
+ return default_external;
+}
+
+VALUE
+rb_enc_default_external(void)
+{
+ return enc_make(default_external);
+}
+#endif
+
/*
* call-seq:
* Encoding.default_external => enc
@@ -1063,7 +1217,11 @@
void
rb_enc_set_default_external(VALUE encoding)
{
+#if WITH_OBJC
+ default_external = rb_enc_to_enc_ptr(encoding);
+#else
default_external_index = rb_enc_to_index(rb_to_encoding(encoding));
+#endif
}
/*
@@ -1088,7 +1246,10 @@
VALUE
rb_locale_charmap(VALUE klass)
{
-#if defined NO_LOCALE_CHARMAP
+#if WITH_OBJC
+ CFStringEncoding enc = CFStringGetSystemEncoding();
+ return (VALUE)CFStringConvertEncodingToIANACharSetName(enc);
+#elif defined NO_LOCALE_CHARMAP
return rb_usascii_str_new2("ASCII-8BIT");
#elif defined HAVE_LANGINFO_H
char *codeset;
@@ -1101,6 +1262,7 @@
#endif
}
+#if !WITH_OBJC
static void
set_encoding_const(const char *name, rb_encoding *enc)
{
@@ -1156,6 +1318,7 @@
rb_ary_push(ary, str);
return ST_CONTINUE;
}
+#endif
/*
* call-seq:
@@ -1176,11 +1339,22 @@
static VALUE
rb_enc_name_list(VALUE klass)
{
+#if WITH_OBJC
+ VALUE ary, list;
+ long i, count;
+
+ ary = rb_ary_new();
+ list = enc_list(klass);
+ for (i = 0, count = RARRAY_LEN(list); i < count; i++)
+ rb_ary_push(ary, enc_name(RARRAY_AT(list, i)));
+#else
VALUE ary = rb_ary_new2(enc_table.names->num_entries);
st_foreach(enc_table.names, rb_enc_name_list_i, (st_data_t)ary);
+#endif
return ary;
}
+#if !WITH_OBJC
static int
rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg)
{
@@ -1204,6 +1378,7 @@
rb_hash_aset(aliases, key, str);
return ST_CONTINUE;
}
+#endif
/*
* call-seq:
@@ -1220,13 +1395,78 @@
static VALUE
rb_enc_aliases(VALUE klass)
{
+#if WITH_OBJC
+ /* TODO: the CFString IANA <-> charset code does support aliases, we should
+ * find a way to return them here.
+ */
+ return rb_hash_new();
+#else
VALUE aliases[2];
aliases[0] = rb_hash_new();
aliases[1] = rb_ary_new();
st_foreach(enc_table.names, rb_enc_aliases_enc_i, (st_data_t)aliases);
return aliases[0];
+#endif
}
+const char *
+rb_enc_name(rb_encoding *enc)
+{
+ CFStringRef str;
+ if (enc != NULL
+ && (str = CFStringConvertEncodingToIANACharSetName(*enc)) != NULL)
+ return RSTRING_CPTR(str);
+ return NULL;
+}
+
+long
+rb_enc_mbminlen(rb_encoding *enc)
+{
+ return rb_enc_mbmaxlen(enc);
+}
+
+long
+rb_enc_mbmaxlen(rb_encoding *enc)
+{
+ return CFStringGetMaximumSizeForEncoding(1, *enc);
+}
+
+rb_encoding *
+rb_enc_find(const char *name)
+{
+ return rb_enc_find2(rb_str_new2(name));
+}
+
+rb_encoding *
+rb_enc_find2(VALUE name)
+{
+ VALUE e = enc_find2(name);
+ return e == Qnil ? NULL : rb_enc_to_enc_ptr(e);
+}
+
+rb_encoding *
+rb_enc_get(VALUE obj)
+{
+ int type = TYPE(obj);
+ if (type == T_STRING) {
+ CFStringEncoding enc = CFStringGetFastestEncoding((CFStringRef)obj);
+ if (enc == kCFStringEncodingInvalidId)
+ return NULL;
+ return rb_enc_to_enc_ptr(enc_make(&enc));
+ }
+ else {
+ /* TODO */
+ return NULL;
+ }
+}
+
+rb_encoding *
+rb_locale_encoding(void)
+{
+ CFStringEncoding enc = CFStringGetSystemEncoding();
+ return rb_enc_to_enc_ptr(enc_make(&enc));
+}
+
void
Init_Encoding(void)
{
Modified: MacRuby/trunk/gc.c
===================================================================
--- MacRuby/trunk/gc.c 2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/gc.c 2008-05-15 08:06:22 UTC (rev 192)
@@ -2039,7 +2039,8 @@
if (ctx->class_of != 0) {
if (ctx->class_of == rb_cClass) {
/* Class is a special case. */
- if (TYPE(r->address) != T_CLASS
+ if (rb_objc_is_non_native(r->address)
+ || TYPE(r->address) != T_CLASS
|| FL_TEST(r->address, FL_SINGLETON))
continue;
}
Modified: MacRuby/trunk/include/ruby/encoding.h
===================================================================
--- MacRuby/trunk/include/ruby/encoding.h 2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/include/ruby/encoding.h 2008-05-15 08:06:22 UTC (rev 192)
@@ -17,6 +17,13 @@
#else
# include <varargs.h>
#endif
+
+#if WITH_OBJC
+
+typedef CFStringEncoding rb_encoding;
+
+#else
+
#include "ruby/oniguruma.h"
#define ENCODING_INLINE_MAX 1023
@@ -48,8 +55,6 @@
# define ENCODING_IS_ASCII8BIT(obj) (ENCODING_GET_INLINED(obj) == 0)
#endif
-#define ENCODING_MAXNAMELEN 42
-
#define ENC_CODERANGE_MASK (FL_USER8|FL_USER9)
#define ENC_CODERANGE_UNKNOWN 0
#define ENC_CODERANGE_7BIT FL_USER8
@@ -75,7 +80,10 @@
} while (0)
typedef OnigEncodingType rb_encoding;
+#endif
+#define ENCODING_MAXNAMELEN 42
+
int rb_enc_replicate(const char *, rb_encoding *);
int rb_define_dummy_encoding(const char *);
int rb_enc_dummy_p(rb_encoding *);
@@ -108,12 +116,25 @@
/* name -> rb_encoding */
rb_encoding * rb_enc_find(const char *name);
+#if WITH_OBJC
+rb_encoding * rb_enc_find2(VALUE name);
+#endif
+
/* encoding -> name */
+#if WITH_OBJC
+const char *rb_enc_name(rb_encoding *);
+#else
#define rb_enc_name(enc) (enc)->name
+#endif
/* encoding -> minlen/maxlen */
+#if WITH_OBJC
+long rb_enc_mbminlen(rb_encoding *);
+long rb_enc_mbmaxlen(rb_encoding *);
+#else
#define rb_enc_mbminlen(enc) (enc)->min_enc_len
#define rb_enc_mbmaxlen(enc) (enc)->max_enc_len
+#endif
/* -> mbclen (no error notification: 0 < ret <= e-p, no exception) */
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc);
@@ -148,6 +169,17 @@
/* ptr, ptr, encoding -> newline_or_not */
#define rb_enc_is_newline(p,end,enc) ONIGENC_IS_MBC_NEWLINE(enc,(UChar*)(p),(UChar*)(end))
+#if WITH_OBJC
+#define rb_enc_isctype(c,t,enc) (iswctype(c,t))
+#define rb_enc_isascii(c,enc) (iswascii(c))
+#define rb_enc_isalpha(c,enc) (iswalpha(c))
+#define rb_enc_islower(c,enc) (iswlower(c))
+#define rb_enc_isupper(c,enc) (iswupper(c))
+#define rb_enc_isalnum(c,enc) (iswalnum(c))
+#define rb_enc_isprint(c,enc) (iswprint(c))
+#define rb_enc_isspace(c,enc) (iswspace(c))
+#define rb_enc_isdigit(c,enc) (iswdigit(c))
+#else
#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t)
#define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)
#define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA(enc,c)
@@ -157,6 +189,7 @@
#define rb_enc_isprint(c,enc) ONIGENC_IS_CODE_PRINT(enc,c)
#define rb_enc_isspace(c,enc) ONIGENC_IS_CODE_SPACE(enc,c)
#define rb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT(enc,c)
+#endif
#define rb_enc_asciicompat(enc) (!rb_enc_dummy_p(enc) && rb_enc_mbminlen(enc)==1)
Modified: MacRuby/trunk/io.c
===================================================================
--- MacRuby/trunk/io.c 2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/io.c 2008-05-15 08:06:22 UTC (rev 192)
@@ -1351,6 +1351,7 @@
io_enc_str(VALUE str, rb_io_t *fptr)
{
OBJ_TAINT(str);
+#if !WITH_OBJC
if (fptr->enc2) {
/* two encodings, so transcode from enc2 to enc */
/* the methods in transcode.c are static, so call indirectly */
@@ -1362,6 +1363,7 @@
/* just one encoding, so associate it with the string */
rb_enc_associate(str, io_read_encoding(fptr));
}
+#endif
return str;
}
@@ -1372,7 +1374,9 @@
long n;
long pos = 0;
rb_encoding *enc = io_input_encoding(fptr);
+#if !WITH_OBJC
int cr = fptr->enc2 ? ENC_CODERANGE_BROKEN : 0;
+#endif
if (siz == 0) siz = BUFSIZ;
if (NIL_P(str)) {
@@ -1388,17 +1392,21 @@
break;
}
bytes += n;
+#if !WITH_OBJC
if (cr != ENC_CODERANGE_BROKEN)
pos = rb_str_coderange_scan_restartable(RSTRING_PTR(str) + pos, RSTRING_PTR(str) + bytes, enc, &cr);
+#endif
if (bytes < siz) break;
siz += BUFSIZ;
rb_str_resize(str, siz);
}
if (bytes != siz) rb_str_resize(str, bytes);
str = io_enc_str(str, fptr);
+#if !WITH_OBJC
if (!fptr->enc2) {
ENC_CODERANGE_SET(str, cr);
}
+#endif
return str;
}
@@ -1738,6 +1746,7 @@
RSTRING_PTR(str)[last++] = c;
}
if (limit > 0 && limit == pending) {
+#if !WITH_OBJC
char *p = fptr->rbuf+fptr->rbuf_off;
char *pp = p + limit;
char *pl = rb_enc_left_char_head(p, pp, enc);
@@ -1748,6 +1757,7 @@
limit = pending;
rb_str_set_len(str, RSTRING_LEN(str)-diff);
}
+#endif
}
read_buffered_data(RSTRING_PTR(str) + last, pending, fptr); /* must not fail */
limit -= pending;
@@ -1816,7 +1826,9 @@
int len = 0;
long pos = 0;
rb_encoding *enc = io_input_encoding(fptr);
+#if !WITH_OBJC
int cr = fptr->enc2 ? ENC_CODERANGE_BROKEN : 0;
+#endif
for (;;) {
long pending = READ_DATA_PENDING_COUNT(fptr);
@@ -1839,8 +1851,10 @@
read_buffered_data(RSTRING_PTR(str)+len, pending, fptr);
}
len += pending;
+#if !WITH_OBJC
if (cr != ENC_CODERANGE_BROKEN)
pos = rb_str_coderange_scan_restartable(RSTRING_PTR(str) + pos, RSTRING_PTR(str) + len, enc, &cr);
+#endif
if (e) break;
}
rb_thread_wait_fd(fptr->fd);
@@ -1853,7 +1867,9 @@
RSTRING_SYNC(str);
str = io_enc_str(str, fptr);
+#if !WITH_OBJC
if (!fptr->enc2) ENC_CODERANGE_SET(str, cr);
+#endif
fptr->lineno++;
ARGF.lineno = INT2FIX(fptr->lineno);
return str;
@@ -1884,6 +1900,7 @@
}
}
if (!NIL_P(rs)) {
+#if !WITH_OBJC
rb_encoding *enc_rs, *enc_io;
GetOpenFile(io, fptr);
@@ -1911,6 +1928,7 @@
rs = rs2;
}
}
+#endif
}
*rsp = rs;
*limit = NIL_P(lim) ? -1L : NUM2LONG(lim);
@@ -1934,8 +1952,12 @@
else if (limit == 0) {
return rb_enc_str_new(0, 0, io_read_encoding(fptr));
}
- else if (rs == rb_default_rs && limit < 0 &&
- rb_enc_asciicompat(io_read_encoding(fptr))) {
+ else if (rs == rb_default_rs && limit < 0
+#if WITH_OBJC
+ ) {
+#else
+ && rb_enc_asciicompat(io_read_encoding(fptr))) {
+#endif
return rb_io_getline_fast(fptr);
}
else {
@@ -1964,8 +1986,10 @@
if (RSTRING_LEN(str) < rslen) continue;
s = RSTRING_PTR(str);
p = s + RSTRING_LEN(str) - rslen;
+#if !WITH_OBJC
pp = rb_enc_left_char_head(s, p, enc);
if (pp != p) continue;
+#endif
if (!rspara) rscheck(rsptr, rslen, rs);
if (memcmp(p, rsptr, rslen) == 0) break;
}
@@ -2296,6 +2320,11 @@
if (io_fillbuf(fptr) < 0) {
return Qnil;
}
+#if WITH_OBJC
+ /* FIXME */
+ if (0) {
+ }
+#else
r = rb_enc_precise_mbclen(fptr->rbuf+fptr->rbuf_off, fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc);
if (MBCLEN_CHARFOUND_P(r) &&
(n = MBCLEN_CHARFOUND_LEN(r)) <= fptr->rbuf_len) {
@@ -2317,6 +2346,7 @@
}
}
}
+#endif
else {
str = rb_str_new(fptr->rbuf+fptr->rbuf_off, 1);
fptr->rbuf_off++;
@@ -2447,10 +2477,14 @@
enc = io_read_encoding(fptr);
if (FIXNUM_P(c)) {
int cc = FIX2INT(c);
+#if WITH_OBJC
+ c = rb_str_new((char *)&cc, 1);
+#else
char buf[16];
rb_enc_mbcput(cc, buf, enc);
c = rb_str_new(buf, rb_enc_codelen(cc, enc));
+#endif
}
else {
SafeStringValue(c);
@@ -3227,11 +3261,24 @@
{
const char *p0, *p1;
char *enc2name;
+#if WITH_OBJC
+ rb_encoding *enc1, enc2;
+#else
int idx, idx2;
-
+#endif
+
p0 = strrchr(estr, ':');
if (!p0) p1 = estr;
else p1 = p0 + 1;
+#if WITH_OBJC
+ enc1 = rb_enc_find(p1);
+ if (enc1 != NULL) {
+ fptr->enc = enc1;
+ }
+ else {
+ rb_warn("Unsupported encoding %s ignored", p1);
+ }
+#else
idx = rb_enc_find_index(p1);
if (idx >= 0) {
fptr->enc = rb_enc_from_index(idx);
@@ -3239,28 +3286,49 @@
else {
rb_warn("Unsupported encoding %s ignored", p1);
}
+#endif
if (p0) {
int n = p0 - estr;
if (n > ENCODING_MAXNAMELEN) {
+#if WITH_OBJC
+ enc2 = NULL;
+#else
idx2 = -1;
+#endif
}
else {
enc2name = ALLOCA_N(char, n+1);
memcpy(enc2name, estr, n);
enc2name[n] = '\0';
estr = enc2name;
+#if WITH_OBJC
+ enc2 = rb_enc_find(enc2name);
+#else
idx2 = rb_enc_find_index(enc2name);
+#endif
}
+#if WITH_OBJC
+ if (enc2 == NULL) {
+#else
if (idx2 < 0) {
+#endif
rb_warn("Unsupported encoding %.*s ignored", n, estr);
}
+#if WITH_OBJC
+ else if (enc1 == enc2) {
+#else
else if (idx2 == idx) {
+#endif
rb_warn("Ignoring internal encoding %.*s: it is identical to external encoding %s",
n, estr, p1);
}
else {
+#if WITH_OBJC
+ fptr->enc2 = enc2;
+#else
fptr->enc2 = rb_enc_from_index(idx2);
+#endif
}
}
}
Modified: MacRuby/trunk/marshal.c
===================================================================
--- MacRuby/trunk/marshal.c 2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/marshal.c 2008-05-15 08:06:22 UTC (rev 192)
@@ -469,8 +469,18 @@
static void
w_encoding(VALUE obj, long num, struct dump_call_arg *arg)
{
+ rb_encoding *enc = 0;
+#if WITH_OBJC
+ const char *name;
+
+ enc = rb_enc_get(obj);
+ if (enc == NULL) {
+ w_long(num, arg->arg);
+ return;
+ }
+ name = rb_enc_name(enc);
+#else
int encidx = rb_enc_get_index(obj);
- rb_encoding *enc = 0;
st_data_t name;
if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) {
@@ -487,6 +497,7 @@
name = (st_data_t)rb_str_new2(rb_enc_name(enc));
st_insert(arg->arg->encodings, (st_data_t)rb_enc_name(enc), name);
} while (0);
+#endif
w_object(name, arg->arg, arg->limit);
}
@@ -1110,7 +1121,11 @@
while (len--) {
ID id = r_symbol(arg);
VALUE val = r_object(arg);
+#if WITH_OBJC
+ if (0) {
+#else
if (id == rb_id_encoding()) {
+#endif
int idx = rb_enc_find_index(StringValueCStr(val));
if (idx > 0) rb_enc_associate_index(obj, idx);
}
Modified: MacRuby/trunk/numeric.c
===================================================================
--- MacRuby/trunk/numeric.c 2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/numeric.c 2008-05-15 08:06:22 UTC (rev 192)
@@ -1863,12 +1863,17 @@
rb_raise(rb_eArgError, "wrong number of arguments (%d for 0 or 1)", argc);
break;
}
+#if WITH_OBJC
+ /* TODO */
+ rb_notimplement();
+#else
enc = rb_to_encoding(argv[0]);
if (!enc) enc = rb_ascii8bit_encoding();
if (i < 0 || (n = rb_enc_codelen(i, enc)) <= 0) goto out_of_range;
str = rb_enc_str_new(0, n, enc);
rb_enc_mbcput(i, RSTRING_PTR(str), enc);
return str;
+#endif
}
/********************************************************************
Modified: MacRuby/trunk/objc.m
===================================================================
--- MacRuby/trunk/objc.m 2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/objc.m 2008-05-15 08:06:22 UTC (rev 192)
@@ -460,6 +460,7 @@
{
char v = RTEST(rval);
*(id *)ocval = (id)CFNumberCreate(NULL, kCFNumberCharType, &v);
+ CFMakeCollectable(*(id *)ocval);
return true;
}
@@ -467,6 +468,7 @@
{
double v = RFLOAT_VALUE(rval);
*(id *)ocval = (id)CFNumberCreate(NULL, kCFNumberDoubleType, &v);
+ CFMakeCollectable(*(id *)ocval);
return true;
}
@@ -487,6 +489,7 @@
*(id *)ocval = (id)CFNumberCreate(NULL, kCFNumberLongType, &v);
#endif
}
+ CFMakeCollectable(*(id *)ocval);
return true;
}
@@ -495,6 +498,7 @@
ID name = SYM2ID(rval);
*(id *)ocval = (id)CFStringCreateWithCString(NULL, rb_id2name(name),
kCFStringEncodingASCII); /* XXX this is temporary */
+ CFMakeCollectable(*(id *)ocval);
return true;
}
}
@@ -2120,7 +2124,7 @@
if (bs_find_path(framework_path, path, sizeof path)) {
if (!bs_parse(path, 0, bs_parse_cb, NULL, &error))
rb_raise(rb_eRuntimeError, error);
-#if 1
+#if 0
/* FIXME 'GC capability mismatch' with .dylib files */
p = strrchr(path, '.');
assert(p != NULL);
Modified: MacRuby/trunk/parse.y
===================================================================
--- MacRuby/trunk/parse.y 2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/parse.y 2008-05-15 08:06:22 UTC (rev 192)
@@ -266,14 +266,23 @@
#endif
};
-#define UTF8_ENC() (parser->utf8 ? parser->utf8 : \
+#if WITH_OBJC
+# define UTF8_ENC() (NULL)
+#else
+# define UTF8_ENC() (parser->utf8 ? parser->utf8 : \
(parser->utf8 = rb_utf8_encoding()))
+#endif
#define STR_NEW(p,n) rb_enc_str_new((p),(n),parser->enc)
#define STR_NEW0() rb_usascii_str_new(0,0)
#define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc)
#define STR_NEW3(p,n,e,func) parser_str_new((p),(n),(e),(func),parser->enc)
-#define STR_ENC(m) ((m)?parser->enc:rb_usascii_encoding())
-#define ENC_SINGLE(cr) ((cr)==ENC_CODERANGE_7BIT)
+#if WITH_OBJC
+# define STR_ENC(m) (parser->enc)
+# define ENC_SINGLE(cr) (1)
+#else
+# define STR_ENC(m) ((m)?parser->enc:rb_usascii_encoding())
+# define ENC_SINGLE(cr) ((cr)==ENC_CODERANGE_7BIT)
+#endif
#define TOK_INTERN(mb) rb_intern3(tok(), toklen(), STR_ENC(mb))
#ifdef YYMALLOC
@@ -4641,8 +4650,10 @@
# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
#endif
+#if !WITH_OBJC
#define parser_mbclen() mbclen((lex_p-1),lex_pend,parser->enc)
#define parser_precise_mbclen() rb_enc_precise_mbclen((lex_p-1),lex_pend,parser->enc)
+#endif
#define is_identchar(p,e,enc) (rb_enc_isalnum(*p,enc) || (*p) == '_' || !ISASCII(*p))
#define parser_is_identchar() (!parser->eofp && is_identchar((lex_p-1),lex_pend,parser->enc))
@@ -4678,11 +4689,19 @@
if (len > max_line_margin * 2 + 10) {
if (lex_p - p > max_line_margin) {
+#if WITH_OBJC
+ p = lex_p - max_line_margin;
+#else
p = rb_enc_prev_char(p, lex_p - max_line_margin, rb_enc_get(lex_lastline));
+#endif
pre = "...";
}
if (pe - lex_p > max_line_margin) {
+#if WITH_OBJC
+ pe = lex_p + max_line_margin;
+#else
pe = rb_enc_prev_char(lex_p, lex_p + max_line_margin, rb_enc_get(lex_lastline));
+#endif
post = "...";
}
len = pe - p;
@@ -4956,6 +4975,7 @@
#endif
str = rb_enc_str_new(p, n, enc);
+#if !WITH_OBJC
if (!(func & STR_FUNC_REGEXP) && rb_enc_asciicompat(enc)) {
if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) {
rb_enc_associate(str, rb_usascii_encoding());
@@ -4964,6 +4984,7 @@
rb_enc_associate(str, rb_ascii8bit_encoding());
}
}
+#endif
return str;
}
@@ -5281,8 +5302,14 @@
static void
parser_tokaddmbc(struct parser_params *parser, int c, rb_encoding *enc)
{
+#if WITH_OBJC
+ /* FIXME */
+ char *buf = tokspace(1);
+ *(buf) = c;
+#else
int len = rb_enc_codelen(c, enc);
rb_enc_mbcput(c, tokspace(len), enc);
+#endif
}
static int
@@ -5409,11 +5436,15 @@
static int
parser_tokadd_mbchar(struct parser_params *parser, int c)
{
+#if WITH_OBJC
+ int len = 1;
+#else
int len = parser_precise_mbclen();
if (!MBCLEN_CHARFOUND_P(len)) {
compile_error(PARSER_ARG "invalid multibyte char");
return -1;
}
+#endif
tokadd(c);
lex_p += --len;
if (len > 0) tokcopy(len);
@@ -5856,8 +5887,15 @@
static void
parser_set_encode(struct parser_params *parser, const char *name)
{
+ rb_encoding *enc;
+#if WITH_OBJC
+ enc = rb_enc_find(name);
+ if (enc == NULL) {
+ rb_raise(rb_eArgError, "unknown encoding name: %s", name);
+ }
+ /* TODO should raise if the encoding is not ASCII compatible */
+#else
int idx = rb_enc_find_index(name);
- rb_encoding *enc;
if (idx < 0) {
rb_raise(rb_eArgError, "unknown encoding name: %s", name);
@@ -5866,6 +5904,7 @@
if (!rb_enc_asciicompat(enc)) {
rb_raise(rb_eArgError, "%s is not ASCII compatible", rb_enc_name(enc));
}
+#endif
parser->enc = enc;
}
@@ -6085,8 +6124,10 @@
}
pushback(c);
parser->enc = rb_enc_get(lex_lastline);
+#if !WITH_OBJC
if (parser->enc == NULL)
parser->enc = rb_utf8_encoding();
+#endif
}
#define IS_ARG() (lex_state == EXPR_ARG || lex_state == EXPR_CMDARG)
@@ -7248,9 +7289,13 @@
break;
}
+#if !WITH_OBJC
mb = ENC_CODERANGE_7BIT;
+#endif
do {
+#if !WITH_OBJC
if (!ISASCII(c)) mb = ENC_CODERANGE_UNKNOWN;
+#endif
if (tokadd_mbchar(c) == -1) return 0;
c = nextc();
} while (parser_is_identchar());
@@ -7303,7 +7348,11 @@
}
}
+#if WITH_OBJC
+ if (lex_state != EXPR_DOT) {
+#else
if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
+#endif
const struct kwtable *kw;
/* See if it is a reserved word. */
@@ -7557,11 +7606,13 @@
static void
literal_concat0(struct parser_params *parser, VALUE head, VALUE tail)
{
+#if !WITH_OBJC
if (!rb_enc_compatible(head, tail)) {
compile_error(PARSER_ARG "string literal encodings differ (%s / %s)",
rb_enc_name(rb_enc_get(head)),
rb_enc_name(rb_enc_get(tail)));
}
+#endif
RSTRING_SYNC(head);
rb_str_buf_append(head, tail);
}
@@ -8629,6 +8680,9 @@
static void
reg_fragment_setenc_gen(struct parser_params* parser, VALUE str, int options)
{
+#if WITH_OBJC
+ /* TODO */
+#else
int c = RE_OPTION_ENCODING_IDX(options);
if (c) {
@@ -8663,6 +8717,7 @@
compile_error(PARSER_ARG
"regexp encoding option '%c' differs from source encoding '%s'",
c, rb_enc_name(rb_enc_get(str)));
+#endif
}
static void
@@ -8999,7 +9054,11 @@
++m;
if (m < e && is_identchar(m, e, enc)) {
if (!ISASCII(*m)) mb = 1;
+#if WITH_OBJC
+ m += e-m;
+#else
m += rb_enc_mbclen(m, e, enc);
+#endif
}
break;
default:
@@ -9015,7 +9074,11 @@
int
rb_symname_p(const char *name)
{
+#if WITH_OBJC
+ return rb_enc_symname_p(name, NULL);
+#else
return rb_enc_symname_p(name, rb_ascii8bit_encoding());
+#endif
}
int
@@ -9096,7 +9159,11 @@
id:
if (m >= e || (*m != '_' && !rb_enc_isalpha(*m, enc) && ISASCII(*m)))
return Qfalse;
+#if WITH_OBJC
+ while (m < e && is_identchar(m, e, enc)) m += e-m;
+#else
while (m < e && is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc);
+#endif
if (localid) {
switch (*m) {
case '!': case '?': case '=': ++m;
@@ -9152,7 +9219,13 @@
case '$':
id |= ID_GLOBAL;
if ((mb = is_special_global_name(++m, e, enc)) != 0) {
- if (!--mb) enc = rb_ascii8bit_encoding();
+ if (!--mb) {
+#if WITH_OBJC
+ enc = NULL;
+#else
+ enc = rb_ascii8bit_encoding();
+#endif
+ }
goto new_id;
}
break;
@@ -9199,6 +9272,7 @@
}
break;
}
+#if !WITH_OBJC
mb = 0;
if (!rb_enc_isdigit(*m, enc)) {
while (m <= name + last && is_identchar(m, e, enc)) {
@@ -9225,12 +9299,12 @@
}
mbstr:;
}
+#endif
new_id:
id |= ++global_symbols.last_id << ID_SCOPE_SHIFT;
id_register:
str = rb_enc_str_new(name, len, enc);
-// TODO
-// OBJ_FREEZE(str);
+ OBJ_FREEZE(str);
#if WITH_OBJC
CFDictionarySetValue(global_symbols.sym_id, (const void *)name_hash,
(const void *)id);
@@ -9246,7 +9320,11 @@
ID
rb_intern2(const char *name, long len)
{
+#if WITH_OBJC
+ return rb_intern3(name, len, NULL);
+#else
return rb_intern3(name, len, rb_usascii_encoding());
+#endif
}
#undef rb_intern
@@ -9262,12 +9340,16 @@
rb_encoding *enc;
ID id;
+#if WITH_OBJC
+ enc = rb_enc_get(str);
+#else
if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) {
enc = rb_usascii_encoding();
}
else {
enc = rb_enc_get(str);
}
+#endif
id = rb_intern3(RSTRING_CPTR(str), RSTRING_CLEN(str), enc);
RB_GC_GUARD(str);
return id;
@@ -9467,7 +9549,11 @@
#ifdef YYMALLOC
parser->heap = NULL;
#endif
+#if WITH_OBJC
+ parser->enc = NULL;
+#else
parser->enc = rb_usascii_encoding();
+#endif
}
extern void rb_mark_source_filename(char *);
Modified: MacRuby/trunk/re.c
===================================================================
--- MacRuby/trunk/re.c 2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/re.c 2008-05-15 08:06:22 UTC (rev 192)
@@ -184,6 +184,7 @@
case 'n':
*kcode = -1;
return (*option = ARG_ENCODING_NONE);
+#if !WITH_OBJC
case 'e':
*kcode = rb_enc_find_index("EUC-JP");
break;
@@ -193,6 +194,7 @@
case 'u':
*kcode = rb_enc_find_index("UTF-8");
break;
+#endif
default:
*kcode = -1;
return (*option = char_to_option(c));
@@ -219,10 +221,16 @@
p = s; pend = p + len;
while (p<pend) {
+#if WITH_OBJC
+ c = *p;
+ clen = 1;
+ if (0) {}
+#else
c = rb_enc_ascget(p, pend, &clen, enc);
if (c == -1) {
p += mbclen(p, pend, enc);
}
+#endif
else if (c != '/' && rb_enc_isprint(c, enc)) {
p += clen;
}
@@ -237,9 +245,18 @@
else {
p = s;
while (p<pend) {
+#if WITH_OBJC
+ c = *p;
+ clen = 1;
+#else
c = rb_enc_ascget(p, pend, &clen, enc);
+#endif
if (c == '\\' && p+clen < pend) {
+#if WITH_OBJC
+ int n = clen + (pend - (p+clen));
+#else
int n = clen + mbclen(p+clen, pend, enc);
+#endif
rb_str_buf_cat(str, p, n);
p += n;
continue;
@@ -249,12 +266,14 @@
rb_str_buf_cat(str, &c, 1);
rb_str_buf_cat(str, p, clen);
}
+#if !WITH_OBJC
else if (c == -1) {
int l = mbclen(p, pend, enc);
rb_str_buf_cat(str, p, l);
p += l;
continue;
}
+#endif
else if (rb_enc_isprint(c, enc)) {
rb_str_buf_cat(str, p, clen);
}
@@ -277,7 +296,9 @@
{
VALUE str = rb_str_buf_new2("/");
+#if !WITH_OBJC
rb_enc_copy(str, re);
+#endif
rb_reg_expr_str(str, s, len);
rb_str_buf_cat2(str, "/");
if (re) {
@@ -374,7 +395,9 @@
rb_reg_check(re);
+#if !WITH_OBJC
rb_enc_copy(str, re);
+#endif
options = RREGEXP(re)->ptr->options;
ptr = (UChar*)RREGEXP(re)->str;
len = RREGEXP(re)->len;
@@ -416,9 +439,17 @@
if (*ptr == ':' && ptr[len-1] == ')') {
int r;
Regexp *rp;
+ OnigEncoding oenc;
+
+#if WITH_OBJC
+ oenc = ONIG_ENCODING_ASCII;
+#else
+ oenc = rb_enc_get(re);
+#endif
+
r = onig_alloc_init(&rp, ONIG_OPTION_DEFAULT,
ONIGENC_CASE_FOLD_DEFAULT,
- rb_enc_get(re),
+ oenc,
OnigDefaultSyntax);
if (r == 0) {
++ptr;
@@ -445,7 +476,9 @@
rb_str_buf_cat2(str, ":");
rb_reg_expr_str(str, (char*)ptr, len);
rb_str_buf_cat2(str, ")");
+#if !WITH_OBJC
rb_enc_copy(str, re);
+#endif
OBJ_INFECT(str, re);
return str;
@@ -465,7 +498,9 @@
char opts[6];
VALUE desc = rb_str_buf_new2(err);
+#if !WITH_OBJC
rb_enc_associate(desc, enc);
+#endif
rb_str_buf_cat2(desc, ": /");
rb_reg_expr_str(desc, s, len);
opts[0] = '/';
@@ -629,6 +664,7 @@
Regexp *rp;
int r;
OnigErrorInfo einfo;
+ OnigEncoding oenc;
/* Handle escaped characters first. */
@@ -637,8 +673,14 @@
from that.
*/
+#if WITH_OBJC
+ oenc = ONIG_ENCODING_ASCII;
+#else
+ oenc = enc;
+#endif
+
r = onig_alloc_init(&rp, flags, ONIGENC_CASE_FOLD_DEFAULT,
- enc, OnigDefaultSyntax);
+ oenc, OnigDefaultSyntax);
if (r) {
onig_error_code_to_str((UChar*)err, r);
return 0;
@@ -742,7 +784,12 @@
c = 0;
for (i = 0; i < num_pos; i++) {
q = s + pairs[i].byte_pos;
+#if WITH_OBJC
+ //long n = strlen(p);
+ c += q-p;//(n > (q-p) ? q-p : n);
+#else
c += rb_enc_strlen(p, q, enc);
+#endif
pairs[i].char_pos = c;
p = q;
}
@@ -1052,20 +1099,25 @@
int need_recompile = 0;
rb_encoding *enc;
+#if WITH_OBJC
+ need_recompile = 0;
+#else
if (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) {
rb_raise(rb_eArgError,
"broken %s string",
rb_enc_name(rb_enc_get(str)));
}
+#endif
rb_reg_check(re);
/* ignorecase status */
+#if !WITH_OBJC
if (rb_reg_fixed_encoding_p(re) || !rb_enc_str_asciicompat_p(str)) {
if (ENCODING_GET(re) != rb_enc_get_index(str) &&
rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) {
rb_raise(rb_eArgError,
"incompatible encoding regexp match (%s regexp with %s string)",
- rb_enc_name(rb_enc_from_index(ENCODING_GET(re))),
+ rb_enc_name(rb_enc_get(re)),
rb_enc_name(rb_enc_get(str)));
}
}
@@ -1082,6 +1134,7 @@
rb_enc_name(enc));
}
}
+#endif
if (need_recompile) {
onig_errmsg_buffer err = "";
@@ -1104,10 +1157,14 @@
rb_raise(rb_eArgError, "regexp preprocess failed: %s", err);
}
+#if WITH_OBJC
+ enc = (rb_encoding *)ONIG_ENCODING_ASCII;
+#endif
+
r = onig_new(&reg2, (UChar* )RSTRING_CPTR(unescaped),
(UChar* )(RSTRING_CPTR(unescaped)
+ RSTRING_CLEN(unescaped)),
- reg->options, enc,
+ reg->options, (OnigEncoding)enc,
OnigDefaultSyntax, &einfo);
if (r) {
onig_error_code_to_str((UChar*)err, r, &einfo);
@@ -1815,8 +1872,12 @@
}
chbuf[chlen++] = byte;
- while (chlen < chmaxlen &&
- MBCLEN_NEEDMORE_P(rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) {
+ while (chlen < chmaxlen
+#if WITH_OBJC
+ && 1) {
+#else
+ && MBCLEN_NEEDMORE_P(rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) {
+#endif
byte = read_escaped_byte(&p, end, err);
if (byte == -1) {
return -1;
@@ -1824,11 +1885,13 @@
chbuf[chlen++] = byte;
}
+#if !WITH_OBJC
l = rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc);
if (MBCLEN_INVALID_P(l)) {
strcpy(err, "invalid multibyte escape");
return -1;
}
+#endif
if (1 < chlen || (chbuf[0] & 0x80)) {
rb_str_buf_cat(buf, chbuf, chlen);
@@ -1876,12 +1939,14 @@
len = rb_uv_to_utf8(utf8buf, uv);
rb_str_buf_cat(buf, utf8buf, len);
+#if !WITH_OBJC
if (*encp == 0)
*encp = rb_utf8_encoding();
else if (*encp != rb_utf8_encoding()) {
strcpy(err, "UTF-8 character in non UTF-8 regexp");
return -1;
}
+#endif
}
return 0;
}
@@ -1954,12 +2019,16 @@
char smallbuf[2];
while (p < end) {
+#if WITH_OBJC
+ int chlen = 1;
+#else
int chlen = rb_enc_precise_mbclen(p, end, enc);
if (!MBCLEN_CHARFOUND_P(chlen)) {
strcpy(err, "invalid multibyte character");
return -1;
}
chlen = MBCLEN_CHARFOUND_LEN(chlen);
+#endif
if (1 < chlen || (*p & 0x80)) {
rb_str_buf_cat(buf, p, chlen);
p += chlen;
@@ -2057,14 +2126,18 @@
*fixed_enc = 0;
else {
*fixed_enc = enc;
+#if !WITH_OBJC
rb_enc_associate(buf, enc);
+#endif
}
if (unescape_nonascii(p, end, enc, buf, fixed_enc, err) != 0)
return Qnil;
if (*fixed_enc) {
+#if !WITH_OBJC
rb_enc_associate(buf, *fixed_enc);
+#endif
}
return buf;
@@ -2144,7 +2217,9 @@
rb_str_buf_append(result, str);
}
if (regexp_enc) {
+#if !WITH_OBJC
rb_enc_associate(result, regexp_enc);
+#endif
}
return result;
@@ -2157,7 +2232,11 @@
struct RRegexp *re = RREGEXP(obj);
VALUE unescaped;
rb_encoding *fixed_enc = 0;
+#if WITH_OBJC
+ rb_encoding *a_enc = NULL;
+#else
rb_encoding *a_enc = rb_ascii8bit_encoding();
+#endif
if (!OBJ_TAINTED(obj) && rb_safe_level() >= 4)
rb_raise(rb_eSecurityError, "Insecure: can't modify regexp");
@@ -2184,11 +2263,15 @@
enc = fixed_enc;
}
}
+#if !WITH_OBJC
else if (!(options & ARG_ENCODING_FIXED)) {
enc = rb_usascii_encoding();
}
+#endif
+#if !WITH_OBJC
rb_enc_associate((VALUE)re, enc);
+#endif
if ((options & ARG_ENCODING_FIXED) || fixed_enc) {
re->basic.flags |= KCODE_FIXED;
}
@@ -2214,6 +2297,8 @@
int ret;
rb_encoding *enc = rb_enc_get(str);
if (options & ARG_ENCODING_NONE) {
+#if !WITH_OBJC
+ /* TODO */
rb_encoding *ascii8bit = rb_ascii8bit_encoding();
if (enc != ascii8bit) {
if (rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) {
@@ -2222,6 +2307,7 @@
}
enc = ascii8bit;
}
+#endif
}
ret = rb_reg_initialize(obj, RSTRING_CPTR(str), RSTRING_CLEN(str), enc,
options, err);
@@ -2277,7 +2363,11 @@
VALUE
rb_reg_new(const char *s, long len, int options)
{
+#if WITH_OBJC
+ return rb_enc_reg_new(s, len, NULL, options);
+#else
return rb_enc_reg_new(s, len, rb_ascii8bit_encoding(), options);
+#endif
}
VALUE
@@ -2302,7 +2392,11 @@
{
volatile VALUE save_str = str;
if (reg_cache && RREGEXP(reg_cache)->len == RSTRING_CLEN(str)
+#if WITH_OBJC
+ && rb_enc_get(reg_cache) == rb_enc_get(str)
+#else
&& ENCODING_GET(reg_cache) == ENCODING_GET(str)
+#endif
&& memcmp(RREGEXP(reg_cache)->str, RSTRING_CPTR(str), RSTRING_CLEN(str)) == 0)
return reg_cache;
@@ -2359,7 +2453,11 @@
if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED)) return Qfalse;
if (RREGEXP(re1)->ptr->options != RREGEXP(re2)->ptr->options) return Qfalse;
if (RREGEXP(re1)->len != RREGEXP(re2)->len) return Qfalse;
+#if WITH_OBJC
+ if (rb_enc_get(re1) != rb_enc_get(re2)) return Qfalse;
+#else
if (ENCODING_GET(re1) != ENCODING_GET(re2)) return Qfalse;
+#endif
if (memcmp(RREGEXP(re1)->str, RREGEXP(re2)->str, RREGEXP(re1)->len) == 0) {
return Qtrue;
}
@@ -2643,7 +2741,9 @@
if (argc == 3 && !NIL_P(argv[2])) {
char *kcode = StringValuePtr(argv[2]);
if (kcode[0] == 'n' || kcode[1] == 'N') {
+#if !WITH_OBJC
enc = rb_ascii8bit_encoding();
+#endif
flags |= ARG_ENCODING_FIXED;
}
else {
@@ -2669,18 +2769,27 @@
char *t;
VALUE tmp;
int c, clen;
+#if WITH_OBJC
+ int ascii_only = 0;
+#else
int ascii_only = rb_enc_str_asciionly_p(str);
+#endif
s = RSTRING_CPTR(str);
if (s == NULL)
return str;
send = s + RSTRING_CLEN(str);
while (s < send) {
- c = rb_enc_ascget(s, send, &clen, enc);
+#if WITH_OBJC
+ c = *s;
+ clen = 1;
+#else
+ c = rb_enc_ascget(s, send, &clen, enc);
if (c == -1) {
s += mbclen(s, send, enc);
continue;
}
+#endif
switch (c) {
case '[': case ']': case '{': case '}':
case '(': case ')': case '|': case '-':
@@ -2694,24 +2803,32 @@
}
if (ascii_only) {
str = rb_str_new3(str);
+#if !WITH_OBJC
rb_enc_associate(str, rb_usascii_encoding());
+#endif
}
return str;
meta_found:
tmp = rb_str_new(0, RSTRING_CLEN(str)*2);
+#if !WITH_OBJC
if (ascii_only) {
rb_enc_associate(tmp, rb_usascii_encoding());
}
else {
rb_enc_copy(tmp, str);
}
+#endif
t = RSTRING_PTR(tmp);
/* copy upto metacharacter */
memcpy(t, RSTRING_CPTR(str), s - RSTRING_CPTR(str));
t += s - RSTRING_CPTR(str);
while (s < send) {
+#if WITH_OBJC
+ c = *s;
+ clen = 1;
+#else
c = rb_enc_ascget(s, send, &clen, enc);
if (c == -1) {
int n = mbclen(s, send, enc);
@@ -2720,6 +2837,7 @@
*t++ = *s++;
continue;
}
+#endif
s += clen;
switch (c) {
case '[': case ']': case '{': case '}':
@@ -2758,7 +2876,9 @@
}
rb_str_resize(tmp, t - RSTRING_PTR(tmp));
RSTRING_SYNC(tmp);
+#if !WITH_OBJC
OBJ_INFECT(tmp, str);
+#endif
return tmp;
}
@@ -2894,9 +3014,11 @@
rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
}
+#if !WITH_OBJC
else if (rb_enc_str_asciionly_p(e)) {
has_asciionly = 1;
}
+#endif
else {
if (!has_ascii_compat_fixed)
has_ascii_compat_fixed = enc;
@@ -2917,12 +3039,15 @@
}
}
+#if !WITH_OBJC
if (i == 0) {
rb_enc_copy(source, v);
}
+#endif
rb_str_append(source, v);
}
+#if !WITH_OBJC
if (has_ascii_incompat) {
result_enc = has_ascii_incompat;
}
@@ -2934,6 +3059,7 @@
}
rb_enc_associate(source, result_enc);
+#endif
return rb_class_new_instance(1, &source, rb_cRegexp);
}
}
@@ -2998,18 +3124,24 @@
rb_encoding *str_enc = rb_enc_get(str);
rb_encoding *src_enc = rb_enc_get(src);
+#if !WITH_OBJC
rb_enc_check(str, src);
+#endif
p = s = RSTRING_CPTR(str);
e = s + RSTRING_CLEN(str);
while (s < e) {
+ const char *ss;
+#if WITH_OBJC
+ int c = *s;
+ clen = 1;
+#else
int c = rb_enc_ascget(s, e, &clen, str_enc);
- const char *ss;
-
if (c == -1) {
s += mbclen(s, e, str_enc);
continue;
}
+#endif
ss = s;
s += clen;
@@ -3020,6 +3152,10 @@
}
rb_enc_str_buf_cat(val, p, ss-p, str_enc);
+#if WITH_OBJC
+ c = *s;
+ clen = 1;
+#else
c = rb_enc_ascget(s, e, &clen, str_enc);
if (c == -1) {
s += mbclen(s, e, str_enc);
@@ -3027,6 +3163,7 @@
p = s;
continue;
}
+#endif
s += clen;
p = s;
@@ -3042,14 +3179,26 @@
break;
case 'k':
+#if WITH_OBJC
+ clen = 1;
+ if (s < e && *s == '<') {
+#else
if (s < e && rb_enc_ascget(s, e, &clen, str_enc) == '<') {
+#endif
const char *name, *name_end;
name_end = name = s + clen;
while (name_end < e) {
+#if WITH_OBJC
+ c = *name_end;
+ clen = 1;
+ if (c == '>') break;
+ name_end += clen;
+#else
c = rb_enc_ascget(name_end, e, &clen, str_enc);
if (c == '>') break;
name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
+#endif
}
if (name_end < e) {
no = name_to_backref_number(regs, regexp, name, name_end);
Modified: MacRuby/trunk/ruby.c
===================================================================
--- MacRuby/trunk/ruby.c 2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/ruby.c 2008-05-15 08:06:22 UTC (rev 192)
@@ -91,7 +91,11 @@
struct {
struct {
VALUE name;
+#if WITH_OBJC
+ rb_encoding *enc;
+#else
int index;
+#endif
} enc;
} src, ext;
};
@@ -926,6 +930,17 @@
Init_prelude();
}
+#if WITH_OBJC
+static rb_encoding *
+opt_enc_find(VALUE enc_name)
+{
+ rb_encoding *enc = rb_enc_find2(enc_name);
+ if (enc == NULL)
+ rb_raise(rb_eRuntimeError, "unknown encoding name - %s",
+ RSTRING_CPTR(enc_name));
+ return enc;
+}
+#else
static int
opt_enc_index(VALUE enc_name)
{
@@ -940,8 +955,13 @@
}
return i;
}
+#endif
+#if WITH_OBJC
+static rb_encoding *src_encoding;
+#else
static int src_encoding_index = -1; /* TODO: VM private */
+#endif
static VALUE
process_options(VALUE arg)
@@ -1059,15 +1079,30 @@
parser = rb_parser_new();
if (opt->yydebug) rb_parser_set_yydebug(parser, Qtrue);
if (opt->ext.enc.name != 0) {
+#if WITH_OBJC
+ opt->ext.enc.enc = opt_enc_find(opt->ext.enc.name);
+#else
opt->ext.enc.index = opt_enc_index(opt->ext.enc.name);
+#endif
}
if (opt->src.enc.name != 0) {
+#if WITH_OBJC
+ opt->src.enc.enc = opt_enc_find(opt->src.enc.name);
+ src_encoding = opt->src.enc.enc;
+#else
opt->src.enc.index = opt_enc_index(opt->src.enc.name);
src_encoding_index = opt->src.enc.index;
+#endif
}
+#if WITH_OBJC
+ if (opt->ext.enc.enc != NULL) {
+ enc = opt->ext.enc.enc;
+ }
+#else
if (opt->ext.enc.index >= 0) {
enc = rb_enc_from_index(opt->ext.enc.index);
}
+#endif
else {
enc = rb_locale_encoding();
}
@@ -1075,13 +1110,21 @@
if (opt->e_script) {
rb_encoding *eenc;
+#if WITH_OBJC
+ if (opt->src.enc.enc != NULL) {
+ eenc = opt->src.enc.enc;
+ }
+#else
if (opt->src.enc.index >= 0) {
eenc = rb_enc_from_index(opt->src.enc.index);
}
+#endif
else {
eenc = rb_locale_encoding();
}
+#if !WITH_OBJC
rb_enc_associate(opt->e_script, eenc);
+#endif
require_libraries();
tree = rb_parser_compile_string(parser, opt->script, opt->e_script, 1);
}
@@ -1233,11 +1276,20 @@
}
rb_io_ungetc(f, INT2FIX('#'));
if (no_src_enc && opt->src.enc.name) {
+#if WITH_OBJC
+ opt->src.enc.enc = opt_enc_find(opt->src.enc.name);
+ src_encoding = opt->src.enc.enc;
+#else
opt->src.enc.index = opt_enc_index(opt->src.enc.name);
src_encoding_index = opt->src.enc.index;
+#endif
}
if (no_ext_enc && opt->ext.enc.name) {
+#if WITH_OBJC
+ opt->ext.enc.enc = opt_enc_find(opt->ext.enc.name);
+#else
opt->ext.enc.index = opt_enc_index(opt->ext.enc.name);
+#endif
}
}
else if (!NIL_P(c)) {
@@ -1245,6 +1297,14 @@
}
require_libraries(); /* Why here? unnatural */
}
+#if WITH_OBJC
+ if (opt->src.enc.enc != NULL) {
+ enc = opt->src.enc.enc;
+ }
+ else {
+ enc = rb_locale_encoding();
+ }
+#else
if (opt->src.enc.index >= 0) {
enc = rb_enc_from_index(opt->src.enc.index);
}
@@ -1254,6 +1314,7 @@
else {
enc = rb_usascii_encoding();
}
+#endif
rb_funcall(f, rb_intern("set_encoding"), 1, rb_enc_from_encoding(enc));
tree = (NODE *)rb_parser_compile_file(parser, fname, f, line_start);
rb_funcall(f, rb_intern("set_encoding"), 1, rb_parser_encoding(parser));
@@ -1272,7 +1333,11 @@
struct cmdline_options opt;
MEMZERO(&opt, opt, 1);
+#if WITH_OBJC
+ opt.src.enc.enc = src_encoding;
+#else
opt.src.enc.index = src_encoding_index;
+#endif
return load_file(rb_parser_new(), fname, 0, &opt);
}
@@ -1504,8 +1569,13 @@
args.argc = argc;
args.argv = argv;
args.opt = &opt;
+#if WITH_OBJC
+ opt.src.enc.enc = src_encoding;
+ opt.ext.enc.enc = NULL;
+#else
opt.src.enc.index = src_encoding_index;
opt.ext.enc.index = -1;
+#endif
tree = (NODE *)rb_vm_call_cfunc(rb_vm_top_self(),
process_options, (VALUE)&args,
0, rb_progname);
Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c 2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/string.c 2008-05-15 08:06:22 UTC (rev 192)
@@ -215,15 +215,18 @@
}
#endif
-#define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
-#define is_broken_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN)
-
#if WITH_OBJC
-# define STR_ENC_GET(str) (rb_ascii8bit_encoding()) /* TODO */
+/* TODO */
+# define is_ascii_string(str) (1)
+# define is_broken_string(str) (0)
+# define STR_ENC_GET(str) (NULL)
#else
+# define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
+# define is_broken_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN)
# define STR_ENC_GET(str) rb_enc_from_index(ENCODING_GET(str))
#endif
+#if !WITH_OBJC
static int
single_byte_optimizable(VALUE str)
{
@@ -240,9 +243,11 @@
* "\xa1" in Shift_JIS for example. */
return 0;
}
+#endif
VALUE rb_fs;
+#if !WITH_OBJC
static inline const char *
search_nonascii(const char *p, const char *e)
{
@@ -420,9 +425,6 @@
int
rb_enc_str_coderange(VALUE str)
{
-#if WITH_OBJC
- return ENC_CODERANGE_VALID;
-#else
int cr = ENC_CODERANGE(str);
if (cr == ENC_CODERANGE_UNKNOWN) {
@@ -431,15 +433,11 @@
ENC_CODERANGE_SET(str, cr);
}
return cr;
-#endif
}
int
rb_enc_str_asciionly_p(VALUE str)
{
-#if WITH_OBJC
- return Qtrue;
-#else
rb_encoding *enc = STR_ENC_GET(str);
if (!rb_enc_asciicompat(enc))
@@ -447,8 +445,8 @@
else if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
return Qtrue;
return Qfalse;
+}
#endif
-}
static inline void
str_mod_check(VALUE s, const char *p, long len)
@@ -585,7 +583,7 @@
{
VALUE str = str_new(rb_cString, ptr, len);
- ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
+ //ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
return str;
}
@@ -908,6 +906,7 @@
return str;
}
+#if !WITH_OBJC
long
rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
{
@@ -990,6 +989,7 @@
if (!*cr) *cr = ENC_CODERANGE_7BIT;
return c;
}
+#endif
static long
str_strlen(VALUE str, rb_encoding *enc)
@@ -1399,6 +1399,7 @@
return rb_check_string_type(str);
}
+#if !WITH_OBJC
char*
rb_enc_nth(const char *p, const char *e, int nth, rb_encoding *enc)
{
@@ -1512,7 +1513,8 @@
if (!pp) return e - p;
return pp - p;
}
-#endif
+#endif /* NONASCII_MASK */
+#endif /* WITH_OBJC */
/* byte offset to char offset */
long
@@ -2904,6 +2906,7 @@
NEIGHBOR_WRAPPED
};
+#if !WITH_OBJC
static enum neighbor_char
enc_succ_char(char *p, int len, rb_encoding *enc)
{
@@ -2967,6 +2970,7 @@
}
}
}
+#endif
/*
overwrite +p+ by succeeding letter in +enc+ and returns
@@ -2980,6 +2984,10 @@
static enum neighbor_char
enc_succ_alnum_char(char *p, int len, rb_encoding *enc, char *carry)
{
+#if WITH_OBJC
+ /* TODO rewrite me */
+ return NEIGHBOR_NOT_CHAR;
+#else
enum neighbor_char ret;
int c;
int ctype;
@@ -3031,6 +3039,7 @@
MEMCPY(carry, p, char, len);
enc_succ_char(carry, len, enc);
return NEIGHBOR_WRAPPED;
+#endif
}
@@ -3062,6 +3071,69 @@
VALUE
rb_str_succ(VALUE orig)
{
+#if WITH_OBJC
+ UniChar *buf;
+ UniChar carry;
+ long i, len;
+ bool modified;
+
+ len = CFStringGetLength((CFStringRef)orig);
+ if (len == 0)
+ return orig;
+
+ buf = (UniChar *)alloca(sizeof(UniChar) * (len + 1));
+ buf++;
+
+ CFStringGetCharacters((CFStringRef)orig, CFRangeMake(0, len), buf);
+ modified = false;
+ carry = 0;
+
+ for (i = len - 1; i >= 0; i--) {
+ UniChar c = buf[i];
+ if (iswdigit(c)) {
+ modified = true;
+ if (c != '9') {
+ buf[i]++;
+ carry = 0;
+ break;
+ }
+ else {
+ buf[i] = '0';
+ carry = '1';
+ }
+ }
+ else if (iswalpha(c)) {
+ bool lower = islower(c);
+ UniChar e = lower ? 'z' : 'Z';
+ modified = true;
+ if (c != e) {
+ buf[i]++;
+ carry = 0;
+ break;
+ }
+ else {
+ carry = buf[i] = lower ? 'a' : 'A';
+ }
+ }
+ }
+
+ if (!modified) {
+ buf[len-1]++;
+ }
+ else if (carry != 0) {
+ buf--;
+ *buf = carry;
+ len++;
+ }
+
+ CFMutableStringRef newstr;
+
+ newstr = CFStringCreateMutable(NULL, 0);
+ CFStringAppendCharacters(newstr, buf, len);
+ CFMakeCollectable(newstr);
+
+ return (VALUE)newstr;
+#else
rb_encoding *enc;
VALUE str;
char *sbeg, *s, *e;
@@ -3133,6 +3205,7 @@
rb_enc_str_coderange(str);
#endif
return str;
+#endif
}
@@ -3186,8 +3259,28 @@
rb_scan_args(argc, argv, "11", &end, &exclusive);
excl = RTEST(exclusive);
- succ = rb_intern("succ");
StringValue(end);
+#if WITH_OBJC
+ if (RSTRING_CLEN(beg) == 1 && RSTRING_CLEN(end) == 1) {
+ UniChar c = CFStringGetCharacterAtIndex((CFStringRef)beg, 0);
+ UniChar e = CFStringGetCharacterAtIndex((CFStringRef)end, 0);
+
+ if (c > e || (excl && c == e))
+ return beg;
+ for (;;) {
+ CFMutableStringRef substr;
+ substr = CFStringCreateMutable(NULL, 0);
+ CFStringAppendCharacters(substr, &c, 1);
+ CFMakeCollectable(substr);
+ rb_yield((VALUE)substr);
+ if (!excl && c == e)
+ break;
+ c++;
+ if (excl && c == e)
+ break;
+ }
+ return beg;
+#else
enc = rb_enc_check(beg, end);
if (RSTRING_CLEN(beg) == 1 && RSTRING_CLEN(end) == 1 &&
is_ascii_string(beg) && is_ascii_string(end)) {
@@ -3202,10 +3295,12 @@
if (excl && c == e) break;
}
return beg;
+#endif
}
n = rb_str_cmp(beg, end);
if (n > 0 || (excl && n == 0)) return beg;
+ succ = rb_intern("succ");
after_end = rb_funcall(end, succ, 0, 0);
current = beg;
while (!rb_str_equal(current, after_end)) {
@@ -3456,9 +3551,13 @@
end = END(nth);
len = end - start;
StringValue(val);
+#if !WITH_OBJC
enc = rb_enc_check(str, val);
+#endif
rb_str_splice_0(str, start, len, val);
+#if !WITH_OBJC
rb_enc_associate(str, enc);
+#endif
}
static VALUE
@@ -3681,7 +3780,9 @@
pat = get_pat(argv[0], 1);
if (rb_reg_search(pat, str, 0, 0) >= 0) {
rb_encoding *enc;
+#if !WITH_OBJC
int cr = ENC_CODERANGE(str);
+#endif
match = rb_backref_get();
regs = RMATCH_REGS(match);
@@ -3859,9 +3960,7 @@
val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0)));
val = rb_obj_as_string(val);
}
-#if !WITH_OBJC
str_mod_check(str, sp, slen);
-#endif
if (bang) str_frozen_check(str);
if (val == dest) { /* paranoid check [ruby-dev:24827] */
rb_raise(rb_eRuntimeError, "block should not cheat");
@@ -3888,7 +3987,11 @@
* in order to prevent infinite loops.
*/
if (slen <= END(0)) break;
+#if WITH_OBJC
+ len = 1;
+#else
len = rb_enc_mbclen(sp+END(0), sp+slen, str_enc);
+#endif
rb_enc_str_buf_cat(dest, sp+END(0), len, str_enc);
offset = END(0) + len;
}
@@ -4340,11 +4443,16 @@
static void
str_cat_char(VALUE str, int c, rb_encoding *enc)
{
+#if WITH_OBJC
+ CFStringAppendCharacters((CFMutableStringRef)str,
+ (const UniChar *)&c, 1);
+#else
char s[16];
int n = rb_enc_codelen(c, enc);
rb_enc_mbcput(c, s, enc);
rb_enc_str_buf_cat(str, s, n, enc);
+#endif
}
static void
@@ -4388,14 +4496,20 @@
p = RSTRING_PTR(str); pend = RSTRING_END(str);
#endif
result = rb_str_buf_new2("");
+#if !WITH_OBJC
if (!rb_enc_asciicompat(enc)) enc = rb_usascii_encoding();
rb_enc_associate(result, enc);
+#endif
str_cat_char(result, '"', enc);
while (p < pend) {
int c;
int n;
int cc;
+#if WITH_OBJC
+ c = *p;
+ n = 1;
+#else
n = rb_enc_precise_mbclen(p, pend, enc);
if (!MBCLEN_CHARFOUND_P(n)) {
p++;
@@ -4406,13 +4520,18 @@
c = rb_enc_codepoint(p, pend, enc);
n = rb_enc_codelen(c, enc);
+#endif
p += n;
if (c == '"'|| c == '\\' ||
(c == '#' &&
p < pend &&
+#if WITH_OBJC
+ ((cc = *p),
+#else
MBCLEN_CHARFOUND_P(rb_enc_precise_mbclen(p,pend,enc)) &&
(cc = rb_enc_codepoint(p,pend,enc),
+#endif
(cc == '$' || cc == '@' || cc == '{')))) {
prefix_escape(result, c, enc);
}
@@ -4522,7 +4641,7 @@
}
if (!rb_enc_asciicompat(enc0)) {
len += 19; /* ".force_encoding('')" */
- len += strlen(enc0->name);
+ len += strlen(rb_enc_name(enc0));
}
result = rb_str_new5(str, 0, len);
@@ -4584,13 +4703,17 @@
}
*q++ = '"';
if (!rb_enc_asciicompat(enc0)) {
- sprintf(q, ".force_encoding(\"%s\")", enc0->name);
+ sprintf(q, ".force_encoding(\"%s\")", rb_enc_name(enc0));
+#if !WITH_OBJC
enc0 = rb_ascii8bit_encoding();
+#endif
}
OBJ_INFECT(result, str);
/* result from dump is ASCII */
+#if !WITH_OBJC
rb_enc_associate(result, enc0);
+#endif
RSTRING_SYNC(result);
return result;
}
@@ -5512,7 +5635,9 @@
}
if (modify) {
+#if !WITH_OBJC
rb_enc_associate(str, enc);
+#endif
return str;
}
return Qnil;
@@ -6016,13 +6141,16 @@
VALUE spat;
VALUE limit;
int awk_split = Qfalse;
+ int spat_string = Qfalse;
long beg, end, i = 0;
int lim = 0;
VALUE result, tmp;
const char *cstr;
long clen;
+#if !WITH_OBJC
cstr = RSTRING_CPTR(str);
+#endif
clen = RSTRING_CLEN(str);
if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
@@ -6036,11 +6164,7 @@
i = 1;
}
-#if WITH_OBJC
- enc = rb_ascii8bit_encoding();
-#else
enc = STR_ENC_GET(str);
-#endif
result = rb_ary_new();
if (NIL_P(spat)) {
if (!NIL_P(rb_fs)) {
@@ -6052,17 +6176,19 @@
else {
fs_set:
if (TYPE(spat) == T_STRING) {
+ spat_string = Qtrue;
+#if WITH_OBJC
+ if (RSTRING_CLEN(spat) == 1
+ && CFStringGetCharacterAtIndex((CFStringRef)spat, 0) == ' ') {
+ awk_split = Qtrue;
+ }
+#else
const char *spat_cstr;
long spat_clen;
-#if WITH_OBJC
- rb_encoding *enc2 = rb_ascii8bit_encoding();
-#else
rb_encoding *enc2 = STR_ENC_GET(spat);
-#endif
spat_cstr = RSTRING_CPTR(spat);
spat_clen = RSTRING_CLEN(spat);
-
if (rb_enc_mbminlen(enc2) == 1) {
if (spat_clen == 1 && spat_cstr[0] == ' '){
awk_split = Qtrue;
@@ -6076,47 +6202,59 @@
}
}
if (!awk_split) {
+ spat = rb_reg_regcomp(rb_reg_quote(spat));
+ }
+#endif
+ }
+ else {
+ spat = get_pat(spat, 1);
+ }
+ }
+
+ beg = 0;
#if WITH_OBJC
- CFRange search_range;
- search_range = CFRangeMake(0, clen);
- do {
- CFRange result_range;
- CFRange substr_range;
- if (!CFStringFindWithOptions((CFStringRef)str,
+ if (awk_split || spat_string) {
+ CFRange search_range;
+ if (spat == Qnil)
+ spat = (VALUE)CFSTR(" ");
+ search_range = CFRangeMake(0, clen);
+ do {
+ CFRange result_range;
+ CFRange substr_range;
+ if (!CFStringFindWithOptions((CFStringRef)str,
(CFStringRef)spat,
search_range,
0,
&result_range))
- break;
+ break;
- substr_range.location = search_range.location;
- substr_range.length = result_range.location
- - search_range.location;
+ substr_range.location = search_range.location;
+ substr_range.length = result_range.location
+ - search_range.location;
- rb_ary_push(result,
- rb_str_subseq(str, substr_range.location,
- substr_range.length));
+ if (awk_split == Qfalse || substr_range.length > 0) {
+ VALUE substr;
+
+ substr = rb_str_subseq(str, substr_range.location,
+ substr_range.length);
- search_range.location = result_range.location
- + result_range.length;
- search_range.length = clen - search_range.location;
+ if (awk_split == Qtrue) {
+ CFStringTrimWhitespace((CFMutableStringRef)substr);
+ if (CFStringGetLength((CFStringRef)substr) > 0)
+ rb_ary_push(result, substr);
}
- while ((limit == Qnil || --lim > 1));
- rb_ary_push(result,
- rb_str_subseq(str, search_range.location,
- search_range.length));
- goto done;
-#else
- spat = rb_reg_regcomp(rb_reg_quote(spat));
-#endif
+ else {
+ rb_ary_push(result, substr);
+ }
}
+
+ search_range.location = result_range.location
+ + result_range.length;
+ search_range.length = clen - search_range.location;
}
- else {
- spat = get_pat(spat, 1);
- }
- }
-
- beg = 0;
+ while ((limit == Qnil || --lim > 1));
+ beg = search_range.location;
+#else
if (awk_split) {
const char *ptr = cstr;
const char *eptr = cstr+clen;
@@ -6150,6 +6288,7 @@
}
}
}
+#endif
}
else {
long start = beg;
@@ -6160,22 +6299,34 @@
while ((end = rb_reg_search(spat, str, start, 0)) >= 0) {
regs = RMATCH_REGS(rb_backref_get());
if (start == end && BEG(0) == END(0)) {
+#if WITH_OBJC
+ if (0) {
+#else
if (!cstr) {
- //rb_ary_push(result, rb_str_new("", 0));
+ rb_ary_push(result, rb_str_new("", 0));
+#endif
break;
}
else if (last_null == 1) {
+#if WITH_OBJC
+ rb_ary_push(result, rb_str_subseq(str, beg, 1));
+#else
rb_ary_push(result, rb_str_subseq(str, beg,
rb_enc_mbclen(cstr+beg,
cstr+clen,
enc)));
+#endif
beg = start;
}
else {
- if (cstr+start == cstr+clen)
+ if (start == clen)
start++;
else
+#if WITH_OBJC
+ start += 1;
+#else
start += rb_enc_mbclen(cstr+start,cstr+clen,enc);
+#endif
last_null = 1;
continue;
}
@@ -6204,7 +6355,6 @@
tmp = rb_str_subseq(str, beg, clen-beg);
rb_ary_push(result, tmp);
}
-done:
if (NIL_P(limit) && lim == 0) {
while (RARRAY_LEN(result) > 0 &&
RSTRING_CLEN(RARRAY_AT(result, RARRAY_LEN(result)-1)) == 0)
@@ -7885,7 +8035,7 @@
VALUE str;
sym = rb_id2str(id);
- if (!rb_enc_symname_p(RSTRING_CPTR(sym), rb_ascii8bit_encoding())) {
+ if (!rb_enc_symname_p(RSTRING_CPTR(sym), NULL)) {
sym = rb_str_inspect(sym);
}
str = rb_str_new(":", 1);
Modified: MacRuby/trunk/time.c
===================================================================
--- MacRuby/trunk/time.c 2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/time.c 2008-05-15 08:06:22 UTC (rev 192)
@@ -2097,7 +2097,7 @@
rb_str_cat(str, buf, len);
p += strlen(p);
if (buf != buffer) {
- free(buf);
+ xfree(buf);
buf = buffer;
}
for (fmt = p; p < pe && !*p; ++p);
@@ -2109,8 +2109,10 @@
len = rb_strftime(&buf, RSTRING_CPTR(format), &tobj->tm);
}
str = rb_str_new(buf, len);
- if (buf != buffer) free(buf);
+ if (buf != buffer) xfree(buf);
+#if !WITH_OBJC
rb_enc_copy(str, format);
+#endif
return str;
}
Modified: MacRuby/trunk/transcode.c
===================================================================
--- MacRuby/trunk/transcode.c 2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/transcode.c 2008-05-15 08:06:22 UTC (rev 192)
@@ -10,6 +10,9 @@
**********************************************************************/
#include "ruby/ruby.h"
+
+#if !WITH_OBJC
+
#include "ruby/encoding.h"
#define PType (int)
#include "transcode_data.h"
@@ -441,15 +444,35 @@
return newstr;
}
+#else // WITH_OBJC
+
+static VALUE
+rb_str_transcode(int argc, VALUE *argv, VALUE self)
+{
+ /* TODO */
+ return self;
+}
+
+static VALUE
+rb_str_transcode_bang(int argc, VALUE *argv, VALUE self)
+{
+ /* TODO */
+ return self;
+}
+
+#endif
+
void
Init_transcode(void)
{
+#if !WITH_OBJC
transcoder_table = st_init_strcasetable();
transcoder_lib_table = st_init_strcasetable();
init_transcoder_table();
sym_invalid = ID2SYM(rb_intern("invalid"));
sym_ignore = ID2SYM(rb_intern("ignore"));
+#endif
rb_define_method(rb_cString, "encode", rb_str_transcode, -1);
rb_define_method(rb_cString, "encode!", rb_str_transcode_bang, -1);
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://lists.macosforge.org/pipermail/macruby-changes/attachments/20080515/b6022fda/attachment-0001.htm
More information about the macruby-changes
mailing list