[192] MacRuby/trunk

15 May 2008

Revision: 192
          http://trac.macosforge.org/projects/ruby/changeset/192
Author:   lsansonetti@apple.com
Date:     2008-05-15 01:06:22 -0700 (Thu, 15 May 2008)

Log Message:
-----------
removing the upstream encoding implementations and using CFString's instead + misc bug and memory leak fixes

Modified Paths:
--------------
    MacRuby/trunk/bs.c
    MacRuby/trunk/debug.c
    MacRuby/trunk/encoding.c
    MacRuby/trunk/gc.c
    MacRuby/trunk/include/ruby/encoding.h
    MacRuby/trunk/io.c
    MacRuby/trunk/marshal.c
    MacRuby/trunk/numeric.c
    MacRuby/trunk/objc.m
    MacRuby/trunk/parse.y
    MacRuby/trunk/re.c
    MacRuby/trunk/ruby.c
    MacRuby/trunk/string.c
    MacRuby/trunk/time.c
    MacRuby/trunk/transcode.c

Modified: MacRuby/trunk/bs.c
===================================================================

--- MacRuby/trunk/bs.c	2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/bs.c	2008-05-15 08:06:22 UTC (rev 192)
@@ -186,6 +186,7 @@
         break;
     }
   }
+  free(type_modifier);
 } 
 
 static inline bool
@@ -921,6 +922,7 @@
             ASSERT_ALLOC(bs_informal_method);
 
             bs_informal_method->name = sel_registerName(selector);
+	    free(selector);
             bs_informal_method->class_method = 
               get_boolean_attribute(reader, "class_method", false);
             bs_informal_method->type = method_type;

Modified: MacRuby/trunk/debug.c
===================================================================
--- MacRuby/trunk/debug.c	2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/debug.c	2008-05-15 08:06:22 UTC (rev 192)
@@ -21,6 +21,7 @@
     enum ruby_value_type        value_type;
     enum node_type              node_type;
     enum {
+#if !WITH_OBJC
         RUBY_ENCODING_INLINE_MAX = ENCODING_INLINE_MAX,
         RUBY_ENCODING_SHIFT = ENCODING_SHIFT,
         RUBY_ENCODING_MASK  = ENCODING_MASK,
@@ -28,7 +29,8 @@
         RUBY_ENC_CODERANGE_UNKNOWN = ENC_CODERANGE_UNKNOWN,
         RUBY_ENC_CODERANGE_7BIT    = ENC_CODERANGE_7BIT,
         RUBY_ENC_CODERANGE_VALID   = ENC_CODERANGE_VALID,
-        RUBY_ENC_CODERANGE_BROKEN  = ENC_CODERANGE_BROKEN, 
+        RUBY_ENC_CODERANGE_BROKEN  = ENC_CODERANGE_BROKEN,
+#endif
         RUBY_FL_MARK        = FL_MARK,
         RUBY_FL_RESERVED    = FL_RESERVED,
         RUBY_FL_FINALIZE    = FL_FINALIZE,

Modified: MacRuby/trunk/encoding.c
===================================================================
--- MacRuby/trunk/encoding.c	2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/encoding.c	2008-05-15 08:06:22 UTC (rev 192)
@@ -20,6 +20,100 @@
 static ID id_encoding, id_base_encoding;
 static VALUE rb_cEncoding;
 
+#if WITH_OBJC
+
+static CFMutableDictionaryRef __encodings = NULL;
+
+static VALUE
+enc_new(const CFStringEncoding *enc)
+{
+    return Data_Wrap_Struct(rb_cEncoding, NULL, NULL, (void *)enc);
+}
+
+static void
+enc_init_db(void)
+{
+    const CFStringEncoding *e;
+
+    __encodings = CFDictionaryCreateMutable(NULL, 0, NULL, NULL);
+    /* Wrap every available CFStringEncoding and index the wrapper by its id. */
+    e = CFStringGetListOfAvailableEncodings();
+    while (e != NULL && *e != kCFStringEncodingInvalidId) {
+	VALUE iana;
+	VALUE encoding;
+
+	encoding = enc_new(e);
+
+	iana = (VALUE)CFStringConvertEncodingToIANACharSetName(*e);
+	if (iana != 0) {
+	    const char *name;
+	    char *p;
+
+	    name = RSTRING_CPTR(iana);
+	    /* Constant name: every '-' becomes '_', a lowercase first letter is upcased. */
+	    if ((p = strchr(name, '-')) != NULL
+		|| islower(*name)) {
+		char *tmp = alloca(strlen(name) + 1); /* + 1: strcpy also writes the NUL */
+		strcpy(tmp, name);
+		if (p != NULL) {
+		    p = tmp + (p - name);
+		    do {
+			*p = '_';
+			p++;
+			p = strchr(p, '-');
+		    }
+		    while (p != NULL);
+		}
+		if (islower(*tmp))
+		    *tmp = toupper(*tmp);
+		name = tmp;
+	    }
+	    rb_define_const(rb_cEncoding, name, encoding);
+	}
+	CFDictionarySetValue(__encodings, (const void *)(*e), 
+	    (const void *)encoding);
+	e++;
+    }
+
+    assert(CFDictionaryGetCount((CFDictionaryRef)__encodings) > 0);
+}
+
+static VALUE
+enc_make(const CFStringEncoding *enc)
+{
+    VALUE v;
+    v = (VALUE)CFDictionaryGetValue( (CFDictionaryRef)__encodings, 
+	(const void *)(*enc));
+    assert(v != 0);
+    return v;
+}
+
+VALUE
+rb_enc_from_encoding(rb_encoding *enc)
+{
+    return enc_make(enc);
+}
+
+static inline CFStringEncoding
+rb_enc_to_enc(VALUE v)
+{
+    return *(CFStringEncoding *)DATA_PTR(v);
+}
+
+static inline CFStringEncoding *
+rb_enc_to_enc_ptr(VALUE v)
+{
+    return (CFStringEncoding *)DATA_PTR(v);
+}
+
+rb_encoding *
+rb_to_encoding(VALUE v)
+{
+    return rb_enc_to_enc_ptr(v);
+}
+
+#else
+
 struct rb_encoding_entry {
     const char *name;
     rb_encoding *enc;
@@ -81,9 +175,6 @@
 {
     VALUE enc = Data_Wrap_Struct(rb_cEncoding, enc_mark, 0, encoding);
     encoding->auxiliary_data = (void *)enc;
-#if WITH_OBJC
-    rb_objc_retain(enc);
-#endif
     return enc;
 }
 
@@ -348,12 +439,17 @@
     return index;
 }
 #endif
+#endif // WITH_OBJC
 
 int
 rb_enc_dummy_p(rb_encoding *enc)
 {
+#if WITH_OBJC
+    return Qfalse;
+#else
     VALUE encoding = rb_enc_from_encoding(enc);
     return ENC_DUMMY_P(encoding);
+#endif
 }
 
 /*
@@ -375,6 +471,7 @@
     return rb_enc_dummy_p(rb_to_encoding(enc)) ? Qtrue : Qfalse;
 }
 
+#if !WITH_OBJC
 static int
 enc_alias(const char *alias, int idx)
 {
@@ -576,6 +673,7 @@
 	rb_raise(rb_eTypeError, "wrong argument type %s (not encode capable)", etype);
     }
 }
+#endif
 
 ID
 rb_id_encoding(void)
@@ -586,12 +684,10 @@
     return id_encoding;
 }
 
+#if !WITH_OBJC
 int
 rb_enc_internal_get_index(VALUE obj)
 {
-#if WITH_OBJC
-    return 0;
-#else
     int i;
 
     i = ENCODING_GET_INLINED(obj);
@@ -602,13 +698,11 @@
 	i = NUM2INT(iv);
     }
     return i;
-#endif
 }
 
 void
 rb_enc_internal_set_index(VALUE obj, int idx)
 {
-#if !WITH_OBJC
     if (idx < ENCODING_INLINE_MAX) {
 	ENCODING_SET_INLINED(obj, idx);
 	return;
@@ -616,7 +710,6 @@
     ENCODING_SET_INLINED(obj, ENCODING_INLINE_MAX);
     rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx));
     return;
-#endif
 }
 
 void
@@ -648,31 +741,31 @@
 rb_encoding*
 rb_enc_get(VALUE obj)
 {
-#if WITH_OBJC
-    return rb_ascii8bit_encoding(); /* FIXME */
-#else
     return rb_enc_from_index(rb_enc_get_index(obj));
-#endif
 }
 
 rb_encoding*
 rb_enc_check(VALUE str1, VALUE str2)
 {
-#if WITH_OBJC
-    return NULL;
-#else
     rb_encoding *enc = rb_enc_compatible(str1, str2);
     if (!enc)
 	rb_raise(rb_eArgError, "character encodings differ: %s and %s",
 		 rb_enc_name(rb_enc_get(str1)),
 		 rb_enc_name(rb_enc_get(str2)));
     return enc;
+}
 #endif
-}
 
 rb_encoding*
 rb_enc_compatible(VALUE str1, VALUE str2)
 {
+#if WITH_OBJC
+    /* TODO */
+    rb_encoding *enc = rb_enc_get(str1);
+    if (enc == rb_enc_get(str2))
+	return enc;
+    return NULL;
+#else
     int idx1, idx2;
     rb_encoding *enc1, *enc2;
 
@@ -724,15 +817,17 @@
 	    return enc2;
     }
     return 0;
+#endif
 }
 
+#if !WITH_OBJC
 void
 rb_enc_copy(VALUE obj1, VALUE obj2)
 {
     rb_enc_associate_index(obj1, rb_enc_get_index(obj2));
 }
+#endif
 
-
 /*
  *  call-seq:
  *     obj.encoding   => encoding
@@ -743,26 +838,18 @@
 VALUE
 rb_obj_encoding(VALUE obj)
 {
-#if WITH_OBJC
-    /* TODO */
-    return Qnil;
-#else
     rb_encoding *enc = rb_enc_get(obj);
     if (!enc) {
 	rb_raise(rb_eTypeError, "unknown encoding");
     }
     return rb_enc_from_encoding(enc);
-#endif
 }
 
+#if !WITH_OBJC
 int
 rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
 {
-#if WITH_OBJC
-    int n = 1;
-#else
     int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
-#endif
     if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
         return MBCLEN_CHARFOUND_LEN(n);
     else {
@@ -840,6 +927,7 @@
 {
     return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_LOWER_CASE(c):(c));
 }
+#endif
 
 /*
  * call-seq:
@@ -853,11 +941,24 @@
 static VALUE
 enc_inspect(VALUE self)
 {
+#if WITH_OBJC
+    char buffer[512];
+    VALUE enc_name;
+    long n;
+
+    enc_name = (VALUE)CFStringGetNameOfEncoding(rb_enc_to_enc(self));
+    /* snprintf returns the untruncated length (or < 0); clamp before use. */
+    n = snprintf(buffer, sizeof buffer, "#<%s:%s>", rb_obj_classname(self),
+	RSTRING_CPTR(enc_name));
+    n = n < 0 ? 0 : (n < (long)sizeof buffer ? n : (long)sizeof buffer - 1);
+    return rb_str_new(buffer, n);
+#else
     VALUE str = rb_sprintf("#<%s:%s%s>", rb_obj_classname(self),
 		      rb_enc_name((rb_encoding*)DATA_PTR(self)),
 		      (ENC_DUMMY_P(self) ? " (dummy)" : ""));
     ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
     return str;
+#endif
 }
 
 /*
@@ -871,7 +972,11 @@
 static VALUE
 enc_name(VALUE self)
 {
+#if WITH_OBJC
+    return (VALUE)CFStringConvertEncodingToIANACharSetName(rb_enc_to_enc(self));
+#else
     return rb_usascii_str_new2(rb_enc_name((rb_encoding*)DATA_PTR(self)));
+#endif
 }
 
 static VALUE
@@ -898,9 +1003,21 @@
  *       #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
  *
  */
+
 static VALUE
 enc_list(VALUE klass)
 {
+#if WITH_OBJC
+    VALUE ary;
+    const CFStringEncoding *e;
+
+    ary = rb_ary_new();
+    e = CFStringGetListOfAvailableEncodings();
+    while (e != NULL && *e != kCFStringEncodingInvalidId) {
+	rb_ary_push(ary, enc_make(e));
+	e++;
+    }
+#else
     VALUE ary = rb_ary_new2(enc_table.count);
     int i;
     for (i = 0; i < enc_table.count; ++i) {
@@ -909,6 +1026,7 @@
 	    rb_ary_push(ary, rb_enc_from_encoding(enc));
 	}
     }
+#endif
     return ary;
 }
 
@@ -925,8 +1043,25 @@
  *
  */
 static VALUE
+enc_find2(VALUE enc)
+{
+    CFStringEncoding e;
+    
+    e = CFStringConvertIANACharSetNameToEncoding((CFStringRef)StringValue(enc));
+    if (e == kCFStringEncodingInvalidId)
+	return Qnil;
+    return enc_make(&e);
+}
+
+static VALUE
 enc_find(VALUE klass, VALUE enc)
 {
+#if WITH_OBJC
+    VALUE e = enc_find2(enc);
+    if (e == Qnil)
+	rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc));
+    return e;
+#else
     int idx;
 
     StringValue(enc);
@@ -938,6 +1073,7 @@
 	rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc));
     }
     return rb_enc_from_encoding(rb_enc_from_index(idx));
+#endif
 }
 
 /*
@@ -983,6 +1119,7 @@
     return enc_find(klass, str);
 }
 
+#if !WITH_OBJC
 rb_encoding *
 rb_ascii8bit_encoding(void)
 {
@@ -1045,7 +1182,24 @@
 {
     return rb_enc_from_encoding(rb_default_external_encoding());
 }
+#endif
 
+#if WITH_OBJC
+static rb_encoding *default_external;
+    
+rb_encoding *
+rb_default_external_encoding(void)
+{
+    return default_external;
+}
+
+VALUE
+rb_enc_default_external(void)
+{
+    return enc_make(default_external);
+}
+#endif
+
 /*
  * call-seq:
  *   Encoding.default_external => enc
@@ -1063,7 +1217,11 @@
 void
 rb_enc_set_default_external(VALUE encoding)
 {
+#if WITH_OBJC
+    default_external = rb_enc_to_enc_ptr(encoding);
+#else
     default_external_index = rb_enc_to_index(rb_to_encoding(encoding));
+#endif
 }
 
 /*
@@ -1088,7 +1246,10 @@
 VALUE
 rb_locale_charmap(VALUE klass)
 {
-#if defined NO_LOCALE_CHARMAP
+#if WITH_OBJC
+    CFStringEncoding enc = CFStringGetSystemEncoding();
+    return (VALUE)CFStringConvertEncodingToIANACharSetName(enc);
+#elif defined NO_LOCALE_CHARMAP
     return rb_usascii_str_new2("ASCII-8BIT");
 #elif defined HAVE_LANGINFO_H
     char *codeset;
@@ -1101,6 +1262,7 @@
 #endif
 }
 
+#if !WITH_OBJC
 static void
 set_encoding_const(const char *name, rb_encoding *enc)
 {
@@ -1156,6 +1318,7 @@
     rb_ary_push(ary, str);
     return ST_CONTINUE;
 }
+#endif
 
 /*
  * call-seq:
@@ -1176,11 +1339,22 @@
 static VALUE
 rb_enc_name_list(VALUE klass)
 {
+#if WITH_OBJC
+    VALUE ary, list;
+    long i, count;
+
+    ary = rb_ary_new();
+    list = enc_list(klass);
+    for (i = 0, count = RARRAY_LEN(list); i < count; i++)
+	rb_ary_push(ary, enc_name(RARRAY_AT(list, i)));
+#else
     VALUE ary = rb_ary_new2(enc_table.names->num_entries);
     st_foreach(enc_table.names, rb_enc_name_list_i, (st_data_t)ary);
+#endif
     return ary;
 }
 
+#if !WITH_OBJC
 static int
 rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg)
 {
@@ -1204,6 +1378,7 @@
     rb_hash_aset(aliases, key, str);
     return ST_CONTINUE;
 }
+#endif
 
 /*
  * call-seq:
@@ -1220,13 +1395,78 @@
 static VALUE
 rb_enc_aliases(VALUE klass)
 {
+#if WITH_OBJC
+    /* TODO: the CFString IANA <-> charset code does support aliases, we should
+     * find a way to return them here. 
+     */
+    return rb_hash_new();
+#else
     VALUE aliases[2];
     aliases[0] = rb_hash_new();
     aliases[1] = rb_ary_new();
     st_foreach(enc_table.names, rb_enc_aliases_enc_i, (st_data_t)aliases);
     return aliases[0];
+#endif
 }
 
+const char *
+rb_enc_name(rb_encoding *enc)
+{
+    CFStringRef str;
+    if (enc != NULL 
+	&& (str = CFStringConvertEncodingToIANACharSetName(*enc)) != NULL)
+	return RSTRING_CPTR(str);
+    return NULL;
+}
+
+long 
+rb_enc_mbminlen(rb_encoding *enc)
+{
+    return rb_enc_mbmaxlen(enc);
+}
+
+long
+rb_enc_mbmaxlen(rb_encoding *enc)
+{
+    return CFStringGetMaximumSizeForEncoding(1, *enc);
+}
+
+rb_encoding *
+rb_enc_find(const char *name)
+{
+    return rb_enc_find2(rb_str_new2(name));
+}
+
+rb_encoding *
+rb_enc_find2(VALUE name)
+{
+    VALUE e = enc_find2(name);
+    return e == Qnil ? NULL : rb_enc_to_enc_ptr(e);
+}
+
+rb_encoding *
+rb_enc_get(VALUE obj)
+{
+    int type = TYPE(obj);
+    if (type == T_STRING) {
+	CFStringEncoding enc = CFStringGetFastestEncoding((CFStringRef)obj);
+	if (enc == kCFStringEncodingInvalidId)
+	    return NULL;
+	return rb_enc_to_enc_ptr(enc_make(&enc));
+    }
+    else {
+	/* TODO */
+	return NULL;
+    }
+}
+
+rb_encoding *
+rb_locale_encoding(void)
+{
+    CFStringEncoding enc = CFStringGetSystemEncoding();
+    return rb_enc_to_enc_ptr(enc_make(&enc));
+}
+
 void
 Init_Encoding(void)
 {

Modified: MacRuby/trunk/gc.c
===================================================================
--- MacRuby/trunk/gc.c	2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/gc.c	2008-05-15 08:06:22 UTC (rev 192)
@@ -2039,7 +2039,8 @@
 	if (ctx->class_of != 0) {
 	    if (ctx->class_of == rb_cClass) {
 		/* Class is a special case. */
-		if (TYPE(r->address) != T_CLASS 
+		if (rb_objc_is_non_native(r->address)
+		    || TYPE(r->address) != T_CLASS 
 		    || FL_TEST(r->address, FL_SINGLETON))
 		    continue;
 	    }

Modified: MacRuby/trunk/include/ruby/encoding.h
===================================================================
--- MacRuby/trunk/include/ruby/encoding.h	2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/include/ruby/encoding.h	2008-05-15 08:06:22 UTC (rev 192)
@@ -17,6 +17,13 @@
 #else
 # include <varargs.h>
 #endif
+
+#if WITH_OBJC
+
+typedef CFStringEncoding rb_encoding;
+
+#else
+
 #include "ruby/oniguruma.h"
 
 #define ENCODING_INLINE_MAX 1023
@@ -48,8 +55,6 @@
 # define ENCODING_IS_ASCII8BIT(obj) (ENCODING_GET_INLINED(obj) == 0)
 #endif
 
-#define ENCODING_MAXNAMELEN 42
-
 #define ENC_CODERANGE_MASK	(FL_USER8|FL_USER9)
 #define ENC_CODERANGE_UNKNOWN	0
 #define ENC_CODERANGE_7BIT	FL_USER8
@@ -75,7 +80,10 @@
     } while (0)
 
 typedef OnigEncodingType rb_encoding;
+#endif
 
+#define ENCODING_MAXNAMELEN 42
+
 int rb_enc_replicate(const char *, rb_encoding *);
 int rb_define_dummy_encoding(const char *);
 int rb_enc_dummy_p(rb_encoding *);
@@ -108,12 +116,25 @@
 /* name -> rb_encoding */
 rb_encoding * rb_enc_find(const char *name);
 
+#if WITH_OBJC
+rb_encoding * rb_enc_find2(VALUE name);
+#endif
+
 /* encoding -> name */
+#if WITH_OBJC
+const char *rb_enc_name(rb_encoding *);
+#else
 #define rb_enc_name(enc) (enc)->name
+#endif
 
 /* encoding -> minlen/maxlen */
+#if WITH_OBJC
+long rb_enc_mbminlen(rb_encoding *);
+long rb_enc_mbmaxlen(rb_encoding *);
+#else
 #define rb_enc_mbminlen(enc) (enc)->min_enc_len
 #define rb_enc_mbmaxlen(enc) (enc)->max_enc_len
+#endif
 
 /* -> mbclen (no error notification: 0 < ret <= e-p, no exception) */
 int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc);
@@ -148,6 +169,17 @@
 /* ptr, ptr, encoding -> newline_or_not */
 #define rb_enc_is_newline(p,end,enc)  ONIGENC_IS_MBC_NEWLINE(enc,(UChar*)(p),(UChar*)(end))
 
+#if WITH_OBJC
+#define rb_enc_isctype(c,t,enc)	(iswctype(c,t))
+#define rb_enc_isascii(c,enc)	(iswascii(c))
+#define rb_enc_isalpha(c,enc)	(iswalpha(c))
+#define rb_enc_islower(c,enc)	(iswlower(c))
+#define rb_enc_isupper(c,enc)	(iswupper(c))
+#define rb_enc_isalnum(c,enc)	(iswalnum(c))
+#define rb_enc_isprint(c,enc)	(iswprint(c))
+#define rb_enc_isspace(c,enc)	(iswspace(c))
+#define rb_enc_isdigit(c,enc)	(iswdigit(c))
+#else
 #define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t)
 #define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)
 #define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA(enc,c)
@@ -157,6 +189,7 @@
 #define rb_enc_isprint(c,enc) ONIGENC_IS_CODE_PRINT(enc,c)
 #define rb_enc_isspace(c,enc) ONIGENC_IS_CODE_SPACE(enc,c)
 #define rb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT(enc,c)
+#endif
 
 #define rb_enc_asciicompat(enc) (!rb_enc_dummy_p(enc) && rb_enc_mbminlen(enc)==1)
 

Modified: MacRuby/trunk/io.c
===================================================================
--- MacRuby/trunk/io.c	2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/io.c	2008-05-15 08:06:22 UTC (rev 192)
@@ -1351,6 +1351,7 @@
 io_enc_str(VALUE str, rb_io_t *fptr)
 {
     OBJ_TAINT(str);
+#if !WITH_OBJC
     if (fptr->enc2) {
 	/* two encodings, so transcode from enc2 to enc */
 	/* the methods in transcode.c are static, so call indirectly */
@@ -1362,6 +1363,7 @@
 	/* just one encoding, so associate it with the string */
 	rb_enc_associate(str, io_read_encoding(fptr));
     }
+#endif
     return str;
 }
 
@@ -1372,7 +1374,9 @@
     long n;
     long pos = 0;
     rb_encoding *enc = io_input_encoding(fptr);
+#if !WITH_OBJC
     int cr = fptr->enc2 ? ENC_CODERANGE_BROKEN : 0;
+#endif
 
     if (siz == 0) siz = BUFSIZ;
     if (NIL_P(str)) {
@@ -1388,17 +1392,21 @@
             break;
 	}
 	bytes += n;
+#if !WITH_OBJC
 	if (cr != ENC_CODERANGE_BROKEN)
 	    pos = rb_str_coderange_scan_restartable(RSTRING_PTR(str) + pos, RSTRING_PTR(str) + bytes, enc, &cr);
+#endif
 	if (bytes < siz) break;
 	siz += BUFSIZ;
 	rb_str_resize(str, siz);
     }
     if (bytes != siz) rb_str_resize(str, bytes);
     str = io_enc_str(str, fptr);
+#if !WITH_OBJC
     if (!fptr->enc2) {
 	ENC_CODERANGE_SET(str, cr);
     }
+#endif
     return str;
 }
 
@@ -1738,6 +1746,7 @@
 		RSTRING_PTR(str)[last++] = c;
 	    }
 	    if (limit > 0 && limit == pending) {
+#if !WITH_OBJC
 		char *p = fptr->rbuf+fptr->rbuf_off;
 		char *pp = p + limit;
 		char *pl = rb_enc_left_char_head(p, pp, enc);
@@ -1748,6 +1757,7 @@
 		    limit = pending;
 		    rb_str_set_len(str, RSTRING_LEN(str)-diff);
 		}
+#endif
 	    }
 	    read_buffered_data(RSTRING_PTR(str) + last, pending, fptr); /* must not fail */
 	    limit -= pending;
@@ -1816,7 +1826,9 @@
     int len = 0;
     long pos = 0;
     rb_encoding *enc = io_input_encoding(fptr);
+#if !WITH_OBJC
     int cr = fptr->enc2 ? ENC_CODERANGE_BROKEN : 0;
+#endif
 
     for (;;) {
 	long pending = READ_DATA_PENDING_COUNT(fptr);
@@ -1839,8 +1851,10 @@
 		read_buffered_data(RSTRING_PTR(str)+len, pending, fptr);
 	    }
 	    len += pending;
+#if !WITH_OBJC
 	    if (cr != ENC_CODERANGE_BROKEN)
 		pos = rb_str_coderange_scan_restartable(RSTRING_PTR(str) + pos, RSTRING_PTR(str) + len, enc, &cr);
+#endif
 	    if (e) break;
 	}
 	rb_thread_wait_fd(fptr->fd);
@@ -1853,7 +1867,9 @@
 
     RSTRING_SYNC(str);
     str = io_enc_str(str, fptr);
+#if !WITH_OBJC
     if (!fptr->enc2) ENC_CODERANGE_SET(str, cr);
+#endif
     fptr->lineno++;
     ARGF.lineno = INT2FIX(fptr->lineno);
     return str;
@@ -1884,6 +1900,7 @@
 	}
     }
     if (!NIL_P(rs)) {
+#if !WITH_OBJC
 	rb_encoding *enc_rs, *enc_io;
 
 	GetOpenFile(io, fptr);
@@ -1911,6 +1928,7 @@
                 rs = rs2;
             }
 	}
+#endif
     }
     *rsp = rs;
     *limit = NIL_P(lim) ? -1L : NUM2LONG(lim);
@@ -1934,8 +1952,12 @@
     else if (limit == 0) {
 	return rb_enc_str_new(0, 0, io_read_encoding(fptr));
     }
-    else if (rs == rb_default_rs && limit < 0 &&
-             rb_enc_asciicompat(io_read_encoding(fptr))) {
+    else if (rs == rb_default_rs && limit < 0
+#if WITH_OBJC
+	    ) {
+#else	
+	     && rb_enc_asciicompat(io_read_encoding(fptr))) {
+#endif
 	return rb_io_getline_fast(fptr);
     }
     else {
@@ -1964,8 +1986,10 @@
 		if (RSTRING_LEN(str) < rslen) continue;
 		s = RSTRING_PTR(str);
 		p = s +  RSTRING_LEN(str) - rslen;
+#if !WITH_OBJC
 		pp = rb_enc_left_char_head(s, p, enc);
 		if (pp != p) continue;
+#endif
 		if (!rspara) rscheck(rsptr, rslen, rs);
 		if (memcmp(p, rsptr, rslen) == 0) break;
 	    }
@@ -2296,6 +2320,11 @@
     if (io_fillbuf(fptr) < 0) {
 	return Qnil;
     }
+#if WITH_OBJC
+    /* FIXME */
+    if (0) {
+    }
+#else
     r = rb_enc_precise_mbclen(fptr->rbuf+fptr->rbuf_off, fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc);
     if (MBCLEN_CHARFOUND_P(r) &&
         (n = MBCLEN_CHARFOUND_LEN(r)) <= fptr->rbuf_len) {
@@ -2317,6 +2346,7 @@
             }
         }
     }
+#endif
     else {
 	str = rb_str_new(fptr->rbuf+fptr->rbuf_off, 1);
 	fptr->rbuf_off++;
@@ -2447,10 +2477,14 @@
     enc = io_read_encoding(fptr);
     if (FIXNUM_P(c)) {
 	int cc = FIX2INT(c);
+#if WITH_OBJC
+	c = rb_str_new((char *)&cc, 1);
+#else
 	char buf[16];
 
 	rb_enc_mbcput(cc, buf, enc);
 	c = rb_str_new(buf, rb_enc_codelen(cc, enc));
+#endif
     }
     else {
 	SafeStringValue(c);
@@ -3227,11 +3261,24 @@
 {
     const char *p0, *p1;
     char *enc2name;
+#if WITH_OBJC
+    rb_encoding *enc1, *enc2; /* both are pointers: enc2 is set to NULL / rb_enc_find() and stored in fptr->enc2 */
+#else
     int idx, idx2;
-    
+#endif
+
     p0 = strrchr(estr, ':');
     if (!p0) p1 = estr;
     else     p1 = p0 + 1;
+#if WITH_OBJC
+    enc1 = rb_enc_find(p1);
+    if (enc1 != NULL) {
+	fptr->enc = enc1;
+    }
+    else {
+	rb_warn("Unsupported encoding %s ignored", p1);
+    }
+#else
     idx = rb_enc_find_index(p1);
     if (idx >= 0) {
 	fptr->enc = rb_enc_from_index(idx);
@@ -3239,28 +3286,49 @@
     else {
 	rb_warn("Unsupported encoding %s ignored", p1);
     }
+#endif
 
     if (p0) {
 	int n = p0 - estr;
 	if (n > ENCODING_MAXNAMELEN) {
+#if WITH_OBJC
+	    enc2 = NULL;
+#else
 	    idx2 = -1;
+#endif
 	}
 	else {
 	    enc2name = ALLOCA_N(char, n+1);
 	    memcpy(enc2name, estr, n);
 	    enc2name[n] = '\0';
 	    estr = enc2name;
+#if WITH_OBJC
+	    enc2 = rb_enc_find(enc2name);
+#else
 	    idx2 = rb_enc_find_index(enc2name);
+#endif
 	}
+#if WITH_OBJC
+	if (enc2 == NULL) {
+#else
 	if (idx2 < 0) {
+#endif
 	    rb_warn("Unsupported encoding %.*s ignored", n, estr);
 	}
+#if WITH_OBJC
+	else if (enc1 == enc2) {
+#else
 	else if (idx2 == idx) {
+#endif
 	    rb_warn("Ignoring internal encoding %.*s: it is identical to external encoding %s",
 		    n, estr, p1);
 	}
 	else {
+#if WITH_OBJC
+	    fptr->enc2 = enc2;
+#else
 	    fptr->enc2 = rb_enc_from_index(idx2);
+#endif
 	}
     }
 }

Modified: MacRuby/trunk/marshal.c
===================================================================
--- MacRuby/trunk/marshal.c	2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/marshal.c	2008-05-15 08:06:22 UTC (rev 192)
@@ -469,8 +469,18 @@
 static void
 w_encoding(VALUE obj, long num, struct dump_call_arg *arg)
 {
+    rb_encoding *enc = 0;
+#if WITH_OBJC
+    const char *name;
+
+    enc = rb_enc_get(obj);
+    if (enc == NULL) {
+	w_long(num, arg->arg);
+	return;
+    }
+    name = rb_enc_name(enc);
+#else
     int encidx = rb_enc_get_index(obj);
-    rb_encoding *enc = 0;
     st_data_t name;
 
     if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) {
@@ -487,6 +497,7 @@
 	name = (st_data_t)rb_str_new2(rb_enc_name(enc));
 	st_insert(arg->arg->encodings, (st_data_t)rb_enc_name(enc), name);
     } while (0);
+#endif
     w_object(name, arg->arg, arg->limit);
 }
 
@@ -1110,7 +1121,11 @@
 	while (len--) {
 	    ID id = r_symbol(arg);
 	    VALUE val = r_object(arg);
+#if WITH_OBJC
+	    if (0) {
+#else
 	    if (id == rb_id_encoding()) {
+#endif
 		int idx = rb_enc_find_index(StringValueCStr(val));
 		if (idx > 0) rb_enc_associate_index(obj, idx);
 	    }

Modified: MacRuby/trunk/numeric.c
===================================================================
--- MacRuby/trunk/numeric.c	2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/numeric.c	2008-05-15 08:06:22 UTC (rev 192)
@@ -1863,12 +1863,17 @@
 	rb_raise(rb_eArgError, "wrong number of arguments (%d for 0 or 1)", argc);
 	break;
     }
+#if WITH_OBJC
+    /* TODO */
+    rb_notimplement();
+#else
     enc = rb_to_encoding(argv[0]);
     if (!enc) enc = rb_ascii8bit_encoding();
     if (i < 0 || (n = rb_enc_codelen(i, enc)) <= 0) goto out_of_range;
     str = rb_enc_str_new(0, n, enc);
     rb_enc_mbcput(i, RSTRING_PTR(str), enc);
     return str;
+#endif
 }
 
 /********************************************************************

Modified: MacRuby/trunk/objc.m
===================================================================
--- MacRuby/trunk/objc.m	2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/objc.m	2008-05-15 08:06:22 UTC (rev 192)
@@ -460,6 +460,7 @@
 	{
 	    char v = RTEST(rval);
 	    *(id *)ocval = (id)CFNumberCreate(NULL, kCFNumberCharType, &v);
+	    CFMakeCollectable(*(id *)ocval);
 	    return true;
 	}
 
@@ -467,6 +468,7 @@
 	{
 	    double v = RFLOAT_VALUE(rval);
 	    *(id *)ocval = (id)CFNumberCreate(NULL, kCFNumberDoubleType, &v);
+	    CFMakeCollectable(*(id *)ocval);
 	    return true;
 	}	
 
@@ -487,6 +489,7 @@
 		*(id *)ocval = (id)CFNumberCreate(NULL, kCFNumberLongType, &v);
 #endif
 	    }
+	    CFMakeCollectable(*(id *)ocval);
 	    return true;
 	}
 
@@ -495,6 +498,7 @@
 	    ID name = SYM2ID(rval);
 	    *(id *)ocval = (id)CFStringCreateWithCString(NULL, rb_id2name(name),
 		kCFStringEncodingASCII); /* XXX this is temporary */
+	    CFMakeCollectable(*(id *)ocval);
 	    return true;
 	}
     }
@@ -2120,7 +2124,7 @@
     if (bs_find_path(framework_path, path, sizeof path)) {
 	if (!bs_parse(path, 0, bs_parse_cb, NULL, &error))
 	    rb_raise(rb_eRuntimeError, error);
-#if 1
+#if 0
 	/* FIXME 'GC capability mismatch' with .dylib files */
 	p = strrchr(path, '.');
 	assert(p != NULL);

Modified: MacRuby/trunk/parse.y
===================================================================
--- MacRuby/trunk/parse.y	2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/parse.y	2008-05-15 08:06:22 UTC (rev 192)
@@ -266,14 +266,23 @@
 #endif
 };
 
-#define UTF8_ENC() (parser->utf8 ? parser->utf8 : \
+#if WITH_OBJC
+# define UTF8_ENC() (NULL)
+#else
+# define UTF8_ENC() (parser->utf8 ? parser->utf8 : \
 		    (parser->utf8 = rb_utf8_encoding()))
+#endif
 #define STR_NEW(p,n) rb_enc_str_new((p),(n),parser->enc)
 #define STR_NEW0() rb_usascii_str_new(0,0)
 #define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc)
 #define STR_NEW3(p,n,e,func) parser_str_new((p),(n),(e),(func),parser->enc)
-#define STR_ENC(m) ((m)?parser->enc:rb_usascii_encoding())
-#define ENC_SINGLE(cr) ((cr)==ENC_CODERANGE_7BIT)
+#if WITH_OBJC
+# define STR_ENC(m) (parser->enc)
+# define ENC_SINGLE(cr) (1)
+#else
+# define STR_ENC(m) ((m)?parser->enc:rb_usascii_encoding())
+# define ENC_SINGLE(cr) ((cr)==ENC_CODERANGE_7BIT)
+#endif
 #define TOK_INTERN(mb) rb_intern3(tok(), toklen(), STR_ENC(mb))
 
 #ifdef YYMALLOC
@@ -4641,8 +4650,10 @@
 # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
 #endif
 
+#if !WITH_OBJC
 #define parser_mbclen()  mbclen((lex_p-1),lex_pend,parser->enc)
 #define parser_precise_mbclen()  rb_enc_precise_mbclen((lex_p-1),lex_pend,parser->enc)
+#endif
 #define is_identchar(p,e,enc) (rb_enc_isalnum(*p,enc) || (*p) == '_' || !ISASCII(*p))
 #define parser_is_identchar() (!parser->eofp && is_identchar((lex_p-1),lex_pend,parser->enc))
 
@@ -4678,11 +4689,19 @@
 
 	if (len > max_line_margin * 2 + 10) {
 	    if (lex_p - p > max_line_margin) {
+#if WITH_OBJC
+		p = lex_p - max_line_margin;
+#else
 		p = rb_enc_prev_char(p, lex_p - max_line_margin, rb_enc_get(lex_lastline));
+#endif
 		pre = "...";
 	    }
 	    if (pe - lex_p > max_line_margin) {
+#if WITH_OBJC
+		pe = lex_p + max_line_margin;
+#else
 		pe = rb_enc_prev_char(lex_p, lex_p + max_line_margin, rb_enc_get(lex_lastline));
+#endif
 		post = "...";
 	    }
 	    len = pe - p;
@@ -4956,6 +4975,7 @@
 #endif
 
     str = rb_enc_str_new(p, n, enc);
+#if !WITH_OBJC
     if (!(func & STR_FUNC_REGEXP) && rb_enc_asciicompat(enc)) {
 	if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) {
 	    rb_enc_associate(str, rb_usascii_encoding());
@@ -4964,6 +4984,7 @@
 	    rb_enc_associate(str, rb_ascii8bit_encoding());
 	}
     }
+#endif
 
     return str;
 }
@@ -5281,8 +5302,14 @@
 static void
 parser_tokaddmbc(struct parser_params *parser, int c, rb_encoding *enc)
 {
+#if WITH_OBJC
+    /* FIXME */
+    char *buf = tokspace(1);
+    *(buf) = c;    
+#else
     int len = rb_enc_codelen(c, enc);
     rb_enc_mbcput(c, tokspace(len), enc);
+#endif
 }
 
 static int
@@ -5409,11 +5436,15 @@
 static int
 parser_tokadd_mbchar(struct parser_params *parser, int c)
 {
+#if WITH_OBJC
+    int len = 1;
+#else
     int len = parser_precise_mbclen();
     if (!MBCLEN_CHARFOUND_P(len)) {
 	compile_error(PARSER_ARG "invalid multibyte char");
 	return -1;
     }
+#endif
     tokadd(c);
     lex_p += --len;
     if (len > 0) tokcopy(len);
@@ -5856,8 +5887,15 @@
 static void
 parser_set_encode(struct parser_params *parser, const char *name)
 {
+    rb_encoding *enc;
+#if WITH_OBJC
+    enc = rb_enc_find(name);
+    if (enc == NULL) {
+	rb_raise(rb_eArgError, "unknown encoding name: %s", name);
+    }
+    /* TODO should raise if the encoding is not ASCII compatible */
+#else
     int idx = rb_enc_find_index(name);
-    rb_encoding *enc;
 
     if (idx < 0) {
 	rb_raise(rb_eArgError, "unknown encoding name: %s", name);
@@ -5866,6 +5904,7 @@
     if (!rb_enc_asciicompat(enc)) {
 	rb_raise(rb_eArgError, "%s is not ASCII compatible", rb_enc_name(enc));
     }
+#endif
     parser->enc = enc;
 }
 
@@ -6085,8 +6124,10 @@
     }
     pushback(c);
     parser->enc = rb_enc_get(lex_lastline);
+#if !WITH_OBJC
     if (parser->enc == NULL)
 	parser->enc = rb_utf8_encoding();
+#endif
 }
 
 #define IS_ARG() (lex_state == EXPR_ARG || lex_state == EXPR_CMDARG)
@@ -7248,9 +7289,13 @@
 	break;
     }
 
+#if !WITH_OBJC
     mb = ENC_CODERANGE_7BIT;
+#endif
     do {
+#if !WITH_OBJC
 	if (!ISASCII(c)) mb = ENC_CODERANGE_UNKNOWN;
+#endif
 	if (tokadd_mbchar(c) == -1) return 0;
 	c = nextc();
     } while (parser_is_identchar());
@@ -7303,7 +7348,11 @@
 		}
 	    }
 
+#if WITH_OBJC
+	    if (lex_state != EXPR_DOT) {
+#else
 	    if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
+#endif
 		const struct kwtable *kw;
 
 		/* See if it is a reserved word.  */
@@ -7557,11 +7606,13 @@
 static void
 literal_concat0(struct parser_params *parser, VALUE head, VALUE tail)
 {
+#if !WITH_OBJC
     if (!rb_enc_compatible(head, tail)) {
 	compile_error(PARSER_ARG "string literal encodings differ (%s / %s)",
 		      rb_enc_name(rb_enc_get(head)),
 		      rb_enc_name(rb_enc_get(tail)));
     }
+#endif
     RSTRING_SYNC(head);
     rb_str_buf_append(head, tail);
 }
@@ -8629,6 +8680,9 @@
 static void
 reg_fragment_setenc_gen(struct parser_params* parser, VALUE str, int options)
 {
+#if WITH_OBJC
+    /* TODO */
+#else
     int c = RE_OPTION_ENCODING_IDX(options);
 
     if (c) {
@@ -8663,6 +8717,7 @@
     compile_error(PARSER_ARG
         "regexp encoding option '%c' differs from source encoding '%s'",
         c, rb_enc_name(rb_enc_get(str)));
+#endif
 }
 
 static void
@@ -8999,7 +9054,11 @@
 	++m;
 	if (m < e && is_identchar(m, e, enc)) {
 	    if (!ISASCII(*m)) mb = 1;
+#if WITH_OBJC
+	    m += e-m;
+#else
 	    m += rb_enc_mbclen(m, e, enc);
+#endif
 	}
 	break;
       default:
@@ -9015,7 +9074,11 @@
 int
 rb_symname_p(const char *name)
 {
+#if WITH_OBJC
+    return rb_enc_symname_p(name, NULL);
+#else
     return rb_enc_symname_p(name, rb_ascii8bit_encoding());
+#endif
 }
 
 int
@@ -9096,7 +9159,11 @@
       id:
 	if (m >= e || (*m != '_' && !rb_enc_isalpha(*m, enc) && ISASCII(*m)))
 	    return Qfalse;
+#if WITH_OBJC
+	while (m < e && is_identchar(m, e, enc)) m += e-m;
+#else
 	while (m < e && is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc);
+#endif
 	if (localid) {
 	    switch (*m) {
 	      case '!': case '?': case '=': ++m;
@@ -9152,7 +9219,13 @@
       case '$':
 	id |= ID_GLOBAL;
 	if ((mb = is_special_global_name(++m, e, enc)) != 0) {
-	    if (!--mb) enc = rb_ascii8bit_encoding();
+	    if (!--mb) {
+#if WITH_OBJC
+		enc = NULL;
+#else
+		enc = rb_ascii8bit_encoding();
+#endif
+	    }
 	    goto new_id;
 	}
 	break;
@@ -9199,6 +9272,7 @@
 	}
 	break;
     }
+#if !WITH_OBJC
     mb = 0;
     if (!rb_enc_isdigit(*m, enc)) {
 	while (m <= name + last && is_identchar(m, e, enc)) {
@@ -9225,12 +9299,12 @@
 	}
       mbstr:;
     }
+#endif
   new_id:
     id |= ++global_symbols.last_id << ID_SCOPE_SHIFT;
   id_register:
     str = rb_enc_str_new(name, len, enc);
-// TODO
-//    OBJ_FREEZE(str);
+    OBJ_FREEZE(str);
 #if WITH_OBJC
     CFDictionarySetValue(global_symbols.sym_id, (const void *)name_hash, 
 	(const void *)id);
@@ -9246,7 +9320,11 @@
 ID
 rb_intern2(const char *name, long len)
 {
+#if WITH_OBJC /* both branches delegate to rb_intern3(); only the encoding argument differs */
+    return rb_intern3(name, len, NULL); /* NULL is passed where the other branch passes rb_usascii_encoding() */
+#else
     return rb_intern3(name, len, rb_usascii_encoding());
+#endif
 }
 
 #undef rb_intern
@@ -9262,12 +9340,16 @@
     rb_encoding *enc;
     ID id;
 
+#if WITH_OBJC
+    enc = rb_enc_get(str);
+#else
     if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) {
 	enc = rb_usascii_encoding();
     }
     else {
 	enc = rb_enc_get(str);
     }
+#endif
     id = rb_intern3(RSTRING_CPTR(str), RSTRING_CLEN(str), enc);
     RB_GC_GUARD(str);
     return id;
@@ -9467,7 +9549,11 @@
 #ifdef YYMALLOC
     parser->heap = NULL;
 #endif
+#if WITH_OBJC
+    parser->enc = NULL;
+#else
     parser->enc = rb_usascii_encoding();
+#endif
 }
 
 extern void rb_mark_source_filename(char *);

Modified: MacRuby/trunk/re.c
===================================================================
--- MacRuby/trunk/re.c	2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/re.c	2008-05-15 08:06:22 UTC (rev 192)
@@ -184,6 +184,7 @@
       case 'n':
         *kcode = -1;
         return (*option = ARG_ENCODING_NONE);
+#if !WITH_OBJC
       case 'e':
 	*kcode = rb_enc_find_index("EUC-JP");
 	break;
@@ -193,6 +194,7 @@
       case 'u':
 	*kcode = rb_enc_find_index("UTF-8");
 	break;
+#endif
       default:
 	*kcode = -1;
 	return (*option = char_to_option(c));
@@ -219,10 +221,16 @@
 
     p = s; pend = p + len;
     while (p<pend) {
+#if WITH_OBJC
+	c = *p;
+	clen = 1;
+	if (0) {}
+#else
         c = rb_enc_ascget(p, pend, &clen, enc);
         if (c == -1) {
             p += mbclen(p, pend, enc);
         }
+#endif
         else if (c != '/' && rb_enc_isprint(c, enc)) {
             p += clen;
         }
@@ -237,9 +245,18 @@
     else {
 	p = s;
 	while (p<pend) {
+#if WITH_OBJC
+	    c = *p;
+	    clen = 1;
+#else
             c = rb_enc_ascget(p, pend, &clen, enc);
+#endif
 	    if (c == '\\' && p+clen < pend) {
+#if WITH_OBJC
+		int n = clen + (pend - (p+clen));
+#else
 		int n = clen + mbclen(p+clen, pend, enc);
+#endif
 		rb_str_buf_cat(str, p, n);
 		p += n;
 		continue;
@@ -249,12 +266,14 @@
 		rb_str_buf_cat(str, &c, 1);
 		rb_str_buf_cat(str, p, clen);
 	    }
+#if !WITH_OBJC
 	    else if (c == -1) {
                 int l = mbclen(p, pend, enc);
 	    	rb_str_buf_cat(str, p, l);
 		p += l;
 		continue;
 	    }
+#endif
 	    else if (rb_enc_isprint(c, enc)) {
 		rb_str_buf_cat(str, p, clen);
 	    }
@@ -277,7 +296,9 @@
 {
     VALUE str = rb_str_buf_new2("/");
 
+#if !WITH_OBJC
     rb_enc_copy(str, re);
+#endif
     rb_reg_expr_str(str, s, len);
     rb_str_buf_cat2(str, "/");
     if (re) {
@@ -374,7 +395,9 @@
 
     rb_reg_check(re);
 
+#if !WITH_OBJC
     rb_enc_copy(str, re);
+#endif
     options = RREGEXP(re)->ptr->options;
     ptr = (UChar*)RREGEXP(re)->str;
     len = RREGEXP(re)->len;
@@ -416,9 +439,17 @@
 	if (*ptr == ':' && ptr[len-1] == ')') {
 	    int r;
 	    Regexp *rp;
+	    OnigEncoding oenc;
+	    
+#if WITH_OBJC
+	    oenc = ONIG_ENCODING_ASCII;
+#else
+	    oenc = rb_enc_get(re);
+#endif
+
             r = onig_alloc_init(&rp, ONIG_OPTION_DEFAULT,
                                 ONIGENC_CASE_FOLD_DEFAULT,
-                                rb_enc_get(re),
+                                oenc,
                                 OnigDefaultSyntax);
 	    if (r == 0) {
 		 ++ptr;
@@ -445,7 +476,9 @@
     rb_str_buf_cat2(str, ":");
     rb_reg_expr_str(str, (char*)ptr, len);
     rb_str_buf_cat2(str, ")");
+#if !WITH_OBJC
     rb_enc_copy(str, re);
+#endif
 
     OBJ_INFECT(str, re);
     return str;
@@ -465,7 +498,9 @@
     char opts[6];
     VALUE desc = rb_str_buf_new2(err);
 
+#if !WITH_OBJC
     rb_enc_associate(desc, enc);
+#endif
     rb_str_buf_cat2(desc, ": /");
     rb_reg_expr_str(desc, s, len);
     opts[0] = '/';
@@ -629,6 +664,7 @@
     Regexp *rp;
     int r;
     OnigErrorInfo einfo;
+    OnigEncoding oenc;
 
     /* Handle escaped characters first. */
 
@@ -637,8 +673,14 @@
        from that.
     */
 
+#if WITH_OBJC
+    oenc = ONIG_ENCODING_ASCII;
+#else
+    oenc = enc;
+#endif
+
     r = onig_alloc_init(&rp, flags, ONIGENC_CASE_FOLD_DEFAULT,
-                        enc, OnigDefaultSyntax);
+                        oenc, OnigDefaultSyntax);
     if (r) {
 	onig_error_code_to_str((UChar*)err, r);
 	return 0;
@@ -742,7 +784,12 @@
     c = 0;
     for (i = 0; i < num_pos; i++) {
         q = s + pairs[i].byte_pos;
+#if WITH_OBJC
+	//long n = strlen(p);
+	c += q-p;//(n > (q-p) ? q-p : n);
+#else
         c += rb_enc_strlen(p, q, enc);
+#endif
         pairs[i].char_pos = c;
         p = q;
     }
@@ -1052,20 +1099,25 @@
     int need_recompile = 0;
     rb_encoding *enc;
 
+#if WITH_OBJC
+    need_recompile = 0;
+#else
     if (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) {
         rb_raise(rb_eArgError,
             "broken %s string",
             rb_enc_name(rb_enc_get(str)));
     }
+#endif
 
     rb_reg_check(re);
     /* ignorecase status */
+#if !WITH_OBJC
     if (rb_reg_fixed_encoding_p(re) || !rb_enc_str_asciicompat_p(str)) {
         if (ENCODING_GET(re) != rb_enc_get_index(str) &&
             rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) {
             rb_raise(rb_eArgError,
                 "incompatible encoding regexp match (%s regexp with %s string)",
-                rb_enc_name(rb_enc_from_index(ENCODING_GET(re))),
+		rb_enc_name(rb_enc_get(re)),
                 rb_enc_name(rb_enc_get(str)));
         }
     }
@@ -1082,6 +1134,7 @@
                     rb_enc_name(enc));
         }
     }
+#endif
 
     if (need_recompile) {
 	onig_errmsg_buffer err = "";
@@ -1104,10 +1157,14 @@
             rb_raise(rb_eArgError, "regexp preprocess failed: %s", err);
         }
 
+#if WITH_OBJC
+	enc = (rb_encoding *)ONIG_ENCODING_ASCII;
+#endif
+
 	r = onig_new(®2, (UChar* )RSTRING_CPTR(unescaped),
 		     (UChar* )(RSTRING_CPTR(unescaped) 
 			 + RSTRING_CLEN(unescaped)),
-		     reg->options, enc,
+		     reg->options, (OnigEncoding)enc,
 		     OnigDefaultSyntax, &einfo);
 	if (r) {
 	    onig_error_code_to_str((UChar*)err, r, &einfo);
@@ -1815,8 +1872,12 @@
     }
 
     chbuf[chlen++] = byte;
-    while (chlen < chmaxlen &&
-           MBCLEN_NEEDMORE_P(rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) {
+    while (chlen < chmaxlen 
+#if WITH_OBJC
+	&& 1) {
+#else
+	&& MBCLEN_NEEDMORE_P(rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) {
+#endif
         byte = read_escaped_byte(&p, end, err);
         if (byte == -1) {
             return -1;
@@ -1824,11 +1885,13 @@
         chbuf[chlen++] = byte;
     }
 
+#if !WITH_OBJC
     l = rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc);
     if (MBCLEN_INVALID_P(l)) {
         strcpy(err, "invalid multibyte escape");
         return -1;
     }
+#endif
     if (1 < chlen || (chbuf[0] & 0x80)) {
         rb_str_buf_cat(buf, chbuf, chlen);
 
@@ -1876,12 +1939,14 @@
         len = rb_uv_to_utf8(utf8buf, uv);
         rb_str_buf_cat(buf, utf8buf, len);
 
+#if !WITH_OBJC
         if (*encp == 0)
             *encp = rb_utf8_encoding();
         else if (*encp != rb_utf8_encoding()) {
             strcpy(err, "UTF-8 character in non UTF-8 regexp");
             return -1;
         }
+#endif
     }
     return 0;
 }
@@ -1954,12 +2019,16 @@
     char smallbuf[2];
 
     while (p < end) {
+#if WITH_OBJC
+	int chlen = 1;
+#else
         int chlen = rb_enc_precise_mbclen(p, end, enc);
         if (!MBCLEN_CHARFOUND_P(chlen)) {
             strcpy(err, "invalid multibyte character");
             return -1;
         }
         chlen = MBCLEN_CHARFOUND_LEN(chlen);
+#endif
         if (1 < chlen || (*p & 0x80)) {
             rb_str_buf_cat(buf, p, chlen);
             p += chlen;
@@ -2057,14 +2126,18 @@
         *fixed_enc = 0;
     else {
         *fixed_enc = enc;
+#if !WITH_OBJC
         rb_enc_associate(buf, enc);
+#endif
     }
 
     if (unescape_nonascii(p, end, enc, buf, fixed_enc, err) != 0)
         return Qnil;
 
     if (*fixed_enc) {
+#if !WITH_OBJC
         rb_enc_associate(buf, *fixed_enc);
+#endif
     }
 
     return buf;
@@ -2144,7 +2217,9 @@
             rb_str_buf_append(result, str);
     }
     if (regexp_enc) {
+#if !WITH_OBJC
         rb_enc_associate(result, regexp_enc);
+#endif
     }
 
     return result;
@@ -2157,7 +2232,11 @@
     struct RRegexp *re = RREGEXP(obj);
     VALUE unescaped;
     rb_encoding *fixed_enc = 0;
+#if WITH_OBJC
+    rb_encoding *a_enc = NULL;
+#else
     rb_encoding *a_enc = rb_ascii8bit_encoding();
+#endif
 
     if (!OBJ_TAINTED(obj) && rb_safe_level() >= 4)
 	rb_raise(rb_eSecurityError, "Insecure: can't modify regexp");
@@ -2184,11 +2263,15 @@
 	    enc = fixed_enc;
 	}
     }
+#if !WITH_OBJC
     else if (!(options & ARG_ENCODING_FIXED)) {
        enc = rb_usascii_encoding();
     }
+#endif
 
+#if !WITH_OBJC
     rb_enc_associate((VALUE)re, enc);
+#endif
     if ((options & ARG_ENCODING_FIXED) || fixed_enc) {
 	re->basic.flags |= KCODE_FIXED;
     }
@@ -2214,6 +2297,8 @@
     int ret;
     rb_encoding *enc = rb_enc_get(str);
     if (options & ARG_ENCODING_NONE) {
+#if !WITH_OBJC
+	/* TODO */
         rb_encoding *ascii8bit = rb_ascii8bit_encoding();
         if (enc != ascii8bit) {
             if (rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) {
@@ -2222,6 +2307,7 @@
             }
             enc = ascii8bit;
         }
+#endif
     }
     ret = rb_reg_initialize(obj, RSTRING_CPTR(str), RSTRING_CLEN(str), enc,
 			    options, err);
@@ -2277,7 +2363,11 @@
 VALUE
 rb_reg_new(const char *s, long len, int options)
 {
+#if WITH_OBJC
+    return rb_enc_reg_new(s, len, NULL, options);
+#else
     return rb_enc_reg_new(s, len, rb_ascii8bit_encoding(), options);
+#endif
 }
 
 VALUE
@@ -2302,7 +2392,11 @@
 {
     volatile VALUE save_str = str;
     if (reg_cache && RREGEXP(reg_cache)->len == RSTRING_CLEN(str)
+#if WITH_OBJC
+	&& rb_enc_get(reg_cache) == rb_enc_get(str)
+#else
 	&& ENCODING_GET(reg_cache) == ENCODING_GET(str)
+#endif
         && memcmp(RREGEXP(reg_cache)->str, RSTRING_CPTR(str), RSTRING_CLEN(str)) == 0)
 	return reg_cache;
 
@@ -2359,7 +2453,11 @@
     if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED)) return Qfalse;
     if (RREGEXP(re1)->ptr->options != RREGEXP(re2)->ptr->options) return Qfalse;
     if (RREGEXP(re1)->len != RREGEXP(re2)->len) return Qfalse;
+#if WITH_OBJC
+    if (rb_enc_get(re1) != rb_enc_get(re2)) return Qfalse;
+#else
     if (ENCODING_GET(re1) != ENCODING_GET(re2)) return Qfalse;
+#endif
     if (memcmp(RREGEXP(re1)->str, RREGEXP(re2)->str, RREGEXP(re1)->len) == 0) {
 	return Qtrue;
     }
@@ -2643,7 +2741,9 @@
 	if (argc == 3 && !NIL_P(argv[2])) {
 	    char *kcode = StringValuePtr(argv[2]);
 	    if (kcode[0] == 'n' || kcode[1] == 'N') {
+#if !WITH_OBJC
 		enc = rb_ascii8bit_encoding();
+#endif
 		flags |= ARG_ENCODING_FIXED;
 	    }
 	    else {
@@ -2669,18 +2769,27 @@
     char *t;
     VALUE tmp;
     int c, clen;
+#if WITH_OBJC
+    int ascii_only = 0;
+#else
     int ascii_only = rb_enc_str_asciionly_p(str);
+#endif
 
     s = RSTRING_CPTR(str);
     if (s == NULL)
 	return str;
     send = s + RSTRING_CLEN(str);
     while (s < send) {
-        c = rb_enc_ascget(s, send, &clen, enc);
+#if WITH_OBJC
+	c = *s;
+	clen = 1;
+#else
+	c = rb_enc_ascget(s, send, &clen, enc);
 	if (c == -1) {
             s += mbclen(s, send, enc);
 	    continue;
 	}
+#endif
 	switch (c) {
 	  case '[': case ']': case '{': case '}':
 	  case '(': case ')': case '|': case '-':
@@ -2694,24 +2803,32 @@
     }
     if (ascii_only) {
         str = rb_str_new3(str);
+#if !WITH_OBJC
         rb_enc_associate(str, rb_usascii_encoding());
+#endif
     }
     return str;
 
   meta_found:
     tmp = rb_str_new(0, RSTRING_CLEN(str)*2);
+#if !WITH_OBJC
     if (ascii_only) {
         rb_enc_associate(tmp, rb_usascii_encoding());
     }
     else {
         rb_enc_copy(tmp, str);
     }
+#endif
     t = RSTRING_PTR(tmp);
     /* copy upto metacharacter */
     memcpy(t, RSTRING_CPTR(str), s - RSTRING_CPTR(str));
     t += s - RSTRING_CPTR(str);
 
     while (s < send) {
+#if WITH_OBJC
+	c = *s;
+	clen = 1;
+#else
         c = rb_enc_ascget(s, send, &clen, enc);
 	if (c == -1) {
 	    int n = mbclen(s, send, enc);
@@ -2720,6 +2837,7 @@
 		*t++ = *s++;
 	    continue;
 	}
+#endif
         s += clen;
 	switch (c) {
 	  case '[': case ']': case '{': case '}':
@@ -2758,7 +2876,9 @@
     }
     rb_str_resize(tmp, t - RSTRING_PTR(tmp));
     RSTRING_SYNC(tmp);
+#if !WITH_OBJC
     OBJ_INFECT(tmp, str);
+#endif
     return tmp;
 }
 
@@ -2894,9 +3014,11 @@
                         rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
                             rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
                 }
+#if !WITH_OBJC
                 else if (rb_enc_str_asciionly_p(e)) {
                     has_asciionly = 1;
                 }
+#endif
                 else {
                     if (!has_ascii_compat_fixed)
                         has_ascii_compat_fixed = enc;
@@ -2917,12 +3039,15 @@
                 }
             }
 
+#if !WITH_OBJC
             if (i == 0) {
                 rb_enc_copy(source, v);
             }
+#endif
 	    rb_str_append(source, v);
 	}
 
+#if !WITH_OBJC
         if (has_ascii_incompat) {
             result_enc = has_ascii_incompat;
         }
@@ -2934,6 +3059,7 @@
         }
 
         rb_enc_associate(source, result_enc);
+#endif
         return rb_class_new_instance(1, &source, rb_cRegexp);
     }
 }
@@ -2998,18 +3124,24 @@
     rb_encoding *str_enc = rb_enc_get(str);
     rb_encoding *src_enc = rb_enc_get(src);
 
+#if !WITH_OBJC
     rb_enc_check(str, src);
+#endif
     p = s = RSTRING_CPTR(str);
     e = s + RSTRING_CLEN(str);
 
     while (s < e) {
+	const char *ss;
+#if WITH_OBJC
+	int c = *s;
+	clen = 1;
+#else
         int c = rb_enc_ascget(s, e, &clen, str_enc);
-	const char *ss;
-
 	if (c == -1) {
 	    s += mbclen(s, e, str_enc);
 	    continue;
 	}
+#endif
 	ss = s;
         s += clen;
 
@@ -3020,6 +3152,10 @@
 	}
         rb_enc_str_buf_cat(val, p, ss-p, str_enc);
 
+#if WITH_OBJC
+	c = *s;
+	clen = 1;
+#else
         c = rb_enc_ascget(s, e, &clen, str_enc);
         if (c == -1) {
             s += mbclen(s, e, str_enc);
@@ -3027,6 +3163,7 @@
             p = s;
 	    continue;
         }
+#endif
         s += clen;
 
 	p = s;
@@ -3042,14 +3179,26 @@
 	    break;
 
           case 'k':
+#if WITH_OBJC
+	    clen = 1;
+	    if (s < e && *s == '<') {
+#else
             if (s < e && rb_enc_ascget(s, e, &clen, str_enc) == '<') {
+#endif
                 const char *name, *name_end;
                
                 name_end = name = s + clen;
                 while (name_end < e) {
+#if WITH_OBJC
+		    c = *name_end;
+		    clen = 1;
+                    if (c == '>') break;
+                    name_end += clen;
+#else
                     c = rb_enc_ascget(name_end, e, &clen, str_enc);
                     if (c == '>') break;
                     name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
+#endif
                 }
                 if (name_end < e) {
                     no = name_to_backref_number(regs, regexp, name, name_end);

Modified: MacRuby/trunk/ruby.c
===================================================================
--- MacRuby/trunk/ruby.c	2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/ruby.c	2008-05-15 08:06:22 UTC (rev 192)
@@ -91,7 +91,11 @@
     struct {
 	struct {
 	    VALUE name;
+#if WITH_OBJC
+	    rb_encoding *enc;
+#else
 	    int index;
+#endif
 	} enc;
     } src, ext;
 };
@@ -926,6 +930,17 @@
     Init_prelude();
 }
 
+#if WITH_OBJC
+static rb_encoding *
+opt_enc_find(VALUE enc_name) /* look up an encoding by name via rb_enc_find2(); raises RuntimeError when the lookup returns NULL */
+{
+    rb_encoding *enc = rb_enc_find2(enc_name);
+    if (enc == NULL) /* no encoding was returned for this name */
+	rb_raise(rb_eRuntimeError, "unknown encoding name - %s", 
+	    RSTRING_CPTR(enc_name));
+    return enc;
+}
+#else
 static int
 opt_enc_index(VALUE enc_name)
 {
@@ -940,8 +955,13 @@
     }
     return i;
 }
+#endif
 
+#if WITH_OBJC
+static rb_encoding *src_encoding;
+#else
 static int src_encoding_index = -1; /* TODO: VM private */
+#endif
 
 static VALUE
 process_options(VALUE arg)
@@ -1059,15 +1079,30 @@
     parser = rb_parser_new();
     if (opt->yydebug) rb_parser_set_yydebug(parser, Qtrue);
     if (opt->ext.enc.name != 0) {
+#if WITH_OBJC
+	opt->ext.enc.enc = opt_enc_find(opt->ext.enc.name);
+#else
 	opt->ext.enc.index = opt_enc_index(opt->ext.enc.name);
+#endif
     }
     if (opt->src.enc.name != 0) {
+#if WITH_OBJC
+	opt->src.enc.enc = opt_enc_find(opt->src.enc.name);
+	src_encoding = opt->src.enc.enc;
+#else
 	opt->src.enc.index = opt_enc_index(opt->src.enc.name);
 	src_encoding_index = opt->src.enc.index;
+#endif
     }
+#if WITH_OBJC
+    if (opt->ext.enc.enc != NULL) {
+	enc = opt->ext.enc.enc;
+    }
+#else
     if (opt->ext.enc.index >= 0) {
 	enc = rb_enc_from_index(opt->ext.enc.index);
     }
+#endif
     else {
 	enc = rb_locale_encoding();
     }
@@ -1075,13 +1110,21 @@
 
     if (opt->e_script) {
 	rb_encoding *eenc;
+#if WITH_OBJC
+	if (opt->src.enc.enc != NULL) {
+	    eenc = opt->src.enc.enc;
+	}
+#else
 	if (opt->src.enc.index >= 0) {
 	    eenc = rb_enc_from_index(opt->src.enc.index);
 	}
+#endif
 	else {
 	    eenc = rb_locale_encoding();
 	}
+#if !WITH_OBJC
 	rb_enc_associate(opt->e_script, eenc);
+#endif
 	require_libraries();
 	tree = rb_parser_compile_string(parser, opt->script, opt->e_script, 1);
     }
@@ -1233,11 +1276,20 @@
 	    }
 	    rb_io_ungetc(f, INT2FIX('#'));
 	    if (no_src_enc && opt->src.enc.name) {
+#if WITH_OBJC
+		opt->src.enc.enc = opt_enc_find(opt->src.enc.name);
+		src_encoding = opt->src.enc.enc;
+#else
 		opt->src.enc.index = opt_enc_index(opt->src.enc.name);
 		src_encoding_index = opt->src.enc.index;
+#endif
 	    }
 	    if (no_ext_enc && opt->ext.enc.name) {
+#if WITH_OBJC
+		opt->ext.enc.enc = opt_enc_find(opt->ext.enc.name);
+#else
 		opt->ext.enc.index = opt_enc_index(opt->ext.enc.name);
+#endif
 	    }
 	}
 	else if (!NIL_P(c)) {
@@ -1245,6 +1297,14 @@
 	}
 	require_libraries();	/* Why here? unnatural */
     }
+#if WITH_OBJC
+    if (opt->src.enc.enc != NULL) {
+    	enc = opt->src.enc.enc;
+    }
+    else {
+	enc = rb_locale_encoding();
+    }
+#else
     if (opt->src.enc.index >= 0) {
 	enc = rb_enc_from_index(opt->src.enc.index);
     }
@@ -1254,6 +1314,7 @@
     else {
 	enc = rb_usascii_encoding();
     }
+#endif
     rb_funcall(f, rb_intern("set_encoding"), 1, rb_enc_from_encoding(enc));
     tree = (NODE *)rb_parser_compile_file(parser, fname, f, line_start);
     rb_funcall(f, rb_intern("set_encoding"), 1, rb_parser_encoding(parser));
@@ -1272,7 +1333,11 @@
     struct cmdline_options opt;
 
     MEMZERO(&opt, opt, 1);
+#if WITH_OBJC
+    opt.src.enc.enc = src_encoding;
+#else
     opt.src.enc.index = src_encoding_index;
+#endif
     return load_file(rb_parser_new(), fname, 0, &opt);
 }
 
@@ -1504,8 +1569,13 @@
     args.argc = argc;
     args.argv = argv;
     args.opt = &opt;
+#if WITH_OBJC
+    opt.src.enc.enc = src_encoding;
+    opt.ext.enc.enc = NULL;
+#else
     opt.src.enc.index = src_encoding_index;
     opt.ext.enc.index = -1;
+#endif
     tree = (NODE *)rb_vm_call_cfunc(rb_vm_top_self(),
 				    process_options, (VALUE)&args,
 				    0, rb_progname);

Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c	2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/string.c	2008-05-15 08:06:22 UTC (rev 192)
@@ -215,15 +215,18 @@
 }
 #endif
 
-#define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
-#define is_broken_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN)
-
 #if WITH_OBJC
-# define STR_ENC_GET(str) (rb_ascii8bit_encoding()) /* TODO */
+/* TODO */
+# define is_ascii_string(str) (1)
+# define is_broken_string(str) (0)
+# define STR_ENC_GET(str) (NULL)
 #else
+# define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
+# define is_broken_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN)
 # define STR_ENC_GET(str) rb_enc_from_index(ENCODING_GET(str))
 #endif
 
+#if !WITH_OBJC
 static int
 single_byte_optimizable(VALUE str)
 {
@@ -240,9 +243,11 @@
      * "\xa1" in Shift_JIS for example. */
     return 0;
 }
+#endif
 
 VALUE rb_fs;
 
+#if !WITH_OBJC
 static inline const char *
 search_nonascii(const char *p, const char *e)
 {
@@ -420,9 +425,6 @@
 int
 rb_enc_str_coderange(VALUE str)
 {
-#if WITH_OBJC
-    return ENC_CODERANGE_VALID;
-#else
     int cr = ENC_CODERANGE(str);
 
     if (cr == ENC_CODERANGE_UNKNOWN) {
@@ -431,15 +433,11 @@
         ENC_CODERANGE_SET(str, cr);
     }
     return cr;
-#endif
 }
 
 int
 rb_enc_str_asciionly_p(VALUE str)
 {
-#if WITH_OBJC
-    return Qtrue;
-#else
     rb_encoding *enc = STR_ENC_GET(str);
 
     if (!rb_enc_asciicompat(enc))
@@ -447,8 +445,8 @@
     else if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
         return Qtrue;
     return Qfalse;
+}
 #endif
-}
 
 static inline void
 str_mod_check(VALUE s, const char *p, long len)
@@ -585,7 +583,7 @@
 {
     VALUE str = str_new(rb_cString, ptr, len);
 
-    ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
+    //ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
     return str;
 }
 
@@ -908,6 +906,7 @@
     return str;
 }
 
+#if !WITH_OBJC
 long
 rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
 {
@@ -990,6 +989,7 @@
     if (!*cr) *cr = ENC_CODERANGE_7BIT;
     return c;
 }
+#endif
 
 static long
 str_strlen(VALUE str, rb_encoding *enc)
@@ -1399,6 +1399,7 @@
     return rb_check_string_type(str);
 }
 
+#if !WITH_OBJC
 char*
 rb_enc_nth(const char *p, const char *e, int nth, rb_encoding *enc)
 {
@@ -1512,7 +1513,8 @@
     if (!pp) return e - p;
     return pp - p;
 }
-#endif
+#endif /* NONASCII_MASK */
+#endif /* WITH_OBJC */
 
 /* byte offset to char offset */
 long
@@ -2904,6 +2906,7 @@
     NEIGHBOR_WRAPPED
 };
 
+#if !WITH_OBJC
 static enum neighbor_char
 enc_succ_char(char *p, int len, rb_encoding *enc)
 {
@@ -2967,6 +2970,7 @@
         }
     }
 }
+#endif
 
 /*
   overwrite +p+ by succeeding letter in +enc+ and returns
@@ -2980,6 +2984,10 @@
 static enum neighbor_char
 enc_succ_alnum_char(char *p, int len, rb_encoding *enc, char *carry)
 {
+#if WITH_OBJC
+    /* TODO rewrite me */
+    return NEIGHBOR_NOT_CHAR;
+#else
     enum neighbor_char ret;
     int c;
     int ctype;
@@ -3031,6 +3039,7 @@
     MEMCPY(carry, p, char, len);
     enc_succ_char(carry, len, enc);
     return NEIGHBOR_WRAPPED;
+#endif
 }
 
 
@@ -3062,6 +3071,69 @@
 VALUE
 rb_str_succ(VALUE orig)
 {
+#if WITH_OBJC /* CFString path: compute the successor on a UTF-16 copy of the receiver */
+    UniChar *buf;
+    UniChar carry;
+    long i, len;
+    bool modified;
+
+    len = CFStringGetLength((CFStringRef)orig);
+    if (len == 0)
+	return orig;
+
+    buf = (UniChar *)alloca(sizeof(UniChar) * (len + 1)); /* len characters plus one spare slot; NOTE(review): stack use grows with the string length */
+    buf++; /* keep the spare slot in front of the copied characters */
+    
+    CFStringGetCharacters((CFStringRef)orig, CFRangeMake(0, len), buf);
+    modified = false;
+    carry = 0;
+
+    for (i = len - 1; i >= 0; i--) { /* walk from the last character towards the first */
+	UniChar c = buf[i];
+	if (iswdigit(c)) {
+	    modified = true;
+	    if (c != '9') {
+		buf[i]++;
+		carry = 0;
+		break;
+	    }
+	    else {
+		buf[i] = '0';
+		carry = '1';
+	    }
+	}
+	else if (iswalpha(c)) {
+	    bool lower = iswlower(c); /* wide classifier: c is a UniChar and may lie outside the unsigned char range that islower() accepts */
+	    UniChar e = lower ? 'z' : 'Z';
+	    modified = true;
+	    if (c != e) {
+		buf[i]++;
+		carry = 0;
+		break;
+	    }
+	    else {
+		carry = buf[i] = lower ? 'a' : 'A';
+	    }
+	}
+    }
+
+    if (!modified) { /* no digit or letter was found: bump the last character */
+	buf[len-1]++;
+    }
+    else if (carry != 0) { /* a carry is still pending after the loop: store it in the spare front slot */
+	buf--;
+	*buf = carry;
+	len++;
+    }
+
+    CFMutableStringRef newstr;
+
+    newstr = CFStringCreateMutable(NULL, 0);
+    CFStringAppendCharacters(newstr, buf, len);
+    CFMakeCollectable(newstr);
+
+    return (VALUE)newstr;
+#else
     rb_encoding *enc;
     VALUE str;
     char *sbeg, *s, *e;
@@ -3133,6 +3205,7 @@
     rb_enc_str_coderange(str);
 #endif
     return str;
+#endif
 }
 
 
@@ -3186,8 +3259,28 @@
 
     rb_scan_args(argc, argv, "11", &end, &exclusive);
     excl = RTEST(exclusive);
-    succ = rb_intern("succ");
     StringValue(end);
+#if WITH_OBJC
+    if (RSTRING_CLEN(beg) == 1 && RSTRING_CLEN(end) == 1) {
+	UniChar c = CFStringGetCharacterAtIndex((CFStringRef)beg, 0);
+	UniChar e = CFStringGetCharacterAtIndex((CFStringRef)end, 0);
+	
+	if (c > e || (excl && c == e)) 
+	    return beg;
+	for (;;) {
+	    CFMutableStringRef substr;
+	    substr = CFStringCreateMutable(NULL, 0);
+	    CFStringAppendCharacters(substr, &c, 1);
+	    CFMakeCollectable(substr);
+	    rb_yield((VALUE)substr);
+	    if (!excl && c == e) 
+		break;
+	    c++;
+	    if (excl && c == e) 
+		break;
+	}
+	return beg;
+#else
     enc = rb_enc_check(beg, end);
     if (RSTRING_CLEN(beg) == 1 && RSTRING_CLEN(end) == 1 &&
 	is_ascii_string(beg) && is_ascii_string(end)) {
@@ -3202,10 +3295,12 @@
 	    if (excl && c == e) break;
 	}
 	return beg;
+#endif
     }
     n = rb_str_cmp(beg, end);
     if (n > 0 || (excl && n == 0)) return beg;
 	
+    succ = rb_intern("succ");
     after_end = rb_funcall(end, succ, 0, 0);
     current = beg;
     while (!rb_str_equal(current, after_end)) {
@@ -3456,9 +3551,13 @@
     end = END(nth);
     len = end - start;
     StringValue(val);
+#if !WITH_OBJC
     enc = rb_enc_check(str, val);
+#endif
     rb_str_splice_0(str, start, len, val);
+#if !WITH_OBJC
     rb_enc_associate(str, enc);
+#endif
 }
 
 static VALUE
@@ -3681,7 +3780,9 @@
     pat = get_pat(argv[0], 1);
     if (rb_reg_search(pat, str, 0, 0) >= 0) {
 	rb_encoding *enc;
+#if !WITH_OBJC
 	int cr = ENC_CODERANGE(str);
+#endif
 
 	match = rb_backref_get();
 	regs = RMATCH_REGS(match);
@@ -3859,9 +3960,7 @@
                 val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0)));
                 val = rb_obj_as_string(val);
             }
-#if !WITH_OBJC
 	    str_mod_check(str, sp, slen);
-#endif
 	    if (bang) str_frozen_check(str);
 	    if (val == dest) { 	/* paranoid check [ruby-dev:24827] */
 		rb_raise(rb_eRuntimeError, "block should not cheat");
@@ -3888,7 +3987,11 @@
 	     * in order to prevent infinite loops.
 	     */
 	    if (slen <= END(0)) break;
+#if WITH_OBJC
+	    len = 1;
+#else
 	    len = rb_enc_mbclen(sp+END(0), sp+slen, str_enc);
+#endif
             rb_enc_str_buf_cat(dest, sp+END(0), len, str_enc);
 	    offset = END(0) + len;
 	}
@@ -4340,11 +4443,16 @@
 static void
 str_cat_char(VALUE str, int c, rb_encoding *enc)
 {
+#if WITH_OBJC /* append the single character c to str; this branch appends one UniChar through CoreFoundation */
+    UniChar uc = (UniChar)c; /* narrow by value: reading &c through a UniChar pointer yields the wrong half of the int on big-endian hosts */
+    CFStringAppendCharacters((CFMutableStringRef)str, &uc, 1);
+#else
     char s[16];
     int n = rb_enc_codelen(c, enc);
 
     rb_enc_mbcput(c, s, enc);
     rb_enc_str_buf_cat(str, s, n, enc);
+#endif
 }
 
 static void
@@ -4388,14 +4496,20 @@
     p = RSTRING_PTR(str); pend = RSTRING_END(str);
 #endif
     result = rb_str_buf_new2("");
+#if !WITH_OBJC
     if (!rb_enc_asciicompat(enc)) enc = rb_usascii_encoding();
     rb_enc_associate(result, enc);
+#endif
     str_cat_char(result, '"', enc);
     while (p < pend) {
 	int c;
 	int n;
 	int cc;
 
+#if WITH_OBJC
+	c = *p;
+	n = 1;
+#else
         n = rb_enc_precise_mbclen(p, pend, enc);
         if (!MBCLEN_CHARFOUND_P(n)) {
             p++;
@@ -4406,13 +4520,18 @@
 
 	c = rb_enc_codepoint(p, pend, enc);
 	n = rb_enc_codelen(c, enc);
+#endif
 
 	p += n;
 	if (c == '"'|| c == '\\' ||
 	    (c == '#' &&
              p < pend &&
+#if WITH_OBJC
+	     ((cc = *p),
+#else
              MBCLEN_CHARFOUND_P(rb_enc_precise_mbclen(p,pend,enc)) &&
              (cc = rb_enc_codepoint(p,pend,enc),
+#endif
               (cc == '$' || cc == '@' || cc == '{')))) {
 	    prefix_escape(result, c, enc);
 	}
@@ -4522,7 +4641,7 @@
     }
     if (!rb_enc_asciicompat(enc0)) {
 	len += 19;		/* ".force_encoding('')" */
-	len += strlen(enc0->name);
+	len += strlen(rb_enc_name(enc0));
     }
 
     result = rb_str_new5(str, 0, len);
@@ -4584,13 +4703,17 @@
     }
     *q++ = '"';
     if (!rb_enc_asciicompat(enc0)) {
-	sprintf(q, ".force_encoding(\"%s\")", enc0->name);
+	sprintf(q, ".force_encoding(\"%s\")", rb_enc_name(enc0));
+#if !WITH_OBJC
 	enc0 = rb_ascii8bit_encoding();
+#endif
     }
 
     OBJ_INFECT(result, str);
     /* result from dump is ASCII */
+#if !WITH_OBJC
     rb_enc_associate(result, enc0);
+#endif
     RSTRING_SYNC(result);
     return result;
 }
@@ -5512,7 +5635,9 @@
     }
     
     if (modify) {
+#if !WITH_OBJC
 	rb_enc_associate(str, enc);
+#endif
 	return str;
     }
     return Qnil;
@@ -6016,13 +6141,16 @@
     VALUE spat;
     VALUE limit;
     int awk_split = Qfalse;
+    int spat_string = Qfalse;
     long beg, end, i = 0;
     int lim = 0;
     VALUE result, tmp;
     const char *cstr;
     long clen;
 
+#if !WITH_OBJC
     cstr = RSTRING_CPTR(str);
+#endif
     clen = RSTRING_CLEN(str);
 
     if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
@@ -6036,11 +6164,7 @@
 	i = 1;
     }
 
-#if WITH_OBJC
-    enc = rb_ascii8bit_encoding();
-#else
     enc = STR_ENC_GET(str);
-#endif
     result = rb_ary_new();
     if (NIL_P(spat)) {
 	if (!NIL_P(rb_fs)) {
@@ -6052,17 +6176,19 @@
     else {
       fs_set:
 	if (TYPE(spat) == T_STRING) {
+	    spat_string = Qtrue;
+#if WITH_OBJC
+	    if (RSTRING_CLEN(spat) == 1
+		&& CFStringGetCharacterAtIndex((CFStringRef)spat, 0) == ' ') {
+		awk_split = Qtrue;
+	    }
+#else
 	    const char *spat_cstr;
 	    long spat_clen;
-#if WITH_OBJC
-	    rb_encoding *enc2 = rb_ascii8bit_encoding();
-#else
 	    rb_encoding *enc2 = STR_ENC_GET(spat);
-#endif
 
 	    spat_cstr = RSTRING_CPTR(spat);
 	    spat_clen = RSTRING_CLEN(spat);
-
 	    if (rb_enc_mbminlen(enc2) == 1) {
 		if (spat_clen == 1 && spat_cstr[0] == ' '){
 		    awk_split = Qtrue;
@@ -6076,47 +6202,59 @@
 		}
 	    }
 	    if (!awk_split) {
+		spat = rb_reg_regcomp(rb_reg_quote(spat));
+	    }
+#endif
+	}
+	else {
+	    spat = get_pat(spat, 1);
+	}
+    }
+
+    beg = 0;
 #if WITH_OBJC
-		CFRange search_range;
-		search_range = CFRangeMake(0, clen);
-		do {
-		    CFRange result_range;
-		    CFRange substr_range;
-		    if (!CFStringFindWithOptions((CFStringRef)str, 
+    if (awk_split || spat_string) {
+	CFRange search_range;
+	if (spat == Qnil)
+	    spat = (VALUE)CFSTR(" ");
+	search_range = CFRangeMake(0, clen);
+	do {
+	    CFRange result_range;
+	    CFRange substr_range;
+	    if (!CFStringFindWithOptions((CFStringRef)str, 
 			(CFStringRef)spat,
 			search_range,
 			0,
 			&result_range))
-			break;
+		break;
 
-		    substr_range.location = search_range.location;
-		    substr_range.length = result_range.location 
-			- search_range.location;
+	    substr_range.location = search_range.location;
+	    substr_range.length = result_range.location 
+		- search_range.location;
 
-		    rb_ary_push(result,
-			rb_str_subseq(str, substr_range.location,
-			    substr_range.length));
+	    if (awk_split == Qfalse || substr_range.length > 0) {
+		VALUE substr;
+	       
+		substr = rb_str_subseq(str, substr_range.location,
+		    substr_range.length);
 
-		    search_range.location = result_range.location 
-			+ result_range.length;
-		    search_range.length = clen - search_range.location;
+		if (awk_split == Qtrue) {
+		    CFStringTrimWhitespace((CFMutableStringRef)substr);
+		    if (CFStringGetLength((CFStringRef)substr) > 0)
+			rb_ary_push(result, substr);
 		}
-		while ((limit == Qnil || --lim > 1));
-		rb_ary_push(result, 
-		    rb_str_subseq(str, search_range.location, 
-			search_range.length));
-		goto done;
-#else
-		spat = rb_reg_regcomp(rb_reg_quote(spat));
-#endif
+		else {
+		    rb_ary_push(result, substr);
+		}
 	    }
+
+	    search_range.location = result_range.location 
+		+ result_range.length;
+	    search_range.length = clen - search_range.location;
 	}
-	else {
-	    spat = get_pat(spat, 1);
-	}
-    }
-
-    beg = 0;
+	while ((limit == Qnil || --lim > 1));
+	beg = search_range.location;
+#else
     if (awk_split) {
 	const char *ptr = cstr;
 	const char *eptr = cstr+clen;
@@ -6150,6 +6288,7 @@
 		}
 	    }
 	}
+#endif
     }
     else {
 	long start = beg;
@@ -6160,22 +6299,34 @@
 	while ((end = rb_reg_search(spat, str, start, 0)) >= 0) {
 	    regs = RMATCH_REGS(rb_backref_get());
 	    if (start == end && BEG(0) == END(0)) {
+#if WITH_OBJC
+		if (0) {
+#else
 		if (!cstr) {
-		    //rb_ary_push(result, rb_str_new("", 0));
+		    rb_ary_push(result, rb_str_new("", 0));
+#endif
 		    break;
 		}
 		else if (last_null == 1) {
+#if WITH_OBJC
+		    rb_ary_push(result, rb_str_subseq(str, beg, 1));
+#else
 		    rb_ary_push(result, rb_str_subseq(str, beg,
 						      rb_enc_mbclen(cstr+beg,
 								    cstr+clen,
 								    enc)));
+#endif
 		    beg = start;
 		}
 		else {
-                    if (cstr+start == cstr+clen)
+                    if (start == clen)
                         start++;
                     else
+#if WITH_OBJC
+			start += 1;
+#else
                         start += rb_enc_mbclen(cstr+start,cstr+clen,enc);
+#endif
 		    last_null = 1;
 		    continue;
 		}
@@ -6204,7 +6355,6 @@
 	    tmp = rb_str_subseq(str, beg, clen-beg);
 	rb_ary_push(result, tmp);
     }
-done:
     if (NIL_P(limit) && lim == 0) {
 	while (RARRAY_LEN(result) > 0 &&
 	       RSTRING_CLEN(RARRAY_AT(result, RARRAY_LEN(result)-1)) == 0)
@@ -7885,7 +8035,7 @@
     VALUE str;
 
     sym = rb_id2str(id);
-    if (!rb_enc_symname_p(RSTRING_CPTR(sym), rb_ascii8bit_encoding())) {
+    if (!rb_enc_symname_p(RSTRING_CPTR(sym), NULL)) {
 	sym = rb_str_inspect(sym);
     }
     str = rb_str_new(":", 1);

Modified: MacRuby/trunk/time.c
===================================================================
--- MacRuby/trunk/time.c	2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/time.c	2008-05-15 08:06:22 UTC (rev 192)
@@ -2097,7 +2097,7 @@
 	    rb_str_cat(str, buf, len);
 	    p += strlen(p);
 	    if (buf != buffer) {
-		free(buf);
+		xfree(buf);
 		buf = buffer;
 	    }
 	    for (fmt = p; p < pe && !*p; ++p);
@@ -2109,8 +2109,10 @@
 	len = rb_strftime(&buf, RSTRING_CPTR(format), &tobj->tm);
     }
     str = rb_str_new(buf, len);
-    if (buf != buffer) free(buf);
+    if (buf != buffer) xfree(buf);
+#if !WITH_OBJC
     rb_enc_copy(str, format);
+#endif
     return str;
 }
 

Modified: MacRuby/trunk/transcode.c
===================================================================
--- MacRuby/trunk/transcode.c	2008-05-11 20:36:13 UTC (rev 191)
+++ MacRuby/trunk/transcode.c	2008-05-15 08:06:22 UTC (rev 192)
@@ -10,6 +10,9 @@
 **********************************************************************/
 
 #include "ruby/ruby.h"
+
+#if !WITH_OBJC
+
 #include "ruby/encoding.h"
 #define PType (int)
 #include "transcode_data.h"
@@ -441,15 +444,35 @@
     return newstr;
 }
 
+#else // WITH_OBJC
+
+static VALUE
+rb_str_transcode(int argc, VALUE *argv, VALUE self)
+{
+    /* TODO: WITH_OBJC stub -- argc/argv are ignored and self is returned as-is. */
+    return self;
+}
+
+static VALUE
+rb_str_transcode_bang(int argc, VALUE *argv, VALUE self)
+{
+    /* TODO: WITH_OBJC stub -- argc/argv are ignored; self is returned unmodified. */
+    return self;
+}
+
+#endif
+
 void
 Init_transcode(void)
 {
+#if !WITH_OBJC
     transcoder_table = st_init_strcasetable();
     transcoder_lib_table = st_init_strcasetable();
     init_transcoder_table();
 
     sym_invalid = ID2SYM(rb_intern("invalid"));
     sym_ignore = ID2SYM(rb_intern("ignore"));
+#endif
 
     rb_define_method(rb_cString, "encode", rb_str_transcode, -1);
     rb_define_method(rb_cString, "encode!", rb_str_transcode_bang, -1);

    

source_changes＠macosforge.org

tags

participants (1)