[macruby-changes] [3555] MacRuby/branches/icu

source_changes at macosforge.org source_changes at macosforge.org
Tue Feb 16 14:44:15 PST 2010


Revision: 3555
          http://trac.macosforge.org/projects/ruby/changeset/3555
Author:   lsansonetti at apple.com
Date:     2010-02-16 14:44:14 -0800 (Tue, 16 Feb 2010)
Log Message:
-----------
more work

Modified Paths:
--------------
    MacRuby/branches/icu/encoding.c
    MacRuby/branches/icu/encoding.h
    MacRuby/branches/icu/include/ruby/encoding.h
    MacRuby/branches/icu/marshal.c
    MacRuby/branches/icu/re.c
    MacRuby/branches/icu/ruby.c
    MacRuby/branches/icu/string.c
    MacRuby/branches/icu/ucnv.c

Modified: MacRuby/branches/icu/encoding.c
===================================================================
--- MacRuby/branches/icu/encoding.c	2010-02-16 21:53:09 UTC (rev 3554)
+++ MacRuby/branches/icu/encoding.c	2010-02-16 22:44:14 UTC (rev 3555)
@@ -1,31 +1,37 @@
+/* 
+ * MacRuby implementation of Ruby 1.9 String.
+ *
+ * This file is covered by the Ruby license. See COPYING for more details.
+ * 
+ * Copyright (C) 2007-2010, Apple Inc. All rights reserved.
+ * Copyright (C) 1993-2007 Yukihiro Matsumoto
+ * Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
+ * Copyright (C) 2000 Information-technology Promotion Agency, Japan
+ */
+
 #include "encoding.h"
 #include <string.h>
 
-// TODO:
-// - use rb_usascii_str_new_cstr instead of rb_str_new2
+VALUE rb_cEncoding;
 
-VALUE rb_cEncoding = 0;
+static rb_encoding_t *default_internal = NULL;
+static rb_encoding_t *default_external = NULL;
+rb_encoding_t *rb_encodings[ENCODINGS_COUNT];
 
-#define ENC(x) ((encoding_t *)(x))
+static void str_undefined_update_flags(rb_str_t *self) { abort(); }
+static void str_undefined_make_data_binary(rb_str_t *self) { abort(); }
+static bool str_undefined_try_making_data_uchars(rb_str_t *self) { abort(); }
+static long str_undefined_length(rb_str_t *self, bool ucs2_mode) { abort(); }
+static long str_undefined_bytesize(rb_str_t *self) { abort(); }
+static character_boundaries_t str_undefined_get_character_boundaries(rb_str_t *self, long index, bool ucs2_mode) { abort(); }
+static long str_undefined_offset_in_bytes_to_index(rb_str_t *self, long offset_in_bytes, bool ucs2_mode) { abort(); }
 
-encoding_t *default_internal = NULL;
-encoding_t *default_external = NULL;
-encoding_t *encodings[ENCODINGS_COUNT];
-
-static void str_undefined_update_flags(string_t *self) { abort(); }
-static void str_undefined_make_data_binary(string_t *self) { abort(); }
-static bool str_undefined_try_making_data_uchars(string_t *self) { abort(); }
-static long str_undefined_length(string_t *self, bool ucs2_mode) { abort(); }
-static long str_undefined_bytesize(string_t *self) { abort(); }
-static character_boundaries_t str_undefined_get_character_boundaries(string_t *self, long index, bool ucs2_mode) { abort(); }
-static long str_undefined_offset_in_bytes_to_index(string_t *self, long offset_in_bytes, bool ucs2_mode) { abort(); }
-
 static VALUE
 mr_enc_s_list(VALUE klass, SEL sel)
 {
     VALUE ary = rb_ary_new2(ENCODINGS_COUNT);
     for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
-	rb_ary_push(ary, (VALUE)encodings[i]);
+	rb_ary_push(ary, (VALUE)rb_encodings[i]);
     }
     return ary;
 }
@@ -35,11 +41,11 @@
 {
     VALUE ary = rb_ary_new();
     for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
-	encoding_t *encoding = ENC(encodings[i]);
+	rb_encoding_t *encoding = RENC(rb_encodings[i]);
 	// TODO: use US-ASCII strings
-	rb_ary_push(ary, rb_str_new2(encoding->public_name));
+	rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
 	for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
-	    rb_ary_push(ary, rb_str_new2(encoding->aliases[j]));
+	    rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[j]));
 	}
     }
     return ary;
@@ -50,11 +56,10 @@
 {
     VALUE hash = rb_hash_new();
     for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
-	encoding_t *encoding = ENC(encodings[i]);
+	rb_encoding_t *encoding = RENC(rb_encodings[i]);
 	for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
-	    rb_hash_aset(hash,
-		    rb_str_new2(encoding->aliases[j]),
-		    rb_str_new2(encoding->public_name));
+	    rb_hash_aset(hash, rb_usascii_str_new2(encoding->aliases[j]),
+		    rb_usascii_str_new2(encoding->public_name));
 	}
     }
     return hash;
@@ -75,25 +80,25 @@
 static VALUE
 mr_enc_name(VALUE self, SEL sel)
 {
-    return rb_str_new2(ENC(self)->public_name);
+    return rb_usascii_str_new2(RENC(self)->public_name);
 }
 
 static VALUE
 mr_enc_inspect(VALUE self, SEL sel)
 {
     return rb_sprintf("#<%s:%s>", rb_obj_classname(self),
-	    ENC(self)->public_name);
+	    RENC(self)->public_name);
 }
 
 static VALUE
 mr_enc_names(VALUE self, SEL sel)
 {
-    encoding_t *encoding = ENC(self);
+    rb_encoding_t *encoding = RENC(self);
 
     VALUE ary = rb_ary_new2(encoding->aliases_count + 1);
-    rb_ary_push(ary, rb_str_new2(encoding->public_name));
+    rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
     for (unsigned int i = 0; i < encoding->aliases_count; ++i) {
-	rb_ary_push(ary, rb_str_new2(encoding->aliases[i]));
+	rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[i]));
     }
     return ary;
 }
@@ -101,7 +106,7 @@
 static VALUE
 mr_enc_ascii_compatible_p(VALUE self, SEL sel)
 {
-    return ENC(self)->ascii_compatible ? Qtrue : Qfalse;
+    return RENC(self)->ascii_compatible ? Qtrue : Qfalse;
 }
 
 static VALUE
@@ -111,7 +116,7 @@
 }
 
 static void
-define_encoding_constant(const char *name, encoding_t *encoding)
+define_encoding_constant(const char *name, rb_encoding_t *encoding)
 {
     char c = name[0];
     if ((c >= '0') && (c <= '9')) {
@@ -135,7 +140,7 @@
     free(name_copy);
 }
 
-extern void enc_init_ucnv_encoding(encoding_t *encoding);
+extern void enc_init_ucnv_encoding(rb_encoding_t *encoding);
 
 enum {
     ENCODING_TYPE_SPECIAL = 0,
@@ -146,7 +151,7 @@
 add_encoding(
 	unsigned int encoding_index, // index of the encoding in the encodings
 				     // array
-	unsigned int encoding_type,
+	unsigned int rb_encoding_type,
 	const char *public_name, // public name for the encoding
 	unsigned char min_char_size,
 	bool single_byte_encoding, // in the encoding a character takes only
@@ -175,11 +180,11 @@
     va_end(va_aliases);
 
     // create the MacRuby object
-    NEWOBJ(encoding, encoding_t);
+    NEWOBJ(encoding, rb_encoding_t);
     encoding->basic.flags = 0;
     encoding->basic.klass = rb_cEncoding;
-    encodings[encoding_index] = encoding;
-    rb_objc_retain(encoding); // it should never be deallocated
+    rb_encodings[encoding_index] = encoding;
+    GC_RETAIN(encoding); // it should never be deallocated
 
     // fill the fields
     encoding->index = encoding_index;
@@ -202,7 +207,7 @@
     encoding->methods.offset_in_bytes_to_index =
 	str_undefined_offset_in_bytes_to_index;
 
-    switch (encoding_type) {
+    switch (rb_encoding_type) {
 	case ENCODING_TYPE_SPECIAL:
 	    break;
 	case ENCODING_TYPE_UCNV:
@@ -236,8 +241,8 @@
     //add_encoding(ENCODING_SJIS,      ENCODING_TYPE_RUBY, "Shift_JIS",   1, false, true, "SJIS", NULL);
     //add_encoding(ENCODING_CP932,     ENCODING_TYPE_RUBY, "Windows-31J", 1, false, true, "CP932", "csWindows31J", NULL);
 
-    default_external = encodings[ENCODING_UTF8];
-    default_internal = encodings[ENCODING_UTF8];
+    default_external = rb_encodings[ENCODING_UTF8];
+    default_internal = rb_encodings[ENCODING_UTF8];
 }
 
 VALUE
@@ -280,3 +285,96 @@
 
     create_encodings();
 }
+
+// MRI C-API compatibility.
+
+rb_encoding_t *
+rb_enc_find(const char *name)
+{
+    for (unsigned int i = 0; i < ENCODINGS_COUNT; i++) {
+	rb_encoding_t *enc = rb_encodings[i];
+	if (strcasecmp(enc->public_name, name) == 0) {
+	    return enc;
+	}
+	for (unsigned int j = 0; j < enc->aliases_count; j++) {
+	    const char *alias = enc->aliases[j];
+	    if (strcasecmp(alias, name) == 0) {
+		return enc;
+	    }
+	}
+    }
+    return NULL;
+}
+
+VALUE
+rb_enc_from_encoding(rb_encoding_t *enc)
+{
+    return (VALUE)enc;
+}
+
+rb_encoding_t *
+rb_enc_get(VALUE obj)
+{
+    if (IS_RSTR(obj)) {
+	return RSTR(obj)->encoding;
+    }
+    // TODO support symbols
+    return NULL;
+}
+
+rb_encoding_t *
+rb_to_encoding(VALUE obj)
+{
+    rb_encoding_t *enc;
+    if (CLASS_OF(obj) == rb_cEncoding) {
+	enc = RENC(obj);
+    }
+    else {
+	StringValue(obj);
+	enc = rb_enc_find(RSTRING_PTR(obj));
+	if (enc == NULL) {
+	    rb_raise(rb_eArgError, "unknown encoding name - %s",
+		    RSTRING_PTR(obj));
+	}
+    }
+    return enc;
+}
+
+const char *
+rb_enc_name(rb_encoding_t *enc)
+{
+    return RENC(enc)->public_name;
+}
+
+VALUE
+rb_enc_name2(rb_encoding_t *enc)
+{
+    return rb_usascii_str_new2(rb_enc_name(enc));
+}
+
+long
+rb_enc_mbminlen(rb_encoding_t *enc)
+{
+    return enc->min_char_size;    
+}
+
+long
+rb_enc_mbmaxlen(rb_encoding_t *enc)
+{
+    return enc->single_byte_encoding ? 1 : 10; // XXX 10?
+}
+
+rb_encoding_t *
+rb_locale_encoding(void)
+{
+    // XXX
+    return rb_encodings[ENCODING_UTF8];
+}
+
+void
+rb_enc_set_default_external(VALUE encoding)
+{
+    assert(CLASS_OF(encoding) == rb_cEncoding);
+    default_external = RENC(encoding); 
+}
+

Modified: MacRuby/branches/icu/encoding.h
===================================================================
--- MacRuby/branches/icu/encoding.h	2010-02-16 21:53:09 UTC (rev 3554)
+++ MacRuby/branches/icu/encoding.h	2010-02-16 22:44:14 UTC (rev 3555)
@@ -1,3 +1,14 @@
+/* 
+ * MacRuby implementation of Ruby 1.9 String.
+ *
+ * This file is covered by the Ruby license. See COPYING for more details.
+ * 
+ * Copyright (C) 2007-2010, Apple Inc. All rights reserved.
+ * Copyright (C) 1993-2007 Yukihiro Matsumoto
+ * Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
+ * Copyright (C) 2000 Information-technology Promotion Agency, Japan
+ */
+
 #ifndef __ENCODING_H_
 #define __ENCODING_H_
 
@@ -22,24 +33,24 @@
 #endif
 
 #define NATIVE_UTF16_ENC(encoding) \
-    ((encoding) == encodings[ENCODING_UTF16_NATIVE])
+    ((encoding) == rb_encodings[ENCODING_UTF16_NATIVE])
 #define NON_NATIVE_UTF16_ENC(encoding) \
-    ((encoding) == encodings[ENCODING_UTF16_NON_NATIVE])
+    ((encoding) == rb_encodings[ENCODING_UTF16_NON_NATIVE])
 #define UTF16_ENC(encoding) \
     (NATIVE_UTF16_ENC(encoding) || NON_NATIVE_UTF16_ENC(encoding))
 #define NATIVE_UTF32_ENC(encoding) \
-    ((encoding) == encodings[ENCODING_UTF32_NATIVE])
+    ((encoding) == rb_encodings[ENCODING_UTF32_NATIVE])
 #define NON_NATIVE_UTF32_ENC(encoding) \
-    ((encoding) == encodings[ENCODING_UTF32_NON_NATIVE])
+    ((encoding) == rb_encodings[ENCODING_UTF32_NON_NATIVE])
 #define UTF32_ENC(encoding) \
     (NATIVE_UTF32_ENC(encoding) || NON_NATIVE_UTF32_ENC(encoding))
-#define BINARY_ENC(encoding) ((encoding) == encodings[ENCODING_BINARY])
+#define BINARY_ENC(encoding) ((encoding) == rb_encodings[ENCODING_BINARY])
 
 typedef uint8_t str_flag_t;
 
-typedef struct  {
+typedef struct {
     struct RBasic basic;
-    struct encoding_s *encoding;
+    struct rb_encoding *encoding;
     long capacity_in_bytes;
     long length_in_bytes;
     union {
@@ -47,9 +58,9 @@
 	UChar *uchars;
     } data;
     str_flag_t flags;
-} string_t;
+} rb_str_t;
 
-#define RSTR(x) ((string_t *)x)
+#define RSTR(x) ((rb_str_t *)x)
 
 static inline bool
 rb_klass_is_rstr(VALUE klass)
@@ -75,16 +86,16 @@
 } character_boundaries_t;
 
 typedef struct {
-    void (*update_flags)(string_t *);
-    void (*make_data_binary)(string_t *);
-    bool (*try_making_data_uchars)(string_t *);
-    long (*length)(string_t *, bool);
-    long (*bytesize)(string_t *);
-    character_boundaries_t (*get_character_boundaries)(string_t *, long, bool);
-    long (*offset_in_bytes_to_index)(string_t *, long, bool);
+    void (*update_flags)(rb_str_t *);
+    void (*make_data_binary)(rb_str_t *);
+    bool (*try_making_data_uchars)(rb_str_t *);
+    long (*length)(rb_str_t *, bool);
+    long (*bytesize)(rb_str_t *);
+    character_boundaries_t (*get_character_boundaries)(rb_str_t *, long, bool);
+    long (*offset_in_bytes_to_index)(rb_str_t *, long, bool);
 } encoding_methods_t;
 
-typedef struct encoding_s {
+typedef struct rb_encoding {
     struct RBasic basic;
     unsigned int index;
     const char *public_name;
@@ -95,8 +106,10 @@
     bool ascii_compatible : 1;
     encoding_methods_t methods;
     void *private_data;
-} encoding_t;
+} rb_encoding_t;
 
+#define RENC(x) ((rb_encoding_t *)(x))
+
 enum {
     ENCODING_BINARY = 0,
     ENCODING_ASCII,
@@ -114,10 +127,8 @@
     ENCODINGS_COUNT
 };
 
-extern encoding_t *encodings[ENCODINGS_COUNT];
+extern rb_encoding_t *rb_encodings[ENCODINGS_COUNT];
 
-extern VALUE rb_cMREncoding;
-
 #define STRING_HAS_SUPPLEMENTARY     0x020
 #define STRING_HAS_SUPPLEMENTARY_SET 0x010
 #define STRING_ASCII_ONLY            0x008
@@ -140,31 +151,31 @@
     return ((a) + (b - 1)) / b;
 }
 
-void str_update_flags(string_t *self);
+void str_update_flags(rb_str_t *self);
 
 static inline void
-str_unset_facultative_flags(string_t *self)
+str_unset_facultative_flags(rb_str_t *self)
 {
     self->flags &= ~STRING_HAS_SUPPLEMENTARY_SET & ~STRING_ASCII_ONLY_SET
 	& ~STRING_VALID_ENCODING_SET;
 }
 
 static inline bool
-str_known_to_have_an_invalid_encoding(string_t *self)
+str_known_to_have_an_invalid_encoding(rb_str_t *self)
 {
     return (self->flags & (STRING_VALID_ENCODING_SET
 		| STRING_VALID_ENCODING)) == STRING_VALID_ENCODING_SET;
 }
 
 static inline bool
-str_known_not_to_have_any_supplementary(string_t *self)
+str_known_not_to_have_any_supplementary(rb_str_t *self)
 {
     return (self->flags & (STRING_HAS_SUPPLEMENTARY_SET
 		| STRING_HAS_SUPPLEMENTARY)) == STRING_HAS_SUPPLEMENTARY_SET;
 }
 
 static inline bool
-str_check_flag_and_update_if_needed(string_t *self, str_flag_t flag_set,
+str_check_flag_and_update_if_needed(rb_str_t *self, str_flag_t flag_set,
 	str_flag_t flag)
 {
     if (!(self->flags & flag_set)) {
@@ -175,21 +186,21 @@
 }
 
 static inline bool
-str_is_valid_encoding(string_t *self)
+str_is_valid_encoding(rb_str_t *self)
 {
     return str_check_flag_and_update_if_needed(self, STRING_VALID_ENCODING_SET,
 	    STRING_VALID_ENCODING);
 }
 
 static inline bool
-str_is_ascii_only(string_t *self)
+str_is_ascii_only(rb_str_t *self)
 {
     return str_check_flag_and_update_if_needed(self, STRING_ASCII_ONLY_SET,
 	    STRING_ASCII_ONLY);
 }
 
 static inline bool
-str_is_ruby_ascii_only(string_t *self)
+str_is_ruby_ascii_only(rb_str_t *self)
 {
     // for MRI, a string in a non-ASCII-compatible encoding (like UTF-16)
     // containing only ASCII characters is not "ASCII only" though for us it
@@ -201,19 +212,19 @@
 }
 
 static inline bool
-str_is_stored_in_uchars(string_t *self)
+str_is_stored_in_uchars(rb_str_t *self)
 {
     return self->flags & STRING_STORED_IN_UCHARS;
 }
 
 static inline void
-str_negate_stored_in_uchars(string_t *self)
+str_negate_stored_in_uchars(rb_str_t *self)
 {
     self->flags ^= STRING_STORED_IN_UCHARS;
 }
 
 static inline void
-str_set_stored_in_uchars(string_t *self, bool status)
+str_set_stored_in_uchars(rb_str_t *self, bool status)
 {
     if (status) {
 	self->flags |= STRING_STORED_IN_UCHARS;
@@ -224,7 +235,7 @@
 }
 
 static inline void
-str_set_facultative_flag(string_t *self, bool status, str_flag_t flag_set,
+str_set_facultative_flag(rb_str_t *self, bool status, str_flag_t flag_set,
 	str_flag_t flag)
 {
     if (status) {
@@ -236,21 +247,21 @@
 }
 
 static inline void
-str_set_has_supplementary(string_t *self, bool status)
+str_set_has_supplementary(rb_str_t *self, bool status)
 {
     str_set_facultative_flag(self, status, STRING_HAS_SUPPLEMENTARY_SET,
 	    STRING_HAS_SUPPLEMENTARY);
 }
 
 static inline void
-str_set_ascii_only(string_t *self, bool status)
+str_set_ascii_only(rb_str_t *self, bool status)
 {
     str_set_facultative_flag(self, status, STRING_ASCII_ONLY_SET,
 	    STRING_ASCII_ONLY);
 }
 
 static inline void
-str_set_valid_encoding(string_t *self, bool status)
+str_set_valid_encoding(rb_str_t *self, bool status)
 {
     str_set_facultative_flag(self, status, STRING_VALID_ENCODING_SET,
 	    STRING_VALID_ENCODING);

Modified: MacRuby/branches/icu/include/ruby/encoding.h
===================================================================
--- MacRuby/branches/icu/include/ruby/encoding.h	2010-02-16 21:53:09 UTC (rev 3554)
+++ MacRuby/branches/icu/include/ruby/encoding.h	2010-02-16 22:44:14 UTC (rev 3555)
@@ -16,83 +16,12 @@
 extern "C" {
 #endif
 
-#ifdef HAVE_STDARG_PROTOTYPES
-# include <stdarg.h>
-#else
-# include <varargs.h>
-#endif
+#include <stdarg.h>
 
-#if WITH_OBJC
+typedef struct rb_encoding rb_encoding;
 
-#include <wctype.h>
-
-typedef CFStringEncoding rb_encoding;
-
-#else
-
-#include "ruby/oniguruma.h"
-
-#define ENCODING_INLINE_MAX 1023
-#define ENCODING_SHIFT (FL_USHIFT+10)
-#define ENCODING_MASK (ENCODING_INLINE_MAX<<ENCODING_SHIFT)
-
-#define ENCODING_SET_INLINED(obj,i) do {\
-    RBASIC(obj)->flags &= ~ENCODING_MASK;\
-    RBASIC(obj)->flags |= (i) << ENCODING_SHIFT;\
-} while (0)
-#define ENCODING_SET(obj,i) do {\
-    VALUE rb_encoding_set_obj = (obj); \
-    int encoding_set_enc_index = (i); \
-    if (encoding_set_enc_index < ENCODING_INLINE_MAX) \
-        ENCODING_SET_INLINED(rb_encoding_set_obj, encoding_set_enc_index); \
-    else \
-        rb_enc_set_index(rb_encoding_set_obj, encoding_set_enc_index); \
-} while (0)
-
-#define ENCODING_GET_INLINED(obj) ((RBASIC(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT)
-#define ENCODING_GET(obj) \
-    (ENCODING_GET_INLINED(obj) != ENCODING_INLINE_MAX ? \
-     ENCODING_GET_INLINED(obj) : \
-     rb_enc_get_index(obj))
-
-#if WITH_OBJC
-# define ENCODING_IS_ASCII8BIT(obj) (1)
-#else
-# define ENCODING_IS_ASCII8BIT(obj) (ENCODING_GET_INLINED(obj) == 0)
-#endif
-
-#define ENC_CODERANGE_MASK	(FL_USER8|FL_USER9)
-#define ENC_CODERANGE_UNKNOWN	0
-#define ENC_CODERANGE_7BIT	FL_USER8
-#define ENC_CODERANGE_VALID	FL_USER9
-#define ENC_CODERANGE_BROKEN	(FL_USER8|FL_USER9)
-#define ENC_CODERANGE(obj) (RBASIC(obj)->flags & ENC_CODERANGE_MASK)
-#define ENC_CODERANGE_ASCIIONLY(obj) (ENC_CODERANGE(obj) == ENC_CODERANGE_7BIT)
-#define ENC_CODERANGE_SET(obj,cr) (RBASIC(obj)->flags = \
-				   (RBASIC(obj)->flags & ~ENC_CODERANGE_MASK) | (cr))
-#define ENC_CODERANGE_CLEAR(obj) ENC_CODERANGE_SET(obj,0)
-
-/* assumed ASCII compatiblity */
-#define ENC_CODERANGE_AND(a, b) \
-    (a == ENC_CODERANGE_7BIT ? b : \
-     a == ENC_CODERANGE_VALID ? (b == ENC_CODERANGE_7BIT ? ENC_CODERANGE_VALID : b) : \
-     ENC_CODERANGE_UNKNOWN)
-
-#define ENCODING_CODERANGE_SET(obj, encindex, cr) \
-    do { \
-        VALUE rb_encoding_coderange_obj = (obj); \
-        ENCODING_SET(rb_encoding_coderange_obj, (encindex)); \
-        ENC_CODERANGE_SET(rb_encoding_coderange_obj, (cr)); \
-    } while (0)
-
-typedef OnigEncodingType rb_encoding;
-#endif
-
-#define ENCODING_MAXNAMELEN 42
-
 int rb_enc_replicate(const char *, rb_encoding *);
 int rb_define_dummy_encoding(const char *);
-#define rb_enc_to_index(enc) ((enc) ? ((enc)->ruby_encoding_index) : 0)
 int rb_enc_get_index(VALUE obj);
 void rb_enc_set_index(VALUE obj, int encindex);
 int rb_enc_find_index(const char *name);
@@ -120,26 +49,13 @@
 /* name -> rb_encoding */
 rb_encoding * rb_enc_find(const char *name);
 
-#if WITH_OBJC
-rb_encoding * rb_enc_find2(VALUE name);
-#endif
-
 /* encoding -> name */
-#if WITH_OBJC
 const char *rb_enc_name(rb_encoding *);
 VALUE rb_enc_name2(rb_encoding *);
-#else
-#define rb_enc_name(enc) (enc)->name
-#endif
 
 /* encoding -> minlen/maxlen */
-#if WITH_OBJC
 long rb_enc_mbminlen(rb_encoding *);
 long rb_enc_mbmaxlen(rb_encoding *);
-#else
-#define rb_enc_mbminlen(enc) (enc)->min_enc_len
-#define rb_enc_mbmaxlen(enc) (enc)->max_enc_len
-#endif
 
 /* -> mbclen (no error notification: 0 < ret <= e-p, no exception) */
 int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc);
@@ -174,7 +90,8 @@
 /* ptr, ptr, encoding -> newline_or_not */
 #define rb_enc_is_newline(p,end,enc)  ONIGENC_IS_MBC_NEWLINE(enc,(UChar*)(p),(UChar*)(end))
 
-#if WITH_OBJC
+#include <wctype.h>
+
 #define rb_enc_isctype(c,t,enc)	(iswctype(c,t))
 #define rb_enc_isascii(c,enc)	(iswascii(c))
 #define rb_enc_isalpha(c,enc)	(iswalpha(c))
@@ -184,17 +101,6 @@
 #define rb_enc_isprint(c,enc)	(iswprint(c))
 #define rb_enc_isspace(c,enc)	(iswspace(c))
 #define rb_enc_isdigit(c,enc)	(iswdigit(c))
-#else
-#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t)
-#define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)
-#define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA(enc,c)
-#define rb_enc_islower(c,enc) ONIGENC_IS_CODE_LOWER(enc,c)
-#define rb_enc_isupper(c,enc) ONIGENC_IS_CODE_UPPER(enc,c)
-#define rb_enc_isalnum(c,enc) ONIGENC_IS_CODE_ALNUM(enc,c)
-#define rb_enc_isprint(c,enc) ONIGENC_IS_CODE_PRINT(enc,c)
-#define rb_enc_isspace(c,enc) ONIGENC_IS_CODE_SPACE(enc,c)
-#define rb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT(enc,c)
-#endif
 
 #define rb_enc_asciicompat(enc) (rb_enc_mbminlen(enc)==1 && !rb_enc_dummy_p(enc))
 
@@ -218,31 +124,18 @@
 int rb_ascii8bit_encindex(void);
 VALUE rb_enc_default_external(void);
 void rb_enc_set_default_external(VALUE encoding);
-//VALUE rb_locale_charmap(VALUE klass);
 long rb_memsearch(const void*,long,const void*,long,rb_encoding*);
 
 VALUE rb_num_to_chr(VALUE, rb_encoding *);
 	
 RUBY_EXTERN VALUE rb_cEncoding;
 
-#define ENC_UNINITIALIZED (&rb_cEncoding)
-#define enc_initialized_p(enc) ((enc)->auxiliary_data != &rb_cEncoding)
-#define ENC_FROM_ENCODING(enc) ((VALUE)(enc)->auxiliary_data)
-
-#define ENC_DUMMY_FLAG FL_USER2
-#define ENC_DUMMY_P(enc) (RBASIC(enc)->flags & ENC_DUMMY_FLAG)
-#define ENC_SET_DUMMY(enc) (RBASIC(enc)->flags |= ENC_DUMMY_FLAG)
-
-#if WITH_OBJC
-# define rb_enc_dummy_p(x) (Qfalse)
-#else
 static inline int
 rb_enc_dummy_p(rb_encoding *enc)
 {
-    if (!enc_initialized_p(enc)) return Qfalse;
-    return ENC_DUMMY_P(ENC_FROM_ENCODING(enc));
+    // TODO
+    return Qfalse;
 }
-#endif
 
 VALUE rb_str_transcode(VALUE str, VALUE to);
 

Modified: MacRuby/branches/icu/marshal.c
===================================================================
--- MacRuby/branches/icu/marshal.c	2010-02-16 21:53:09 UTC (rev 3554)
+++ MacRuby/branches/icu/marshal.c	2010-02-16 22:44:14 UTC (rev 3555)
@@ -83,7 +83,15 @@
 static ID s_dump_data, s_load_data, s_alloc;
 static ID s_getbyte, s_read, s_write, s_binmode;
 
-ID rb_id_encoding(void);
+static ID
+rb_id_encoding(void)
+{
+    static ID id = 0;
+    if (id == 0) {
+	id = rb_intern("encoding");
+    }
+    return id;
+}
 
 typedef struct {
     VALUE newclass;

Modified: MacRuby/branches/icu/re.c
===================================================================
--- MacRuby/branches/icu/re.c	2010-02-16 21:53:09 UTC (rev 3554)
+++ MacRuby/branches/icu/re.c	2010-02-16 22:44:14 UTC (rev 3555)
@@ -3750,7 +3750,7 @@
     rb_objc_define_method(rb_cRegexp, "source", rb_reg_source, 0);
     rb_objc_define_method(rb_cRegexp, "casefold?", rb_reg_casefold_p, 0);
     rb_objc_define_method(rb_cRegexp, "options", rb_reg_options_m, 0);
-    rb_objc_define_method(rb_cRegexp, "encoding", rb_obj_encoding, 0); /* in encoding.c */
+    //rb_objc_define_method(rb_cRegexp, "encoding", rb_obj_encoding, 0); /* in encoding.c */
     rb_objc_define_method(rb_cRegexp, "fixed_encoding?", rb_reg_fixed_encoding_p, 0);
     rb_objc_define_method(rb_cRegexp, "names", rb_reg_names, 0);
     rb_objc_define_method(rb_cRegexp, "named_captures", rb_reg_named_captures, 0);

Modified: MacRuby/branches/icu/ruby.c
===================================================================
--- MacRuby/branches/icu/ruby.c	2010-02-16 21:53:09 UTC (rev 3554)
+++ MacRuby/branches/icu/ruby.c	2010-02-16 22:44:14 UTC (rev 3555)
@@ -832,7 +832,7 @@
 static rb_encoding *
 opt_enc_find(VALUE enc_name)
 {
-    rb_encoding *enc = rb_enc_find2(enc_name);
+    rb_encoding *enc = rb_enc_find(RSTRING_PTR(enc_name));
     if (enc == NULL) {
 	rb_raise(rb_eRuntimeError, "unknown encoding name - %s", 
 	    RSTRING_PTR(enc_name));

Modified: MacRuby/branches/icu/string.c
===================================================================
--- MacRuby/branches/icu/string.c	2010-02-16 21:53:09 UTC (rev 3554)
+++ MacRuby/branches/icu/string.c	2010-02-16 22:44:14 UTC (rev 3555)
@@ -1,5 +1,5 @@
 /* 
- * MacRuby implementation of Ruby 1.9's string.c.
+ * MacRuby implementation of Ruby 1.9 String.
  *
  * This file is covered by the Ruby license. See COPYING for more details.
  * 
@@ -27,7 +27,7 @@
 VALUE rb_fs;
 
 static void
-str_update_flags_utf16(string_t *self)
+str_update_flags_utf16(rb_str_t *self)
 {
     assert(str_is_stored_in_uchars(self)
 	    || NON_NATIVE_UTF16_ENC(self->encoding));
@@ -108,7 +108,7 @@
 }
 
 void
-str_update_flags(string_t *self)
+str_update_flags(rb_str_t *self)
 {
     if (self->length_in_bytes == 0) {
 	str_set_valid_encoding(self, true);
@@ -136,7 +136,7 @@
 }
 
 static void
-str_invert_byte_order(string_t *self)
+str_invert_byte_order(rb_str_t *self)
 {
     assert(NON_NATIVE_UTF16_ENC(self->encoding));
 
@@ -155,8 +155,8 @@
     str_negate_stored_in_uchars(self);
 }
 
-static encoding_t *
-str_compatible_encoding(string_t *str1, string_t *str2)
+static rb_encoding_t *
+str_compatible_encoding(rb_str_t *str1, rb_str_t *str2)
 {
     if (str1->encoding == str2->encoding) {
 	return str1->encoding;
@@ -177,10 +177,10 @@
     return NULL;
 }
 
-static encoding_t *
-str_must_have_compatible_encoding(string_t *str1, string_t *str2)
+static rb_encoding_t *
+str_must_have_compatible_encoding(rb_str_t *str1, rb_str_t *str2)
 {
-    encoding_t *new_encoding = str_compatible_encoding(str1, str2);
+    rb_encoding_t *new_encoding = str_compatible_encoding(str1, str2);
     if (new_encoding == NULL) {
 	rb_raise(rb_eEncCompatError,
 		"incompatible character encodings: %s and %s",
@@ -189,13 +189,13 @@
     return new_encoding;
 }
 
-static string_t *
+static rb_str_t *
 str_alloc(void)
 {
-    NEWOBJ(str, string_t);
+    NEWOBJ(str, rb_str_t);
     str->basic.flags = 0;
     str->basic.klass = rb_cRubyString;
-    str->encoding = encodings[ENCODING_BINARY];
+    str->encoding = rb_encodings[ENCODING_BINARY];
     str->capacity_in_bytes = 0;
     str->length_in_bytes = 0;
     str->data.bytes = NULL;
@@ -204,8 +204,8 @@
 }
 
 static void
-str_replace_with_bytes(string_t *self, const char *bytes, long len,
-	encoding_t *enc)
+str_replace_with_bytes(rb_str_t *self, const char *bytes, long len,
+	rb_encoding_t *enc)
 {
     assert(len >= 0);
     self->flags = 0;
@@ -223,7 +223,7 @@
 }
 
 static void
-str_replace_with_string(string_t *self, string_t *source)
+str_replace_with_string(rb_str_t *self, rb_str_t *source)
 {
     if (self == source) {
 	return;
@@ -234,10 +234,10 @@
 }
 
 static void
-str_replace_with_cfstring(string_t *self, CFStringRef source)
+str_replace_with_cfstring(rb_str_t *self, CFStringRef source)
 {
     self->flags = 0;
-    self->encoding = encodings[ENCODING_UTF16_NATIVE];
+    self->encoding = rb_encodings[ENCODING_UTF16_NATIVE];
     self->capacity_in_bytes = self->length_in_bytes =
 	UCHARS_TO_BYTES(CFStringGetLength(source));
     if (self->length_in_bytes != 0) {
@@ -250,7 +250,7 @@
 }
 
 static void
-str_replace(string_t *self, VALUE arg)
+str_replace(rb_str_t *self, VALUE arg)
 {
     if (IS_RSTR(arg)) {
 	str_replace_with_string(self, RSTR(arg));
@@ -269,38 +269,38 @@
     }
 }
 
-static string_t *
+static rb_str_t *
 str_dup(VALUE source)
 {
-    string_t *destination = str_alloc();
+    rb_str_t *destination = str_alloc();
     str_replace(destination, source);
     return destination;
 }
 
 static void
-str_clear(string_t *self)
+str_clear(rb_str_t *self)
 {
     self->length_in_bytes = 0;
 }
 
-static string_t *
-str_new_from_string(string_t *source)
+static rb_str_t *
+str_new_from_string(rb_str_t *source)
 {
-    string_t *destination = str_alloc();
+    rb_str_t *destination = str_alloc();
     str_replace_with_string(destination, source);
     return destination;
 }
 
-static string_t *
+static rb_str_t *
 str_new_from_cfstring(CFStringRef source)
 {
-    string_t *destination = str_alloc();
+    rb_str_t *destination = str_alloc();
     str_replace_with_cfstring(destination, source);
     return destination;
 }
 
 static void
-str_make_data_binary(string_t *self)
+str_make_data_binary(rb_str_t *self)
 {
     if (!str_is_stored_in_uchars(self) || NATIVE_UTF16_ENC(self->encoding)) {
 	// nothing to do
@@ -318,7 +318,7 @@
 }
 
 static bool
-str_try_making_data_uchars(string_t *self)
+str_try_making_data_uchars(rb_str_t *self)
 {
     if (str_is_stored_in_uchars(self)) {
 	return true;
@@ -344,7 +344,7 @@
 }
 
 static void
-str_make_same_format(string_t *str1, string_t *str2)
+str_make_same_format(rb_str_t *str1, rb_str_t *str2)
 {
     if (str_is_stored_in_uchars(str1) != str_is_stored_in_uchars(str2)) {
 	if (str_is_stored_in_uchars(str1)) {
@@ -359,7 +359,7 @@
 }
 
 static long
-str_length(string_t *self, bool ucs2_mode)
+str_length(rb_str_t *self, bool ucs2_mode)
 {
     if (self->length_in_bytes == 0) {
 	return 0;
@@ -397,7 +397,7 @@
 }
 
 static long
-str_bytesize(string_t *self)
+str_bytesize(rb_str_t *self)
 {
     if (str_is_stored_in_uchars(self)) {
 	if (UTF16_ENC(self->encoding)) {
@@ -413,7 +413,7 @@
 }
 
 static bool
-str_getbyte(string_t *self, long index, unsigned char *c)
+str_getbyte(rb_str_t *self, long index, unsigned char *c)
 {
     if (str_is_stored_in_uchars(self) && NATIVE_UTF16_ENC(self->encoding)) {
 	if (index < 0) {
@@ -458,7 +458,7 @@
 }
 
 static void
-str_setbyte(string_t *self, long index, unsigned char value)
+str_setbyte(rb_str_t *self, long index, unsigned char value)
 {
     str_make_data_binary(self);
     if ((index < -self->length_in_bytes) || (index >= self->length_in_bytes)) {
@@ -471,7 +471,7 @@
 }
 
 static void
-str_force_encoding(string_t *self, encoding_t *enc)
+str_force_encoding(rb_str_t *self, rb_encoding_t *enc)
 {
     if (enc == self->encoding) {
 	return;
@@ -487,20 +487,20 @@
     }
 }
 
-static string_t *
-str_new_similar_empty_string(string_t *self)
+static rb_str_t *
+str_new_similar_empty_string(rb_str_t *self)
 {
-    string_t *str = str_alloc();
+    rb_str_t *str = str_alloc();
     str->encoding = self->encoding;
     str->flags = self->flags & STRING_REQUIRED_FLAGS;
     return str;
 }
 
-static string_t *
-str_new_copy_of_part(string_t *self, long offset_in_bytes,
+static rb_str_t *
+str_new_copy_of_part(rb_str_t *self, long offset_in_bytes,
 	long length_in_bytes)
 {
-    string_t *str = str_alloc();
+    rb_str_t *str = str_alloc();
     str->encoding = self->encoding;
     str->capacity_in_bytes = str->length_in_bytes = length_in_bytes;
     str->flags = self->flags & STRING_REQUIRED_FLAGS;
@@ -520,7 +520,7 @@
 }
 
 static character_boundaries_t
-str_get_character_boundaries(string_t *self, long index, bool ucs2_mode)
+str_get_character_boundaries(rb_str_t *self, long index, bool ucs2_mode)
 {
     character_boundaries_t boundaries = {-1, -1};
 
@@ -647,8 +647,8 @@
     return boundaries;
 }
 
-static string_t *
-str_get_characters(string_t *self, long first, long last, bool ucs2_mode)
+static rb_str_t *
+str_get_characters(rb_str_t *self, long first, long last, bool ucs2_mode)
 {
     if (self->length_in_bytes == 0) {
 	if (first == 0) {
@@ -696,8 +696,8 @@
 	    - first_boundaries.start_offset_in_bytes);
 }
 
-static string_t *
-str_get_character_at(string_t *self, long index, bool ucs2_mode)
+static rb_str_t *
+str_get_character_at(rb_str_t *self, long index, bool ucs2_mode)
 {
     if (self->length_in_bytes == 0) {
 	return NULL;
@@ -736,12 +736,12 @@
 	    - boundaries.start_offset_in_bytes);
 }
 
-static string_t *
-str_plus_string(string_t *str1, string_t *str2)
+static rb_str_t *
+str_plus_string(rb_str_t *str1, rb_str_t *str2)
 {
-    encoding_t *new_encoding = str_must_have_compatible_encoding(str1, str2);
+    rb_encoding_t *new_encoding = str_must_have_compatible_encoding(str1, str2);
 
-    string_t *new_str = str_alloc();
+    rb_str_t *new_str = str_alloc();
     new_str->encoding = new_encoding;
     if ((str1->length_in_bytes == 0) && (str2->length_in_bytes == 0)) {
 	return new_str;
@@ -765,7 +765,7 @@
 }
 
 static void
-str_concat_string(string_t *self, string_t *str)
+str_concat_string(rb_str_t *self, rb_str_t *str)
 {
     if (str->length_in_bytes == 0) {
 	return;
@@ -795,7 +795,7 @@
 }
 
 static bool
-str_is_equal_to_string(string_t *self, string_t *str)
+str_is_equal_to_string(rb_str_t *self, rb_str_t *str)
 {
     if (self == str) {
 	return true;
@@ -848,7 +848,7 @@
 }
 
 static long
-str_offset_in_bytes_to_index(string_t *self, long offset_in_bytes,
+str_offset_in_bytes_to_index(rb_str_t *self, long offset_in_bytes,
 	bool ucs2_mode)
 {
     if ((offset_in_bytes >= self->length_in_bytes) || (offset_in_bytes < 0)) {
@@ -905,7 +905,7 @@
 }
 
 static long
-str_offset_in_bytes_for_string(string_t *self, string_t *searched,
+str_offset_in_bytes_for_string(rb_str_t *self, rb_str_t *searched,
 	long start_offset_in_bytes)
 {
     if (start_offset_in_bytes >= self->length_in_bytes) {
@@ -943,7 +943,7 @@
 }
 
 static long
-str_index_for_string(string_t *self, string_t *searched, long start_index,
+str_index_for_string(rb_str_t *self, rb_str_t *searched, long start_index,
 	bool ucs2_mode)
 {
     str_must_have_compatible_encoding(self, searched);
@@ -978,22 +978,22 @@
 }
 
 static bool
-str_include_string(string_t *self, string_t *searched)
+str_include_string(rb_str_t *self, rb_str_t *searched)
 {
     return (str_offset_in_bytes_for_string(self, searched, 0) != -1);
 }
 
-static string_t *
+static rb_str_t *
 str_need_string(VALUE str)
 {
     if (IS_RSTR(str)) {
-	return (string_t *)str;
+	return (rb_str_t *)str;
     }
     if (TYPE(str) != T_STRING) {
 	str = rb_str_to_str(str);
     }
     if (IS_RSTR(str)) {
-	return (string_t *)str;
+	return (rb_str_t *)str;
     }
     return str_new_from_cfstring((CFStringRef)str);
 }
@@ -1009,7 +1009,7 @@
     }
     assert(IS_RSTR(str1)); // TODO
     assert(IS_RSTR(str2)); // TODO
-    encoding_t *encoding = str_compatible_encoding(RSTR(str1), RSTR(str2));
+    rb_encoding_t *encoding = str_compatible_encoding(RSTR(str1), RSTR(str2));
     if (encoding == NULL) {
 	return Qnil;
     }
@@ -1095,11 +1095,11 @@
 static VALUE
 mr_str_force_encoding(VALUE self, SEL sel, VALUE encoding)
 {
-    encoding_t *enc;
+    rb_encoding_t *enc;
     if (SPECIAL_CONST_P(encoding) || (CLASS_OF(encoding) != rb_cEncoding)) {
 	abort(); // TODO
     }
-    enc = (encoding_t *)encoding;
+    enc = (rb_encoding_t *)encoding;
     str_force_encoding(RSTR(self), enc);
     return self;
 }
@@ -1119,7 +1119,7 @@
 static VALUE
 mr_str_aref(VALUE self, SEL sel, int argc, VALUE *argv)
 {
-    string_t *ret;
+    rb_str_t *ret;
     if (argc == 1) {
 	VALUE index = argv[0];
 	switch (TYPE(index)) {
@@ -1133,13 +1133,13 @@
 	    case T_STRING:
 		{
 		    if (IS_RSTR(index)) {
-			string_t *searched = RSTR(index);
+			rb_str_t *searched = RSTR(index);
 			if (str_include_string(RSTR(self), searched)) {
 			    return (VALUE)str_new_from_string(searched);
 			}
 		    }
 		    else {
-			string_t *searched =
+			rb_str_t *searched =
 			    str_new_from_cfstring((CFStringRef)index);
 			if (str_include_string(RSTR(self), searched)) {
 			    // no need to duplicate the string as we just
@@ -1211,7 +1211,7 @@
     if (argc == 2) {
 	start_index = NUM2LONG(argv[1]);
     }
-    string_t *searched = str_need_string(rb_searched);
+    rb_str_t *searched = str_need_string(rb_searched);
 
     long index = str_index_for_string(RSTR(self), searched, start_index, true);
     if (index == -1) {
@@ -1225,7 +1225,7 @@
 static VALUE
 mr_str_getchar(VALUE self, SEL sel, VALUE index)
 {
-    string_t *ret = str_get_character_at(RSTR(self), FIX2LONG(index), false);
+    rb_str_t *ret = str_get_character_at(RSTR(self), FIX2LONG(index), false);
     if (ret == NULL) {
 	return Qnil;
     }
@@ -1262,7 +1262,7 @@
     }
 
     if (TYPE(compared_to) == T_STRING) {
-	string_t *str;
+	rb_str_t *str;
 	if (IS_RSTR(compared_to)) {
 	    str = RSTR(compared_to);
 	}
@@ -1369,7 +1369,7 @@
 VALUE
 rb_str_new(const char *cstr, long len)
 {
-    string_t *str = str_alloc();
+    rb_str_t *str = str_alloc();
     str_replace_with_bytes(str, cstr, len, ENCODING_BINARY);
     return (VALUE)str;
 }
@@ -1389,7 +1389,7 @@
 VALUE
 rb_str_new3(VALUE source)
 {
-    string_t *str = str_alloc();
+    rb_str_t *str = str_alloc();
     str_replace(str, source);
     return (VALUE)str;
 }
@@ -1420,7 +1420,7 @@
 rb_usascii_str_new(const char *cstr, long len)
 {
     VALUE str = rb_str_new(cstr, len);
-    RSTR(str)->encoding = encodings[ENCODING_ASCII];
+    RSTR(str)->encoding = rb_encodings[ENCODING_ASCII];
     return str;
 }
 
@@ -1502,6 +1502,15 @@
     return str;
 }
 
+void
+rb_str_setter(VALUE val, ID id, VALUE *var)
+{ // Assigns val to *var, accepting only nil or a String value.
+    if (!NIL_P(val) && TYPE(val) != T_STRING) { // nil skips the type check
+	rb_raise(rb_eTypeError, "value of %s must be String", rb_id2name(id));
+    }
+    *var = val; // id is not stored; it only names the variable in the error
+}
+
 ID
 rb_to_id(VALUE name)
 {

Modified: MacRuby/branches/icu/ucnv.c
===================================================================
--- MacRuby/branches/icu/ucnv.c	2010-02-16 21:53:09 UTC (rev 3554)
+++ MacRuby/branches/icu/ucnv.c	2010-02-16 22:44:14 UTC (rev 3555)
@@ -1,3 +1,14 @@
+/* 
+ * MacRuby implementation of Ruby 1.9 String.
+ *
+ * This file is covered by the Ruby license. See COPYING for more details.
+ * 
+ * Copyright (C) 2007-2010, Apple Inc. All rights reserved.
+ * Copyright (C) 1993-2007 Yukihiro Matsumoto
+ * Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
+ * Copyright (C) 2000 Information-technology Promotion Agency, Japan
+ */
+
 #include "encoding.h"
 #include "unicode/ucnv.h"
 
@@ -17,7 +28,7 @@
     ucnv_reset(cnv);
 
 static void
-str_ucnv_update_flags(string_t *self)
+str_ucnv_update_flags(rb_str_t *self)
 {
     assert(!str_is_stored_in_uchars(self));
 
@@ -62,20 +73,23 @@
 }
 
 static void
-str_ucnv_make_data_binary(string_t *self)
+str_ucnv_make_data_binary(rb_str_t *self)
 {
     assert(str_is_stored_in_uchars(self));
 
     USE_CONVERTER(cnv, self);
 
     UErrorCode err = U_ZERO_ERROR;
-    long capa = UCNV_GET_MAX_BYTES_FOR_STRING(BYTES_TO_UCHARS(self->length_in_bytes), ucnv_getMaxCharSize(cnv));
+    long capa = UCNV_GET_MAX_BYTES_FOR_STRING(BYTES_TO_UCHARS(
+		self->length_in_bytes), ucnv_getMaxCharSize(cnv));
     char *buffer = xmalloc(capa);
     const UChar *source_pos = self->data.uchars;
-    const UChar *source_end = self->data.uchars + BYTES_TO_UCHARS(self->length_in_bytes);
+    const UChar *source_end = self->data.uchars
+	+ BYTES_TO_UCHARS(self->length_in_bytes);
     char *target_pos = buffer;
     char *target_end = buffer + capa;
-    ucnv_fromUnicode(cnv, &target_pos, target_end, &source_pos, source_end, NULL, true, &err);
+    ucnv_fromUnicode(cnv, &target_pos, target_end, &source_pos, source_end,
+	    NULL, true, &err);
     // there should never be any conversion error here
     // (if there's one it means some checking has been forgotten before)
     assert(U_SUCCESS(err));
@@ -89,11 +103,12 @@
 }
 
 static long
-utf16_bytesize_approximation(encoding_t *enc, int bytesize)
+utf16_bytesize_approximation(rb_encoding_t *enc, int bytesize)
 {
     long approximation;
     if (UTF16_ENC(enc)) {
-	approximation = bytesize; // the bytesize in UTF-16 is the same whatever the endianness
+	approximation = bytesize; // the bytesize in UTF-16 is the same
+				  // whatever the endianness
     }
     else if (UTF32_ENC(enc)) {
 	// the bytesize in UTF-16 is nearly half of the bytesize in UTF-32
@@ -114,13 +129,14 @@
 }
 
 static bool
-str_ucnv_try_making_data_uchars(string_t *self)
+str_ucnv_try_making_data_uchars(rb_str_t *self)
 {
     assert(!str_is_stored_in_uchars(self));
 
     USE_CONVERTER(cnv, self);
 
-    long capa = utf16_bytesize_approximation(self->encoding, self->length_in_bytes);
+    long capa = utf16_bytesize_approximation(self->encoding,
+	    self->length_in_bytes);
     const char *source_pos = self->data.bytes;
     const char *source_end = self->data.bytes + self->length_in_bytes;
     UChar *buffer = xmalloc(capa);
@@ -129,7 +145,8 @@
     for (;;) {
 	UChar *target_end = buffer + BYTES_TO_UCHARS(capa);
 	err = U_ZERO_ERROR;
-	ucnv_toUnicode(cnv, &target_pos, target_end, &source_pos, source_end, NULL, true, &err);
+	ucnv_toUnicode(cnv, &target_pos, target_end, &source_pos, source_end,
+		NULL, true, &err);
 	if (err == U_BUFFER_OVERFLOW_ERROR) {
 	    long index = target_pos - buffer;
 	    capa *= 2; // double the buffer's size
@@ -149,18 +166,16 @@
 	self->capacity_in_bytes = capa;
 	self->length_in_bytes = UCHARS_TO_BYTES(target_pos - buffer);
 	GC_WB(&self->data.uchars, buffer);
-
 	return true;
     }
     else {
 	str_set_valid_encoding(self, false);
-
 	return false;
     }
 }
 
 static long
-str_ucnv_length(string_t *self, bool ucs2_mode)
+str_ucnv_length(rb_str_t *self, bool ucs2_mode)
 {
     assert(!str_is_stored_in_uchars(self));
 
@@ -204,12 +219,13 @@
 
 #define STACK_BUFFER_SIZE 1024
 static long
-str_ucnv_bytesize(string_t *self)
+str_ucnv_bytesize(rb_str_t *self)
 {
     assert(str_is_stored_in_uchars(self));
 
     // for strings stored in UTF-16 for which the Ruby encoding is not UTF-16,
-    // we have to convert back the string in its original encoding to get the length in bytes
+    // we have to convert back the string in its original encoding to get the
+    // length in bytes
     USE_CONVERTER(cnv, self);
 
     UErrorCode err = U_ZERO_ERROR;
@@ -217,12 +233,14 @@
     long len = 0;
     char buffer[STACK_BUFFER_SIZE];
     const UChar *source_pos = self->data.uchars;
-    const UChar *source_end = self->data.uchars + BYTES_TO_UCHARS(self->length_in_bytes);
+    const UChar *source_end = self->data.uchars + BYTES_TO_UCHARS(
+	    self->length_in_bytes);
     char *target_end = buffer + STACK_BUFFER_SIZE;
     for (;;) {
 	err = U_ZERO_ERROR;
 	char *target_pos = buffer;
-	ucnv_fromUnicode(cnv, &target_pos, target_end, &source_pos, source_end, NULL, true, &err);
+	ucnv_fromUnicode(cnv, &target_pos, target_end, &source_pos, source_end,
+		NULL, true, &err);
 	len += target_pos - buffer;
 	if (err != U_BUFFER_OVERFLOW_ERROR) {
 	    // if the convertion failed, a check was missing somewhere
@@ -236,7 +254,7 @@
 }
 
 static character_boundaries_t
-str_ucnv_get_character_boundaries(string_t *self, long index, bool ucs2_mode)
+str_ucnv_get_character_boundaries(rb_str_t *self, long index, bool ucs2_mode)
 {
     assert(!str_is_stored_in_uchars(self));
 
@@ -281,19 +299,24 @@
 		    length_in_bytes = min_char_size;
 		}
 		boundaries.start_offset_in_bytes = offset_in_bytes;
-		boundaries.end_offset_in_bytes = boundaries.start_offset_in_bytes + length_in_bytes;
+		boundaries.end_offset_in_bytes =
+		    boundaries.start_offset_in_bytes + length_in_bytes;
 		break;
 	    }
 	    else if (current_index + diff > index) {
-		long adjusted_offset = offset_in_bytes + (index - current_index) * min_char_size;
-		if (adjusted_offset + min_char_size > offset_in_bytes + converted_width) {
-		    length_in_bytes = offset_in_bytes + converted_width - adjusted_offset;
+		long adjusted_offset = offset_in_bytes + (index
+			- current_index) * min_char_size;
+		if (adjusted_offset + min_char_size > offset_in_bytes
+			+ converted_width) {
+		    length_in_bytes = offset_in_bytes + converted_width
+			- adjusted_offset;
 		}
 		else {
 		    length_in_bytes = min_char_size;
 		}
 		boundaries.start_offset_in_bytes = adjusted_offset;
-		boundaries.end_offset_in_bytes = boundaries.start_offset_in_bytes + length_in_bytes;
+		boundaries.end_offset_in_bytes =
+		    boundaries.start_offset_in_bytes + length_in_bytes;
 		break;
 	    }
 	    current_index += diff;
@@ -306,7 +329,8 @@
 		    break;
 		}
 		else if (current_index+1 == index) {
-		    boundaries.end_offset_in_bytes = offset_in_bytes + converted_width;
+		    boundaries.end_offset_in_bytes = offset_in_bytes
+			+ converted_width;
 		    break;
 		}
 		++current_index;
@@ -314,7 +338,8 @@
 
 	    if (current_index == index) {
 		boundaries.start_offset_in_bytes = offset_in_bytes;
-		boundaries.end_offset_in_bytes = boundaries.start_offset_in_bytes + converted_width;
+		boundaries.end_offset_in_bytes =
+		    boundaries.start_offset_in_bytes + converted_width;
 		break;
 	    }
 
@@ -328,7 +353,8 @@
 }
 
 static long
-str_ucnv_offset_in_bytes_to_index(string_t *self, long offset_in_bytes, bool ucs2_mode)
+str_ucnv_offset_in_bytes_to_index(rb_str_t *self, long offset_in_bytes,
+	bool ucs2_mode)
 {
     assert(!str_is_stored_in_uchars(self));
 
@@ -347,7 +373,8 @@
 	if (err == U_INDEX_OUTOFBOUNDS_ERROR) {
 	    // end of the string
 	    // should not happen because str_offset_in_bytes_to_index
-	    // checks before that offset_in_bytes is inferior to the length in bytes
+	    // checks before that offset_in_bytes is inferior to the length
+	    // in bytes
 	    abort();
 	}
 	else if (U_FAILURE(err)) {
@@ -383,20 +410,23 @@
 }
 
 void
-enc_init_ucnv_encoding(encoding_t *encoding)
+enc_init_ucnv_encoding(rb_encoding_t *encoding)
 {
     // create the ICU converter
     UErrorCode err = U_ZERO_ERROR;
     UConverter *converter = ucnv_open(encoding->public_name, &err);
     if (!U_SUCCESS(err) || (converter == NULL)) {
-	fprintf(stderr, "Couldn't create the encoder for %s\n", encoding->public_name);
+	fprintf(stderr, "Couldn't create the encoder for %s\n",
+		encoding->public_name);
 	abort();
     }
     // stop the conversion when the conversion failed
     err = U_ZERO_ERROR;
-    ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err);
+    ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,
+	    &err);
     err = U_ZERO_ERROR;
-    ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &err);
+    ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL,
+	    NULL, &err);
 
     // fill the fields not filled yet
     encoding->private_data = converter;
@@ -405,6 +435,8 @@
     encoding->methods.try_making_data_uchars = str_ucnv_try_making_data_uchars;
     encoding->methods.length = str_ucnv_length;
     encoding->methods.bytesize = str_ucnv_bytesize;
-    encoding->methods.get_character_boundaries = str_ucnv_get_character_boundaries;
-    encoding->methods.offset_in_bytes_to_index = str_ucnv_offset_in_bytes_to_index;
+    encoding->methods.get_character_boundaries =
+	str_ucnv_get_character_boundaries;
+    encoding->methods.offset_in_bytes_to_index =
+	str_ucnv_offset_in_bytes_to_index;
 }
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20100216/91db7af3/attachment-0001.html>


More information about the macruby-changes mailing list