[macruby-changes] [3555] MacRuby/branches/icu
source_changes at macosforge.org
source_changes at macosforge.org
Tue Feb 16 14:44:15 PST 2010
Revision: 3555
http://trac.macosforge.org/projects/ruby/changeset/3555
Author: lsansonetti at apple.com
Date: 2010-02-16 14:44:14 -0800 (Tue, 16 Feb 2010)
Log Message:
-----------
more work
Modified Paths:
--------------
MacRuby/branches/icu/encoding.c
MacRuby/branches/icu/encoding.h
MacRuby/branches/icu/include/ruby/encoding.h
MacRuby/branches/icu/marshal.c
MacRuby/branches/icu/re.c
MacRuby/branches/icu/ruby.c
MacRuby/branches/icu/string.c
MacRuby/branches/icu/ucnv.c
Modified: MacRuby/branches/icu/encoding.c
===================================================================
--- MacRuby/branches/icu/encoding.c 2010-02-16 21:53:09 UTC (rev 3554)
+++ MacRuby/branches/icu/encoding.c 2010-02-16 22:44:14 UTC (rev 3555)
@@ -1,31 +1,37 @@
+/*
+ * MacRuby implementation of Ruby 1.9 String.
+ *
+ * This file is covered by the Ruby license. See COPYING for more details.
+ *
+ * Copyright (C) 2007-2010, Apple Inc. All rights reserved.
+ * Copyright (C) 1993-2007 Yukihiro Matsumoto
+ * Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
+ * Copyright (C) 2000 Information-technology Promotion Agency, Japan
+ */
+
#include "encoding.h"
#include <string.h>
-// TODO:
-// - use rb_usascii_str_new_cstr instead of rb_str_new2
+VALUE rb_cEncoding;
-VALUE rb_cEncoding = 0;
+static rb_encoding_t *default_internal = NULL;
+static rb_encoding_t *default_external = NULL;
+rb_encoding_t *rb_encodings[ENCODINGS_COUNT];
-#define ENC(x) ((encoding_t *)(x))
+static void str_undefined_update_flags(rb_str_t *self) { abort(); }
+static void str_undefined_make_data_binary(rb_str_t *self) { abort(); }
+static bool str_undefined_try_making_data_uchars(rb_str_t *self) { abort(); }
+static long str_undefined_length(rb_str_t *self, bool ucs2_mode) { abort(); }
+static long str_undefined_bytesize(rb_str_t *self) { abort(); }
+static character_boundaries_t str_undefined_get_character_boundaries(rb_str_t *self, long index, bool ucs2_mode) { abort(); }
+static long str_undefined_offset_in_bytes_to_index(rb_str_t *self, long offset_in_bytes, bool ucs2_mode) { abort(); }
-encoding_t *default_internal = NULL;
-encoding_t *default_external = NULL;
-encoding_t *encodings[ENCODINGS_COUNT];
-
-static void str_undefined_update_flags(string_t *self) { abort(); }
-static void str_undefined_make_data_binary(string_t *self) { abort(); }
-static bool str_undefined_try_making_data_uchars(string_t *self) { abort(); }
-static long str_undefined_length(string_t *self, bool ucs2_mode) { abort(); }
-static long str_undefined_bytesize(string_t *self) { abort(); }
-static character_boundaries_t str_undefined_get_character_boundaries(string_t *self, long index, bool ucs2_mode) { abort(); }
-static long str_undefined_offset_in_bytes_to_index(string_t *self, long offset_in_bytes, bool ucs2_mode) { abort(); }
-
static VALUE
mr_enc_s_list(VALUE klass, SEL sel)
{
VALUE ary = rb_ary_new2(ENCODINGS_COUNT);
for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
- rb_ary_push(ary, (VALUE)encodings[i]);
+ rb_ary_push(ary, (VALUE)rb_encodings[i]);
}
return ary;
}
@@ -35,11 +41,11 @@
{
VALUE ary = rb_ary_new();
for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
- encoding_t *encoding = ENC(encodings[i]);
+ rb_encoding_t *encoding = RENC(rb_encodings[i]);
// TODO: use US-ASCII strings
- rb_ary_push(ary, rb_str_new2(encoding->public_name));
+ rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
- rb_ary_push(ary, rb_str_new2(encoding->aliases[j]));
+ rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[j]));
}
}
return ary;
@@ -50,11 +56,10 @@
{
VALUE hash = rb_hash_new();
for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
- encoding_t *encoding = ENC(encodings[i]);
+ rb_encoding_t *encoding = RENC(rb_encodings[i]);
for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
- rb_hash_aset(hash,
- rb_str_new2(encoding->aliases[j]),
- rb_str_new2(encoding->public_name));
+ rb_hash_aset(hash, rb_usascii_str_new2(encoding->aliases[j]),
+ rb_usascii_str_new2(encoding->public_name));
}
}
return hash;
@@ -75,25 +80,25 @@
static VALUE
mr_enc_name(VALUE self, SEL sel)
{
- return rb_str_new2(ENC(self)->public_name);
+ return rb_usascii_str_new2(RENC(self)->public_name);
}
static VALUE
mr_enc_inspect(VALUE self, SEL sel)
{
return rb_sprintf("#<%s:%s>", rb_obj_classname(self),
- ENC(self)->public_name);
+ RENC(self)->public_name);
}
static VALUE
mr_enc_names(VALUE self, SEL sel)
{
- encoding_t *encoding = ENC(self);
+ rb_encoding_t *encoding = RENC(self);
VALUE ary = rb_ary_new2(encoding->aliases_count + 1);
- rb_ary_push(ary, rb_str_new2(encoding->public_name));
+ rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
for (unsigned int i = 0; i < encoding->aliases_count; ++i) {
- rb_ary_push(ary, rb_str_new2(encoding->aliases[i]));
+ rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[i]));
}
return ary;
}
@@ -101,7 +106,7 @@
static VALUE
mr_enc_ascii_compatible_p(VALUE self, SEL sel)
{
- return ENC(self)->ascii_compatible ? Qtrue : Qfalse;
+ return RENC(self)->ascii_compatible ? Qtrue : Qfalse;
}
static VALUE
@@ -111,7 +116,7 @@
}
static void
-define_encoding_constant(const char *name, encoding_t *encoding)
+define_encoding_constant(const char *name, rb_encoding_t *encoding)
{
char c = name[0];
if ((c >= '0') && (c <= '9')) {
@@ -135,7 +140,7 @@
free(name_copy);
}
-extern void enc_init_ucnv_encoding(encoding_t *encoding);
+extern void enc_init_ucnv_encoding(rb_encoding_t *encoding);
enum {
ENCODING_TYPE_SPECIAL = 0,
@@ -146,7 +151,7 @@
add_encoding(
unsigned int encoding_index, // index of the encoding in the encodings
// array
- unsigned int encoding_type,
+ unsigned int rb_encoding_type,
const char *public_name, // public name for the encoding
unsigned char min_char_size,
bool single_byte_encoding, // in the encoding a character takes only
@@ -175,11 +180,11 @@
va_end(va_aliases);
// create the MacRuby object
- NEWOBJ(encoding, encoding_t);
+ NEWOBJ(encoding, rb_encoding_t);
encoding->basic.flags = 0;
encoding->basic.klass = rb_cEncoding;
- encodings[encoding_index] = encoding;
- rb_objc_retain(encoding); // it should never be deallocated
+ rb_encodings[encoding_index] = encoding;
+ GC_RETAIN(encoding); // it should never be deallocated
// fill the fields
encoding->index = encoding_index;
@@ -202,7 +207,7 @@
encoding->methods.offset_in_bytes_to_index =
str_undefined_offset_in_bytes_to_index;
- switch (encoding_type) {
+ switch (rb_encoding_type) {
case ENCODING_TYPE_SPECIAL:
break;
case ENCODING_TYPE_UCNV:
@@ -236,8 +241,8 @@
//add_encoding(ENCODING_SJIS, ENCODING_TYPE_RUBY, "Shift_JIS", 1, false, true, "SJIS", NULL);
//add_encoding(ENCODING_CP932, ENCODING_TYPE_RUBY, "Windows-31J", 1, false, true, "CP932", "csWindows31J", NULL);
- default_external = encodings[ENCODING_UTF8];
- default_internal = encodings[ENCODING_UTF8];
+ default_external = rb_encodings[ENCODING_UTF8];
+ default_internal = rb_encodings[ENCODING_UTF8];
}
VALUE
@@ -280,3 +285,96 @@
create_encodings();
}
+
+// MRI C-API compatibility.
+/*
+ * Look up an encoding by name.
+ *
+ * Walks rb_encodings[] from index 0 and, for each entry, compares `name`
+ * case-insensitively (strcasecmp) against the entry's public_name and then
+ * against each of its aliases. Returns the first entry that matches, or
+ * NULL when nothing matches.
+ *
+ * NOTE(review): `name` is handed to strcasecmp unchecked, so callers
+ * presumably must not pass NULL -- confirm.
+ */
+rb_encoding_t *
+rb_enc_find(const char *name)
+{
+ for (unsigned int i = 0; i < ENCODINGS_COUNT; i++) {
+ rb_encoding_t *enc = rb_encodings[i];
+ if (strcasecmp(enc->public_name, name) == 0) {
+ return enc;
+ }
+ for (unsigned int j = 0; j < enc->aliases_count; j++) {
+ const char *alias = enc->aliases[j];
+ if (strcasecmp(alias, name) == 0) {
+ return enc;
+ }
+ }
+ }
+ return NULL;
+}
+/*
+ * Return the VALUE for an rb_encoding_t pointer.
+ *
+ * This is a plain cast of the pointer: nothing is allocated or looked up.
+ */
+VALUE
+rb_enc_from_encoding(rb_encoding_t *enc)
+{
+ return (VALUE)enc;
+}
+/*
+ * Return the encoding attached to `obj`.
+ *
+ * When IS_RSTR(obj) holds, the string's `encoding` field is returned.
+ * Every other kind of object currently yields NULL (symbols are not
+ * handled yet, see the TODO below), so callers have to cope with NULL.
+ */
+rb_encoding_t *
+rb_enc_get(VALUE obj)
+{
+ if (IS_RSTR(obj)) {
+ return RSTR(obj)->encoding;
+ }
+ // TODO support symbols
+ return NULL;
+}
+/*
+ * Coerce `obj` to an encoding.
+ *
+ * An object whose class is exactly rb_cEncoding is returned as-is (cast
+ * with RENC). Anything else is passed through StringValue and its contents
+ * are looked up by name with rb_enc_find().
+ *
+ * Raises ArgumentError ("unknown encoding name - ...") when the name does
+ * not match any encoding, so this function does not return NULL.
+ */
+rb_encoding_t *
+rb_to_encoding(VALUE obj)
+{
+ rb_encoding_t *enc;
+ if (CLASS_OF(obj) == rb_cEncoding) {
+ enc = RENC(obj);
+ }
+ else {
+ StringValue(obj);
+ enc = rb_enc_find(RSTRING_PTR(obj));
+ if (enc == NULL) {
+ rb_raise(rb_eArgError, "unknown encoding name - %s",
+ RSTRING_PTR(obj));
+ }
+ }
+ return enc;
+}
+/*
+ * Return the encoding's public name as a C string.
+ *
+ * The pointer returned is the encoding's own public_name field, not a copy.
+ */
+const char *
+rb_enc_name(rb_encoding_t *enc)
+{
+ return RENC(enc)->public_name;
+}
+/*
+ * Return the encoding's public name as a string VALUE.
+ *
+ * Builds the string from rb_enc_name(enc) with rb_usascii_str_new2().
+ */
+VALUE
+rb_enc_name2(rb_encoding_t *enc)
+{
+ return rb_usascii_str_new2(rb_enc_name(enc));
+}
+/*
+ * Return the encoding's min_char_size field, widened to long.
+ */
+long
+rb_enc_mbminlen(rb_encoding_t *enc)
+{
+ return enc->min_char_size;
+}
+/*
+ * Return an upper bound for the size of one character in `enc`.
+ *
+ * Encodings flagged single_byte_encoding report 1. Every other encoding
+ * reports the constant 10, whatever the encoding is; the author's own
+ * "XXX 10?" marker flags that constant as unverified.
+ */
+long
+rb_enc_mbmaxlen(rb_encoding_t *enc)
+{
+ return enc->single_byte_encoding ? 1 : 10; // XXX 10?
+}
+/*
+ * Return the encoding to use for the locale.
+ *
+ * For now this is always the UTF-8 entry of rb_encodings[].
+ */
+rb_encoding_t *
+rb_locale_encoding(void)
+{
+ // XXX: hard-coded to UTF-8; nothing about the process locale is consulted
+ return rb_encodings[ENCODING_UTF8];
+}
+/*
+ * Set the default external encoding (the file-scope `default_external`).
+ *
+ * `encoding` must be an object whose class is exactly rb_cEncoding. This
+ * is enforced only by assert(), so no Ruby exception is raised for a bad
+ * argument and the check disappears when assertions are compiled out.
+ */
+void
+rb_enc_set_default_external(VALUE encoding)
+{
+ assert(CLASS_OF(encoding) == rb_cEncoding);
+ default_external = RENC(encoding);
+}
+
Modified: MacRuby/branches/icu/encoding.h
===================================================================
--- MacRuby/branches/icu/encoding.h 2010-02-16 21:53:09 UTC (rev 3554)
+++ MacRuby/branches/icu/encoding.h 2010-02-16 22:44:14 UTC (rev 3555)
@@ -1,3 +1,14 @@
+/*
+ * MacRuby implementation of Ruby 1.9 String.
+ *
+ * This file is covered by the Ruby license. See COPYING for more details.
+ *
+ * Copyright (C) 2007-2010, Apple Inc. All rights reserved.
+ * Copyright (C) 1993-2007 Yukihiro Matsumoto
+ * Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
+ * Copyright (C) 2000 Information-technology Promotion Agency, Japan
+ */
+
#ifndef __ENCODING_H_
#define __ENCODING_H_
@@ -22,24 +33,24 @@
#endif
#define NATIVE_UTF16_ENC(encoding) \
- ((encoding) == encodings[ENCODING_UTF16_NATIVE])
+ ((encoding) == rb_encodings[ENCODING_UTF16_NATIVE])
#define NON_NATIVE_UTF16_ENC(encoding) \
- ((encoding) == encodings[ENCODING_UTF16_NON_NATIVE])
+ ((encoding) == rb_encodings[ENCODING_UTF16_NON_NATIVE])
#define UTF16_ENC(encoding) \
(NATIVE_UTF16_ENC(encoding) || NON_NATIVE_UTF16_ENC(encoding))
#define NATIVE_UTF32_ENC(encoding) \
- ((encoding) == encodings[ENCODING_UTF32_NATIVE])
+ ((encoding) == rb_encodings[ENCODING_UTF32_NATIVE])
#define NON_NATIVE_UTF32_ENC(encoding) \
- ((encoding) == encodings[ENCODING_UTF32_NON_NATIVE])
+ ((encoding) == rb_encodings[ENCODING_UTF32_NON_NATIVE])
#define UTF32_ENC(encoding) \
(NATIVE_UTF32_ENC(encoding) || NON_NATIVE_UTF32_ENC(encoding))
-#define BINARY_ENC(encoding) ((encoding) == encodings[ENCODING_BINARY])
+#define BINARY_ENC(encoding) ((encoding) == rb_encodings[ENCODING_BINARY])
typedef uint8_t str_flag_t;
-typedef struct {
+typedef struct {
struct RBasic basic;
- struct encoding_s *encoding;
+ struct rb_encoding *encoding;
long capacity_in_bytes;
long length_in_bytes;
union {
@@ -47,9 +58,9 @@
UChar *uchars;
} data;
str_flag_t flags;
-} string_t;
+} rb_str_t;
-#define RSTR(x) ((string_t *)x)
+#define RSTR(x) ((rb_str_t *)x)
static inline bool
rb_klass_is_rstr(VALUE klass)
@@ -75,16 +86,16 @@
} character_boundaries_t;
typedef struct {
- void (*update_flags)(string_t *);
- void (*make_data_binary)(string_t *);
- bool (*try_making_data_uchars)(string_t *);
- long (*length)(string_t *, bool);
- long (*bytesize)(string_t *);
- character_boundaries_t (*get_character_boundaries)(string_t *, long, bool);
- long (*offset_in_bytes_to_index)(string_t *, long, bool);
+ void (*update_flags)(rb_str_t *);
+ void (*make_data_binary)(rb_str_t *);
+ bool (*try_making_data_uchars)(rb_str_t *);
+ long (*length)(rb_str_t *, bool);
+ long (*bytesize)(rb_str_t *);
+ character_boundaries_t (*get_character_boundaries)(rb_str_t *, long, bool);
+ long (*offset_in_bytes_to_index)(rb_str_t *, long, bool);
} encoding_methods_t;
-typedef struct encoding_s {
+typedef struct rb_encoding {
struct RBasic basic;
unsigned int index;
const char *public_name;
@@ -95,8 +106,10 @@
bool ascii_compatible : 1;
encoding_methods_t methods;
void *private_data;
-} encoding_t;
+} rb_encoding_t;
+#define RENC(x) ((rb_encoding_t *)(x))
+
enum {
ENCODING_BINARY = 0,
ENCODING_ASCII,
@@ -114,10 +127,8 @@
ENCODINGS_COUNT
};
-extern encoding_t *encodings[ENCODINGS_COUNT];
+extern rb_encoding_t *rb_encodings[ENCODINGS_COUNT];
-extern VALUE rb_cMREncoding;
-
#define STRING_HAS_SUPPLEMENTARY 0x020
#define STRING_HAS_SUPPLEMENTARY_SET 0x010
#define STRING_ASCII_ONLY 0x008
@@ -140,31 +151,31 @@
return ((a) + (b - 1)) / b;
}
-void str_update_flags(string_t *self);
+void str_update_flags(rb_str_t *self);
static inline void
-str_unset_facultative_flags(string_t *self)
+str_unset_facultative_flags(rb_str_t *self)
{
self->flags &= ~STRING_HAS_SUPPLEMENTARY_SET & ~STRING_ASCII_ONLY_SET
& ~STRING_VALID_ENCODING_SET;
}
static inline bool
-str_known_to_have_an_invalid_encoding(string_t *self)
+str_known_to_have_an_invalid_encoding(rb_str_t *self)
{
return (self->flags & (STRING_VALID_ENCODING_SET
| STRING_VALID_ENCODING)) == STRING_VALID_ENCODING_SET;
}
static inline bool
-str_known_not_to_have_any_supplementary(string_t *self)
+str_known_not_to_have_any_supplementary(rb_str_t *self)
{
return (self->flags & (STRING_HAS_SUPPLEMENTARY_SET
| STRING_HAS_SUPPLEMENTARY)) == STRING_HAS_SUPPLEMENTARY_SET;
}
static inline bool
-str_check_flag_and_update_if_needed(string_t *self, str_flag_t flag_set,
+str_check_flag_and_update_if_needed(rb_str_t *self, str_flag_t flag_set,
str_flag_t flag)
{
if (!(self->flags & flag_set)) {
@@ -175,21 +186,21 @@
}
static inline bool
-str_is_valid_encoding(string_t *self)
+str_is_valid_encoding(rb_str_t *self)
{
return str_check_flag_and_update_if_needed(self, STRING_VALID_ENCODING_SET,
STRING_VALID_ENCODING);
}
static inline bool
-str_is_ascii_only(string_t *self)
+str_is_ascii_only(rb_str_t *self)
{
return str_check_flag_and_update_if_needed(self, STRING_ASCII_ONLY_SET,
STRING_ASCII_ONLY);
}
static inline bool
-str_is_ruby_ascii_only(string_t *self)
+str_is_ruby_ascii_only(rb_str_t *self)
{
// for MRI, a string in a non-ASCII-compatible encoding (like UTF-16)
// containing only ASCII characters is not "ASCII only" though for us it
@@ -201,19 +212,19 @@
}
static inline bool
-str_is_stored_in_uchars(string_t *self)
+str_is_stored_in_uchars(rb_str_t *self)
{
return self->flags & STRING_STORED_IN_UCHARS;
}
static inline void
-str_negate_stored_in_uchars(string_t *self)
+str_negate_stored_in_uchars(rb_str_t *self)
{
self->flags ^= STRING_STORED_IN_UCHARS;
}
static inline void
-str_set_stored_in_uchars(string_t *self, bool status)
+str_set_stored_in_uchars(rb_str_t *self, bool status)
{
if (status) {
self->flags |= STRING_STORED_IN_UCHARS;
@@ -224,7 +235,7 @@
}
static inline void
-str_set_facultative_flag(string_t *self, bool status, str_flag_t flag_set,
+str_set_facultative_flag(rb_str_t *self, bool status, str_flag_t flag_set,
str_flag_t flag)
{
if (status) {
@@ -236,21 +247,21 @@
}
static inline void
-str_set_has_supplementary(string_t *self, bool status)
+str_set_has_supplementary(rb_str_t *self, bool status)
{
str_set_facultative_flag(self, status, STRING_HAS_SUPPLEMENTARY_SET,
STRING_HAS_SUPPLEMENTARY);
}
static inline void
-str_set_ascii_only(string_t *self, bool status)
+str_set_ascii_only(rb_str_t *self, bool status)
{
str_set_facultative_flag(self, status, STRING_ASCII_ONLY_SET,
STRING_ASCII_ONLY);
}
static inline void
-str_set_valid_encoding(string_t *self, bool status)
+str_set_valid_encoding(rb_str_t *self, bool status)
{
str_set_facultative_flag(self, status, STRING_VALID_ENCODING_SET,
STRING_VALID_ENCODING);
Modified: MacRuby/branches/icu/include/ruby/encoding.h
===================================================================
--- MacRuby/branches/icu/include/ruby/encoding.h 2010-02-16 21:53:09 UTC (rev 3554)
+++ MacRuby/branches/icu/include/ruby/encoding.h 2010-02-16 22:44:14 UTC (rev 3555)
@@ -16,83 +16,12 @@
extern "C" {
#endif
-#ifdef HAVE_STDARG_PROTOTYPES
-# include <stdarg.h>
-#else
-# include <varargs.h>
-#endif
+#include <stdarg.h>
-#if WITH_OBJC
+typedef struct rb_encoding rb_encoding;
-#include <wctype.h>
-
-typedef CFStringEncoding rb_encoding;
-
-#else
-
-#include "ruby/oniguruma.h"
-
-#define ENCODING_INLINE_MAX 1023
-#define ENCODING_SHIFT (FL_USHIFT+10)
-#define ENCODING_MASK (ENCODING_INLINE_MAX<<ENCODING_SHIFT)
-
-#define ENCODING_SET_INLINED(obj,i) do {\
- RBASIC(obj)->flags &= ~ENCODING_MASK;\
- RBASIC(obj)->flags |= (i) << ENCODING_SHIFT;\
-} while (0)
-#define ENCODING_SET(obj,i) do {\
- VALUE rb_encoding_set_obj = (obj); \
- int encoding_set_enc_index = (i); \
- if (encoding_set_enc_index < ENCODING_INLINE_MAX) \
- ENCODING_SET_INLINED(rb_encoding_set_obj, encoding_set_enc_index); \
- else \
- rb_enc_set_index(rb_encoding_set_obj, encoding_set_enc_index); \
-} while (0)
-
-#define ENCODING_GET_INLINED(obj) ((RBASIC(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT)
-#define ENCODING_GET(obj) \
- (ENCODING_GET_INLINED(obj) != ENCODING_INLINE_MAX ? \
- ENCODING_GET_INLINED(obj) : \
- rb_enc_get_index(obj))
-
-#if WITH_OBJC
-# define ENCODING_IS_ASCII8BIT(obj) (1)
-#else
-# define ENCODING_IS_ASCII8BIT(obj) (ENCODING_GET_INLINED(obj) == 0)
-#endif
-
-#define ENC_CODERANGE_MASK (FL_USER8|FL_USER9)
-#define ENC_CODERANGE_UNKNOWN 0
-#define ENC_CODERANGE_7BIT FL_USER8
-#define ENC_CODERANGE_VALID FL_USER9
-#define ENC_CODERANGE_BROKEN (FL_USER8|FL_USER9)
-#define ENC_CODERANGE(obj) (RBASIC(obj)->flags & ENC_CODERANGE_MASK)
-#define ENC_CODERANGE_ASCIIONLY(obj) (ENC_CODERANGE(obj) == ENC_CODERANGE_7BIT)
-#define ENC_CODERANGE_SET(obj,cr) (RBASIC(obj)->flags = \
- (RBASIC(obj)->flags & ~ENC_CODERANGE_MASK) | (cr))
-#define ENC_CODERANGE_CLEAR(obj) ENC_CODERANGE_SET(obj,0)
-
-/* assumed ASCII compatiblity */
-#define ENC_CODERANGE_AND(a, b) \
- (a == ENC_CODERANGE_7BIT ? b : \
- a == ENC_CODERANGE_VALID ? (b == ENC_CODERANGE_7BIT ? ENC_CODERANGE_VALID : b) : \
- ENC_CODERANGE_UNKNOWN)
-
-#define ENCODING_CODERANGE_SET(obj, encindex, cr) \
- do { \
- VALUE rb_encoding_coderange_obj = (obj); \
- ENCODING_SET(rb_encoding_coderange_obj, (encindex)); \
- ENC_CODERANGE_SET(rb_encoding_coderange_obj, (cr)); \
- } while (0)
-
-typedef OnigEncodingType rb_encoding;
-#endif
-
-#define ENCODING_MAXNAMELEN 42
-
int rb_enc_replicate(const char *, rb_encoding *);
int rb_define_dummy_encoding(const char *);
-#define rb_enc_to_index(enc) ((enc) ? ((enc)->ruby_encoding_index) : 0)
int rb_enc_get_index(VALUE obj);
void rb_enc_set_index(VALUE obj, int encindex);
int rb_enc_find_index(const char *name);
@@ -120,26 +49,13 @@
/* name -> rb_encoding */
rb_encoding * rb_enc_find(const char *name);
-#if WITH_OBJC
-rb_encoding * rb_enc_find2(VALUE name);
-#endif
-
/* encoding -> name */
-#if WITH_OBJC
const char *rb_enc_name(rb_encoding *);
VALUE rb_enc_name2(rb_encoding *);
-#else
-#define rb_enc_name(enc) (enc)->name
-#endif
/* encoding -> minlen/maxlen */
-#if WITH_OBJC
long rb_enc_mbminlen(rb_encoding *);
long rb_enc_mbmaxlen(rb_encoding *);
-#else
-#define rb_enc_mbminlen(enc) (enc)->min_enc_len
-#define rb_enc_mbmaxlen(enc) (enc)->max_enc_len
-#endif
/* -> mbclen (no error notification: 0 < ret <= e-p, no exception) */
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc);
@@ -174,7 +90,8 @@
/* ptr, ptr, encoding -> newline_or_not */
#define rb_enc_is_newline(p,end,enc) ONIGENC_IS_MBC_NEWLINE(enc,(UChar*)(p),(UChar*)(end))
-#if WITH_OBJC
+#include <wctype.h>
+
#define rb_enc_isctype(c,t,enc) (iswctype(c,t))
#define rb_enc_isascii(c,enc) (iswascii(c))
#define rb_enc_isalpha(c,enc) (iswalpha(c))
@@ -184,17 +101,6 @@
#define rb_enc_isprint(c,enc) (iswprint(c))
#define rb_enc_isspace(c,enc) (iswspace(c))
#define rb_enc_isdigit(c,enc) (iswdigit(c))
-#else
-#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t)
-#define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)
-#define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA(enc,c)
-#define rb_enc_islower(c,enc) ONIGENC_IS_CODE_LOWER(enc,c)
-#define rb_enc_isupper(c,enc) ONIGENC_IS_CODE_UPPER(enc,c)
-#define rb_enc_isalnum(c,enc) ONIGENC_IS_CODE_ALNUM(enc,c)
-#define rb_enc_isprint(c,enc) ONIGENC_IS_CODE_PRINT(enc,c)
-#define rb_enc_isspace(c,enc) ONIGENC_IS_CODE_SPACE(enc,c)
-#define rb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT(enc,c)
-#endif
#define rb_enc_asciicompat(enc) (rb_enc_mbminlen(enc)==1 && !rb_enc_dummy_p(enc))
@@ -218,31 +124,18 @@
int rb_ascii8bit_encindex(void);
VALUE rb_enc_default_external(void);
void rb_enc_set_default_external(VALUE encoding);
-//VALUE rb_locale_charmap(VALUE klass);
long rb_memsearch(const void*,long,const void*,long,rb_encoding*);
VALUE rb_num_to_chr(VALUE, rb_encoding *);
RUBY_EXTERN VALUE rb_cEncoding;
-#define ENC_UNINITIALIZED (&rb_cEncoding)
-#define enc_initialized_p(enc) ((enc)->auxiliary_data != &rb_cEncoding)
-#define ENC_FROM_ENCODING(enc) ((VALUE)(enc)->auxiliary_data)
-
-#define ENC_DUMMY_FLAG FL_USER2
-#define ENC_DUMMY_P(enc) (RBASIC(enc)->flags & ENC_DUMMY_FLAG)
-#define ENC_SET_DUMMY(enc) (RBASIC(enc)->flags |= ENC_DUMMY_FLAG)
-
-#if WITH_OBJC
-# define rb_enc_dummy_p(x) (Qfalse)
-#else
static inline int
rb_enc_dummy_p(rb_encoding *enc)
{
- if (!enc_initialized_p(enc)) return Qfalse;
- return ENC_DUMMY_P(ENC_FROM_ENCODING(enc));
+ // TODO
+ return Qfalse;
}
-#endif
VALUE rb_str_transcode(VALUE str, VALUE to);
Modified: MacRuby/branches/icu/marshal.c
===================================================================
--- MacRuby/branches/icu/marshal.c 2010-02-16 21:53:09 UTC (rev 3554)
+++ MacRuby/branches/icu/marshal.c 2010-02-16 22:44:14 UTC (rev 3555)
@@ -83,7 +83,15 @@
static ID s_dump_data, s_load_data, s_alloc;
static ID s_getbyte, s_read, s_write, s_binmode;
-ID rb_id_encoding(void);
+static ID
+rb_id_encoding(void)
+{ // Returns the ID for "encoding", interning it on the first call only.
+ static ID id = 0; // cached result; 0 means "not interned yet"
+ if (id == 0) {
+ id = rb_intern("encoding");
+ }
+ return id;
+}
typedef struct {
VALUE newclass;
Modified: MacRuby/branches/icu/re.c
===================================================================
--- MacRuby/branches/icu/re.c 2010-02-16 21:53:09 UTC (rev 3554)
+++ MacRuby/branches/icu/re.c 2010-02-16 22:44:14 UTC (rev 3555)
@@ -3750,7 +3750,7 @@
rb_objc_define_method(rb_cRegexp, "source", rb_reg_source, 0);
rb_objc_define_method(rb_cRegexp, "casefold?", rb_reg_casefold_p, 0);
rb_objc_define_method(rb_cRegexp, "options", rb_reg_options_m, 0);
- rb_objc_define_method(rb_cRegexp, "encoding", rb_obj_encoding, 0); /* in encoding.c */
+ //rb_objc_define_method(rb_cRegexp, "encoding", rb_obj_encoding, 0); /* in encoding.c */
rb_objc_define_method(rb_cRegexp, "fixed_encoding?", rb_reg_fixed_encoding_p, 0);
rb_objc_define_method(rb_cRegexp, "names", rb_reg_names, 0);
rb_objc_define_method(rb_cRegexp, "named_captures", rb_reg_named_captures, 0);
Modified: MacRuby/branches/icu/ruby.c
===================================================================
--- MacRuby/branches/icu/ruby.c 2010-02-16 21:53:09 UTC (rev 3554)
+++ MacRuby/branches/icu/ruby.c 2010-02-16 22:44:14 UTC (rev 3555)
@@ -832,7 +832,7 @@
static rb_encoding *
opt_enc_find(VALUE enc_name)
{
- rb_encoding *enc = rb_enc_find2(enc_name);
+ rb_encoding *enc = rb_enc_find(RSTRING_PTR(enc_name));
if (enc == NULL) {
rb_raise(rb_eRuntimeError, "unknown encoding name - %s",
RSTRING_PTR(enc_name));
Modified: MacRuby/branches/icu/string.c
===================================================================
--- MacRuby/branches/icu/string.c 2010-02-16 21:53:09 UTC (rev 3554)
+++ MacRuby/branches/icu/string.c 2010-02-16 22:44:14 UTC (rev 3555)
@@ -1,5 +1,5 @@
/*
- * MacRuby implementation of Ruby 1.9's string.c.
+ * MacRuby implementation of Ruby 1.9 String.
*
* This file is covered by the Ruby license. See COPYING for more details.
*
@@ -27,7 +27,7 @@
VALUE rb_fs;
static void
-str_update_flags_utf16(string_t *self)
+str_update_flags_utf16(rb_str_t *self)
{
assert(str_is_stored_in_uchars(self)
|| NON_NATIVE_UTF16_ENC(self->encoding));
@@ -108,7 +108,7 @@
}
void
-str_update_flags(string_t *self)
+str_update_flags(rb_str_t *self)
{
if (self->length_in_bytes == 0) {
str_set_valid_encoding(self, true);
@@ -136,7 +136,7 @@
}
static void
-str_invert_byte_order(string_t *self)
+str_invert_byte_order(rb_str_t *self)
{
assert(NON_NATIVE_UTF16_ENC(self->encoding));
@@ -155,8 +155,8 @@
str_negate_stored_in_uchars(self);
}
-static encoding_t *
-str_compatible_encoding(string_t *str1, string_t *str2)
+static rb_encoding_t *
+str_compatible_encoding(rb_str_t *str1, rb_str_t *str2)
{
if (str1->encoding == str2->encoding) {
return str1->encoding;
@@ -177,10 +177,10 @@
return NULL;
}
-static encoding_t *
-str_must_have_compatible_encoding(string_t *str1, string_t *str2)
+static rb_encoding_t *
+str_must_have_compatible_encoding(rb_str_t *str1, rb_str_t *str2)
{
- encoding_t *new_encoding = str_compatible_encoding(str1, str2);
+ rb_encoding_t *new_encoding = str_compatible_encoding(str1, str2);
if (new_encoding == NULL) {
rb_raise(rb_eEncCompatError,
"incompatible character encodings: %s and %s",
@@ -189,13 +189,13 @@
return new_encoding;
}
-static string_t *
+static rb_str_t *
str_alloc(void)
{
- NEWOBJ(str, string_t);
+ NEWOBJ(str, rb_str_t);
str->basic.flags = 0;
str->basic.klass = rb_cRubyString;
- str->encoding = encodings[ENCODING_BINARY];
+ str->encoding = rb_encodings[ENCODING_BINARY];
str->capacity_in_bytes = 0;
str->length_in_bytes = 0;
str->data.bytes = NULL;
@@ -204,8 +204,8 @@
}
static void
-str_replace_with_bytes(string_t *self, const char *bytes, long len,
- encoding_t *enc)
+str_replace_with_bytes(rb_str_t *self, const char *bytes, long len,
+ rb_encoding_t *enc)
{
assert(len >= 0);
self->flags = 0;
@@ -223,7 +223,7 @@
}
static void
-str_replace_with_string(string_t *self, string_t *source)
+str_replace_with_string(rb_str_t *self, rb_str_t *source)
{
if (self == source) {
return;
@@ -234,10 +234,10 @@
}
static void
-str_replace_with_cfstring(string_t *self, CFStringRef source)
+str_replace_with_cfstring(rb_str_t *self, CFStringRef source)
{
self->flags = 0;
- self->encoding = encodings[ENCODING_UTF16_NATIVE];
+ self->encoding = rb_encodings[ENCODING_UTF16_NATIVE];
self->capacity_in_bytes = self->length_in_bytes =
UCHARS_TO_BYTES(CFStringGetLength(source));
if (self->length_in_bytes != 0) {
@@ -250,7 +250,7 @@
}
static void
-str_replace(string_t *self, VALUE arg)
+str_replace(rb_str_t *self, VALUE arg)
{
if (IS_RSTR(arg)) {
str_replace_with_string(self, RSTR(arg));
@@ -269,38 +269,38 @@
}
}
-static string_t *
+static rb_str_t *
str_dup(VALUE source)
{
- string_t *destination = str_alloc();
+ rb_str_t *destination = str_alloc();
str_replace(destination, source);
return destination;
}
static void
-str_clear(string_t *self)
+str_clear(rb_str_t *self)
{
self->length_in_bytes = 0;
}
-static string_t *
-str_new_from_string(string_t *source)
+static rb_str_t *
+str_new_from_string(rb_str_t *source)
{
- string_t *destination = str_alloc();
+ rb_str_t *destination = str_alloc();
str_replace_with_string(destination, source);
return destination;
}
-static string_t *
+static rb_str_t *
str_new_from_cfstring(CFStringRef source)
{
- string_t *destination = str_alloc();
+ rb_str_t *destination = str_alloc();
str_replace_with_cfstring(destination, source);
return destination;
}
static void
-str_make_data_binary(string_t *self)
+str_make_data_binary(rb_str_t *self)
{
if (!str_is_stored_in_uchars(self) || NATIVE_UTF16_ENC(self->encoding)) {
// nothing to do
@@ -318,7 +318,7 @@
}
static bool
-str_try_making_data_uchars(string_t *self)
+str_try_making_data_uchars(rb_str_t *self)
{
if (str_is_stored_in_uchars(self)) {
return true;
@@ -344,7 +344,7 @@
}
static void
-str_make_same_format(string_t *str1, string_t *str2)
+str_make_same_format(rb_str_t *str1, rb_str_t *str2)
{
if (str_is_stored_in_uchars(str1) != str_is_stored_in_uchars(str2)) {
if (str_is_stored_in_uchars(str1)) {
@@ -359,7 +359,7 @@
}
static long
-str_length(string_t *self, bool ucs2_mode)
+str_length(rb_str_t *self, bool ucs2_mode)
{
if (self->length_in_bytes == 0) {
return 0;
@@ -397,7 +397,7 @@
}
static long
-str_bytesize(string_t *self)
+str_bytesize(rb_str_t *self)
{
if (str_is_stored_in_uchars(self)) {
if (UTF16_ENC(self->encoding)) {
@@ -413,7 +413,7 @@
}
static bool
-str_getbyte(string_t *self, long index, unsigned char *c)
+str_getbyte(rb_str_t *self, long index, unsigned char *c)
{
if (str_is_stored_in_uchars(self) && NATIVE_UTF16_ENC(self->encoding)) {
if (index < 0) {
@@ -458,7 +458,7 @@
}
static void
-str_setbyte(string_t *self, long index, unsigned char value)
+str_setbyte(rb_str_t *self, long index, unsigned char value)
{
str_make_data_binary(self);
if ((index < -self->length_in_bytes) || (index >= self->length_in_bytes)) {
@@ -471,7 +471,7 @@
}
static void
-str_force_encoding(string_t *self, encoding_t *enc)
+str_force_encoding(rb_str_t *self, rb_encoding_t *enc)
{
if (enc == self->encoding) {
return;
@@ -487,20 +487,20 @@
}
}
-static string_t *
-str_new_similar_empty_string(string_t *self)
+static rb_str_t *
+str_new_similar_empty_string(rb_str_t *self)
{
- string_t *str = str_alloc();
+ rb_str_t *str = str_alloc();
str->encoding = self->encoding;
str->flags = self->flags & STRING_REQUIRED_FLAGS;
return str;
}
-static string_t *
-str_new_copy_of_part(string_t *self, long offset_in_bytes,
+static rb_str_t *
+str_new_copy_of_part(rb_str_t *self, long offset_in_bytes,
long length_in_bytes)
{
- string_t *str = str_alloc();
+ rb_str_t *str = str_alloc();
str->encoding = self->encoding;
str->capacity_in_bytes = str->length_in_bytes = length_in_bytes;
str->flags = self->flags & STRING_REQUIRED_FLAGS;
@@ -520,7 +520,7 @@
}
static character_boundaries_t
-str_get_character_boundaries(string_t *self, long index, bool ucs2_mode)
+str_get_character_boundaries(rb_str_t *self, long index, bool ucs2_mode)
{
character_boundaries_t boundaries = {-1, -1};
@@ -647,8 +647,8 @@
return boundaries;
}
-static string_t *
-str_get_characters(string_t *self, long first, long last, bool ucs2_mode)
+static rb_str_t *
+str_get_characters(rb_str_t *self, long first, long last, bool ucs2_mode)
{
if (self->length_in_bytes == 0) {
if (first == 0) {
@@ -696,8 +696,8 @@
- first_boundaries.start_offset_in_bytes);
}
-static string_t *
-str_get_character_at(string_t *self, long index, bool ucs2_mode)
+static rb_str_t *
+str_get_character_at(rb_str_t *self, long index, bool ucs2_mode)
{
if (self->length_in_bytes == 0) {
return NULL;
@@ -736,12 +736,12 @@
- boundaries.start_offset_in_bytes);
}
-static string_t *
-str_plus_string(string_t *str1, string_t *str2)
+static rb_str_t *
+str_plus_string(rb_str_t *str1, rb_str_t *str2)
{
- encoding_t *new_encoding = str_must_have_compatible_encoding(str1, str2);
+ rb_encoding_t *new_encoding = str_must_have_compatible_encoding(str1, str2);
- string_t *new_str = str_alloc();
+ rb_str_t *new_str = str_alloc();
new_str->encoding = new_encoding;
if ((str1->length_in_bytes == 0) && (str2->length_in_bytes == 0)) {
return new_str;
@@ -765,7 +765,7 @@
}
static void
-str_concat_string(string_t *self, string_t *str)
+str_concat_string(rb_str_t *self, rb_str_t *str)
{
if (str->length_in_bytes == 0) {
return;
@@ -795,7 +795,7 @@
}
static bool
-str_is_equal_to_string(string_t *self, string_t *str)
+str_is_equal_to_string(rb_str_t *self, rb_str_t *str)
{
if (self == str) {
return true;
@@ -848,7 +848,7 @@
}
static long
-str_offset_in_bytes_to_index(string_t *self, long offset_in_bytes,
+str_offset_in_bytes_to_index(rb_str_t *self, long offset_in_bytes,
bool ucs2_mode)
{
if ((offset_in_bytes >= self->length_in_bytes) || (offset_in_bytes < 0)) {
@@ -905,7 +905,7 @@
}
static long
-str_offset_in_bytes_for_string(string_t *self, string_t *searched,
+str_offset_in_bytes_for_string(rb_str_t *self, rb_str_t *searched,
long start_offset_in_bytes)
{
if (start_offset_in_bytes >= self->length_in_bytes) {
@@ -943,7 +943,7 @@
}
static long
-str_index_for_string(string_t *self, string_t *searched, long start_index,
+str_index_for_string(rb_str_t *self, rb_str_t *searched, long start_index,
bool ucs2_mode)
{
str_must_have_compatible_encoding(self, searched);
@@ -978,22 +978,22 @@
}
static bool
-str_include_string(string_t *self, string_t *searched)
+str_include_string(rb_str_t *self, rb_str_t *searched)
{
return (str_offset_in_bytes_for_string(self, searched, 0) != -1);
}
-static string_t *
+static rb_str_t *
str_need_string(VALUE str)
{
if (IS_RSTR(str)) {
- return (string_t *)str;
+ return (rb_str_t *)str;
}
if (TYPE(str) != T_STRING) {
str = rb_str_to_str(str);
}
if (IS_RSTR(str)) {
- return (string_t *)str;
+ return (rb_str_t *)str;
}
return str_new_from_cfstring((CFStringRef)str);
}
@@ -1009,7 +1009,7 @@
}
assert(IS_RSTR(str1)); // TODO
assert(IS_RSTR(str2)); // TODO
- encoding_t *encoding = str_compatible_encoding(RSTR(str1), RSTR(str2));
+ rb_encoding_t *encoding = str_compatible_encoding(RSTR(str1), RSTR(str2));
if (encoding == NULL) {
return Qnil;
}
@@ -1095,11 +1095,11 @@
static VALUE
mr_str_force_encoding(VALUE self, SEL sel, VALUE encoding)
{
- encoding_t *enc;
+ rb_encoding_t *enc;
if (SPECIAL_CONST_P(encoding) || (CLASS_OF(encoding) != rb_cEncoding)) {
abort(); // TODO
}
- enc = (encoding_t *)encoding;
+ enc = (rb_encoding_t *)encoding;
str_force_encoding(RSTR(self), enc);
return self;
}
@@ -1119,7 +1119,7 @@
static VALUE
mr_str_aref(VALUE self, SEL sel, int argc, VALUE *argv)
{
- string_t *ret;
+ rb_str_t *ret;
if (argc == 1) {
VALUE index = argv[0];
switch (TYPE(index)) {
@@ -1133,13 +1133,13 @@
case T_STRING:
{
if (IS_RSTR(index)) {
- string_t *searched = RSTR(index);
+ rb_str_t *searched = RSTR(index);
if (str_include_string(RSTR(self), searched)) {
return (VALUE)str_new_from_string(searched);
}
}
else {
- string_t *searched =
+ rb_str_t *searched =
str_new_from_cfstring((CFStringRef)index);
if (str_include_string(RSTR(self), searched)) {
// no need to duplicate the string as we just
@@ -1211,7 +1211,7 @@
if (argc == 2) {
start_index = NUM2LONG(argv[1]);
}
- string_t *searched = str_need_string(rb_searched);
+ rb_str_t *searched = str_need_string(rb_searched);
long index = str_index_for_string(RSTR(self), searched, start_index, true);
if (index == -1) {
@@ -1225,7 +1225,7 @@
static VALUE
mr_str_getchar(VALUE self, SEL sel, VALUE index)
{
- string_t *ret = str_get_character_at(RSTR(self), FIX2LONG(index), false);
+ rb_str_t *ret = str_get_character_at(RSTR(self), FIX2LONG(index), false);
if (ret == NULL) {
return Qnil;
}
@@ -1262,7 +1262,7 @@
}
if (TYPE(compared_to) == T_STRING) {
- string_t *str;
+ rb_str_t *str;
if (IS_RSTR(compared_to)) {
str = RSTR(compared_to);
}
@@ -1369,7 +1369,7 @@
VALUE
rb_str_new(const char *cstr, long len)
{
- string_t *str = str_alloc();
+ rb_str_t *str = str_alloc();
str_replace_with_bytes(str, cstr, len, ENCODING_BINARY);
return (VALUE)str;
}
@@ -1389,7 +1389,7 @@
VALUE
rb_str_new3(VALUE source)
{
- string_t *str = str_alloc();
+ rb_str_t *str = str_alloc();
str_replace(str, source);
return (VALUE)str;
}
@@ -1420,7 +1420,7 @@
rb_usascii_str_new(const char *cstr, long len)
{
VALUE str = rb_str_new(cstr, len);
- RSTR(str)->encoding = encodings[ENCODING_ASCII];
+ RSTR(str)->encoding = rb_encodings[ENCODING_ASCII];
return str;
}
@@ -1502,6 +1502,15 @@
return str;
}
+void
+rb_str_setter(VALUE val, ID id, VALUE *var)
+{
+ if (!NIL_P(val) && TYPE(val) != T_STRING) {
+ rb_raise(rb_eTypeError, "value of %s must be String", rb_id2name(id));
+ }
+ *var = val;
+}
+
ID
rb_to_id(VALUE name)
{
Modified: MacRuby/branches/icu/ucnv.c
===================================================================
--- MacRuby/branches/icu/ucnv.c 2010-02-16 21:53:09 UTC (rev 3554)
+++ MacRuby/branches/icu/ucnv.c 2010-02-16 22:44:14 UTC (rev 3555)
@@ -1,3 +1,14 @@
+/*
+ * MacRuby implementation of Ruby 1.9 String.
+ *
+ * This file is covered by the Ruby license. See COPYING for more details.
+ *
+ * Copyright (C) 2007-2010, Apple Inc. All rights reserved.
+ * Copyright (C) 1993-2007 Yukihiro Matsumoto
+ * Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
+ * Copyright (C) 2000 Information-technology Promotion Agency, Japan
+ */
+
#include "encoding.h"
#include "unicode/ucnv.h"
@@ -17,7 +28,7 @@
ucnv_reset(cnv);
static void
-str_ucnv_update_flags(string_t *self)
+str_ucnv_update_flags(rb_str_t *self)
{
assert(!str_is_stored_in_uchars(self));
@@ -62,20 +73,23 @@
}
static void
-str_ucnv_make_data_binary(string_t *self)
+str_ucnv_make_data_binary(rb_str_t *self)
{
assert(str_is_stored_in_uchars(self));
USE_CONVERTER(cnv, self);
UErrorCode err = U_ZERO_ERROR;
- long capa = UCNV_GET_MAX_BYTES_FOR_STRING(BYTES_TO_UCHARS(self->length_in_bytes), ucnv_getMaxCharSize(cnv));
+ long capa = UCNV_GET_MAX_BYTES_FOR_STRING(BYTES_TO_UCHARS(
+ self->length_in_bytes), ucnv_getMaxCharSize(cnv));
char *buffer = xmalloc(capa);
const UChar *source_pos = self->data.uchars;
- const UChar *source_end = self->data.uchars + BYTES_TO_UCHARS(self->length_in_bytes);
+ const UChar *source_end = self->data.uchars
+ + BYTES_TO_UCHARS(self->length_in_bytes);
char *target_pos = buffer;
char *target_end = buffer + capa;
- ucnv_fromUnicode(cnv, &target_pos, target_end, &source_pos, source_end, NULL, true, &err);
+ ucnv_fromUnicode(cnv, &target_pos, target_end, &source_pos, source_end,
+ NULL, true, &err);
// there should never be any conversion error here
// (if there's one it means some checking has been forgotten before)
assert(U_SUCCESS(err));
@@ -89,11 +103,12 @@
}
static long
-utf16_bytesize_approximation(encoding_t *enc, int bytesize)
+utf16_bytesize_approximation(rb_encoding_t *enc, int bytesize)
{
long approximation;
if (UTF16_ENC(enc)) {
- approximation = bytesize; // the bytesize in UTF-16 is the same whatever the endianness
+ approximation = bytesize; // the bytesize in UTF-16 is the same
+ // whatever the endianness
}
else if (UTF32_ENC(enc)) {
// the bytesize in UTF-16 is nearly half of the bytesize in UTF-32
@@ -114,13 +129,14 @@
}
static bool
-str_ucnv_try_making_data_uchars(string_t *self)
+str_ucnv_try_making_data_uchars(rb_str_t *self)
{
assert(!str_is_stored_in_uchars(self));
USE_CONVERTER(cnv, self);
- long capa = utf16_bytesize_approximation(self->encoding, self->length_in_bytes);
+ long capa = utf16_bytesize_approximation(self->encoding,
+ self->length_in_bytes);
const char *source_pos = self->data.bytes;
const char *source_end = self->data.bytes + self->length_in_bytes;
UChar *buffer = xmalloc(capa);
@@ -129,7 +145,8 @@
for (;;) {
UChar *target_end = buffer + BYTES_TO_UCHARS(capa);
err = U_ZERO_ERROR;
- ucnv_toUnicode(cnv, &target_pos, target_end, &source_pos, source_end, NULL, true, &err);
+ ucnv_toUnicode(cnv, &target_pos, target_end, &source_pos, source_end,
+ NULL, true, &err);
if (err == U_BUFFER_OVERFLOW_ERROR) {
long index = target_pos - buffer;
capa *= 2; // double the buffer's size
@@ -149,18 +166,16 @@
self->capacity_in_bytes = capa;
self->length_in_bytes = UCHARS_TO_BYTES(target_pos - buffer);
GC_WB(&self->data.uchars, buffer);
-
return true;
}
else {
str_set_valid_encoding(self, false);
-
return false;
}
}
static long
-str_ucnv_length(string_t *self, bool ucs2_mode)
+str_ucnv_length(rb_str_t *self, bool ucs2_mode)
{
assert(!str_is_stored_in_uchars(self));
@@ -204,12 +219,13 @@
#define STACK_BUFFER_SIZE 1024
static long
-str_ucnv_bytesize(string_t *self)
+str_ucnv_bytesize(rb_str_t *self)
{
assert(str_is_stored_in_uchars(self));
// for strings stored in UTF-16 for which the Ruby encoding is not UTF-16,
- // we have to convert back the string in its original encoding to get the length in bytes
+ // we have to convert back the string in its original encoding to get the
+ // length in bytes
USE_CONVERTER(cnv, self);
UErrorCode err = U_ZERO_ERROR;
@@ -217,12 +233,14 @@
long len = 0;
char buffer[STACK_BUFFER_SIZE];
const UChar *source_pos = self->data.uchars;
- const UChar *source_end = self->data.uchars + BYTES_TO_UCHARS(self->length_in_bytes);
+ const UChar *source_end = self->data.uchars + BYTES_TO_UCHARS(
+ self->length_in_bytes);
char *target_end = buffer + STACK_BUFFER_SIZE;
for (;;) {
err = U_ZERO_ERROR;
char *target_pos = buffer;
- ucnv_fromUnicode(cnv, &target_pos, target_end, &source_pos, source_end, NULL, true, &err);
+ ucnv_fromUnicode(cnv, &target_pos, target_end, &source_pos, source_end,
+ NULL, true, &err);
len += target_pos - buffer;
if (err != U_BUFFER_OVERFLOW_ERROR) {
// if the convertion failed, a check was missing somewhere
@@ -236,7 +254,7 @@
}
static character_boundaries_t
-str_ucnv_get_character_boundaries(string_t *self, long index, bool ucs2_mode)
+str_ucnv_get_character_boundaries(rb_str_t *self, long index, bool ucs2_mode)
{
assert(!str_is_stored_in_uchars(self));
@@ -281,19 +299,24 @@
length_in_bytes = min_char_size;
}
boundaries.start_offset_in_bytes = offset_in_bytes;
- boundaries.end_offset_in_bytes = boundaries.start_offset_in_bytes + length_in_bytes;
+ boundaries.end_offset_in_bytes =
+ boundaries.start_offset_in_bytes + length_in_bytes;
break;
}
else if (current_index + diff > index) {
- long adjusted_offset = offset_in_bytes + (index - current_index) * min_char_size;
- if (adjusted_offset + min_char_size > offset_in_bytes + converted_width) {
- length_in_bytes = offset_in_bytes + converted_width - adjusted_offset;
+ long adjusted_offset = offset_in_bytes + (index
+ - current_index) * min_char_size;
+ if (adjusted_offset + min_char_size > offset_in_bytes
+ + converted_width) {
+ length_in_bytes = offset_in_bytes + converted_width
+ - adjusted_offset;
}
else {
length_in_bytes = min_char_size;
}
boundaries.start_offset_in_bytes = adjusted_offset;
- boundaries.end_offset_in_bytes = boundaries.start_offset_in_bytes + length_in_bytes;
+ boundaries.end_offset_in_bytes =
+ boundaries.start_offset_in_bytes + length_in_bytes;
break;
}
current_index += diff;
@@ -306,7 +329,8 @@
break;
}
else if (current_index+1 == index) {
- boundaries.end_offset_in_bytes = offset_in_bytes + converted_width;
+ boundaries.end_offset_in_bytes = offset_in_bytes
+ + converted_width;
break;
}
++current_index;
@@ -314,7 +338,8 @@
if (current_index == index) {
boundaries.start_offset_in_bytes = offset_in_bytes;
- boundaries.end_offset_in_bytes = boundaries.start_offset_in_bytes + converted_width;
+ boundaries.end_offset_in_bytes =
+ boundaries.start_offset_in_bytes + converted_width;
break;
}
@@ -328,7 +353,8 @@
}
static long
-str_ucnv_offset_in_bytes_to_index(string_t *self, long offset_in_bytes, bool ucs2_mode)
+str_ucnv_offset_in_bytes_to_index(rb_str_t *self, long offset_in_bytes,
+ bool ucs2_mode)
{
assert(!str_is_stored_in_uchars(self));
@@ -347,7 +373,8 @@
if (err == U_INDEX_OUTOFBOUNDS_ERROR) {
// end of the string
// should not happen because str_offset_in_bytes_to_index
- // checks before that offset_in_bytes is inferior to the length in bytes
+ // checks before that offset_in_bytes is inferior to the length
+ // in bytes
abort();
}
else if (U_FAILURE(err)) {
@@ -383,20 +410,23 @@
}
void
-enc_init_ucnv_encoding(encoding_t *encoding)
+enc_init_ucnv_encoding(rb_encoding_t *encoding)
{
// create the ICU converter
UErrorCode err = U_ZERO_ERROR;
UConverter *converter = ucnv_open(encoding->public_name, &err);
if (!U_SUCCESS(err) || (converter == NULL)) {
- fprintf(stderr, "Couldn't create the encoder for %s\n", encoding->public_name);
+ fprintf(stderr, "Couldn't create the encoder for %s\n",
+ encoding->public_name);
abort();
}
// stop the conversion when the conversion failed
err = U_ZERO_ERROR;
- ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err);
+ ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,
+ &err);
err = U_ZERO_ERROR;
- ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &err);
+ ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL,
+ NULL, &err);
// fill the fields not filled yet
encoding->private_data = converter;
@@ -405,6 +435,8 @@
encoding->methods.try_making_data_uchars = str_ucnv_try_making_data_uchars;
encoding->methods.length = str_ucnv_length;
encoding->methods.bytesize = str_ucnv_bytesize;
- encoding->methods.get_character_boundaries = str_ucnv_get_character_boundaries;
- encoding->methods.offset_in_bytes_to_index = str_ucnv_offset_in_bytes_to_index;
+ encoding->methods.get_character_boundaries =
+ str_ucnv_get_character_boundaries;
+ encoding->methods.offset_in_bytes_to_index =
+ str_ucnv_offset_in_bytes_to_index;
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20100216/91db7af3/attachment-0001.html>
More information about the macruby-changes
mailing list