[macruby-changes] [5049] MacRuby/trunk
source_changes at macosforge.org
source_changes at macosforge.org
Fri Dec 17 18:39:46 PST 2010
Revision: 5049
http://trac.macosforge.org/projects/ruby/changeset/5049
Author: vincent.isambart at gmail.com
Date: 2010-12-17 18:39:42 -0800 (Fri, 17 Dec 2010)
Log Message:
-----------
Started using C blocks (note that they should not be used in
ObjC or in sensitive places like dispatcher.cpp).
All this was to start cleaning up the string code.
I removed the per-encoding function pointers, as I'm pretty sure no
one will use them to extend the encoding handling and they make the code
harder to maintain.
Feature-wise, the only change for now is that String#inspect is much
better when part of a string is invalid:
% ./miniruby -e 'p "あ\xFF"'
"あ\xFF"
Modified Paths:
--------------
MacRuby/trunk/encoding.c
MacRuby/trunk/encoding.h
MacRuby/trunk/rakelib/builder/options.rb
MacRuby/trunk/string.c
MacRuby/trunk/ucnv.c
MacRuby/trunk/vm.cpp
Added Paths:
-----------
MacRuby/trunk/encoding_ucnv.h
Modified: MacRuby/trunk/encoding.c
===================================================================
--- MacRuby/trunk/encoding.c 2010-12-18 00:14:02 UTC (rev 5048)
+++ MacRuby/trunk/encoding.c 2010-12-18 02:39:42 UTC (rev 5049)
@@ -22,16 +22,6 @@
static rb_encoding_t *default_external = NULL;
rb_encoding_t *rb_encodings[ENCODINGS_COUNT];
-static void str_undefined_update_flags(rb_str_t *self) { abort(); }
-static void str_undefined_make_data_binary(rb_str_t *self) { abort(); }
-static bool str_undefined_try_making_data_uchars(rb_str_t *self) { abort(); }
-static long str_undefined_length(rb_str_t *self, bool ucs2_mode) { abort(); }
-static long str_undefined_bytesize(rb_str_t *self) { abort(); }
-static character_boundaries_t str_undefined_get_character_boundaries(rb_str_t *self, long index, bool ucs2_mode) { abort(); }
-static long str_undefined_offset_in_bytes_to_index(rb_str_t *self, long offset_in_bytes, bool ucs2_mode) { abort(); }
-static void str_undefined_transcode_to_utf16(struct rb_encoding *src_enc, rb_str_t *self, long *pos, UChar **utf16, long *utf16_length) { abort(); }
-static void str_undefined_transcode_from_utf16(struct rb_encoding *dst_enc, UChar *utf16, long utf16_length, long *pos, char **bytes, long *bytes_length) { abort(); }
-
static VALUE
mr_enc_s_list(VALUE klass, SEL sel)
{
@@ -273,22 +263,6 @@
encoding->aliases_count = aliases_count;
encoding->aliases = aliases;
- // fill the default implementations with aborts
- encoding->methods.update_flags = str_undefined_update_flags;
- encoding->methods.make_data_binary = str_undefined_make_data_binary;
- encoding->methods.try_making_data_uchars =
- str_undefined_try_making_data_uchars;
- encoding->methods.length = str_undefined_length;
- encoding->methods.bytesize = str_undefined_bytesize;
- encoding->methods.get_character_boundaries =
- str_undefined_get_character_boundaries;
- encoding->methods.offset_in_bytes_to_index =
- str_undefined_offset_in_bytes_to_index;
- encoding->methods.transcode_to_utf16 =
- str_undefined_transcode_to_utf16;
- encoding->methods.transcode_from_utf16 =
- str_undefined_transcode_from_utf16;
-
switch (rb_encoding_type) {
case ENCODING_TYPE_SPECIAL:
break;
Modified: MacRuby/trunk/encoding.h
===================================================================
--- MacRuby/trunk/encoding.h 2010-12-18 00:14:02 UTC (rev 5048)
+++ MacRuby/trunk/encoding.h 2010-12-18 02:39:42 UTC (rev 5049)
@@ -110,18 +110,6 @@
long end_offset_in_bytes;
} character_boundaries_t;
-typedef struct {
- void (*update_flags)(rb_str_t *);
- void (*make_data_binary)(rb_str_t *);
- bool (*try_making_data_uchars)(rb_str_t *);
- long (*length)(rb_str_t *, bool);
- long (*bytesize)(rb_str_t *);
- character_boundaries_t (*get_character_boundaries)(rb_str_t *, long, bool);
- long (*offset_in_bytes_to_index)(rb_str_t *, long, bool);
- void (*transcode_to_utf16)(struct rb_encoding *, rb_str_t *, long *, UChar **, long *);
- void (*transcode_from_utf16)(struct rb_encoding *, UChar *, long, long *, char **, long *);
-} encoding_methods_t;
-
typedef struct rb_encoding {
struct RBasic basic;
unsigned int index;
@@ -131,7 +119,6 @@
unsigned char min_char_size;
bool single_byte_encoding : 1;
bool ascii_compatible : 1;
- encoding_methods_t methods;
void *private_data;
} rb_encoding_t;
Added: MacRuby/trunk/encoding_ucnv.h
===================================================================
--- MacRuby/trunk/encoding_ucnv.h (rev 0)
+++ MacRuby/trunk/encoding_ucnv.h 2010-12-18 02:39:42 UTC (rev 5049)
@@ -0,0 +1,38 @@
+/*
+ * MacRuby implementation of Ruby 1.9 String.
+ *
+ * This file is covered by the Ruby license. See COPYING for more details.
+ *
+ * Copyright (C) 2007-2010, Apple Inc. All rights reserved.
+ * Copyright (C) 1993-2007 Yukihiro Matsumoto
+ * Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
+ * Copyright (C) 2000 Information-technology Promotion Agency, Japan
+ */
+
+#ifndef __UCNV_H_
+#define __UCNV_H_
+
+#include "encoding.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+typedef void (^each_char_callback_t)(UChar32 c, const char* character_start, long character_length, bool *stop);
+
+void str_ucnv_update_flags(rb_str_t *self);
+void str_ucnv_make_data_binary(rb_str_t *self);
+bool str_ucnv_try_making_data_uchars(rb_str_t *self);
+long str_ucnv_length(rb_str_t *self, bool ucs2_mode);
+long str_ucnv_bytesize(rb_str_t *self);
+character_boundaries_t str_ucnv_get_character_boundaries(rb_str_t *self, long index, bool ucs2_mode);
+long str_ucnv_offset_in_bytes_to_index(rb_str_t *self, long offset_in_bytes, bool ucs2_mode);
+void str_ucnv_transcode_to_utf16(struct rb_encoding *src_enc, rb_str_t *self, long *pos, UChar **utf16, long *utf16_length);
+void str_ucnv_transcode_from_utf16(struct rb_encoding *dst_enc, UChar *utf16, long utf16_length, long *utf16_pos, char **bytes, long *bytes_length);
+void str_ucnv_each_char(rb_str_t *self, each_char_callback_t callback);
+
+#if defined(__cplusplus)
+} // extern "C"
+#endif
+
+#endif /* __UCNV_H_ */
Modified: MacRuby/trunk/rakelib/builder/options.rb
===================================================================
--- MacRuby/trunk/rakelib/builder/options.rb 2010-12-18 00:14:02 UTC (rev 5048)
+++ MacRuby/trunk/rakelib/builder/options.rb 2010-12-18 02:39:42 UTC (rev 5049)
@@ -152,8 +152,8 @@
sdk = opt.delete(:sdk)
has_libauto = sdk ? File.exist?("#{sdk}/usr/lib/libauto.dylib") : true
archflags = archs.map { |x| "-arch #{x}" }.join(' ')
- @cflags = "-std=c99 -I. -I./include -fno-common -pipe -g -Wall -fexceptions -O#{OPTZ_LEVEL} -Wno-deprecated-declarations -Werror #{archflags}"
- @cxxflags = "-I. -I./include -g -Wall -Wno-deprecated-declarations -Werror #{archflags}"
+ @cflags = "-std=c99 -I. -I./include -pipe -fno-common -fexceptions -fblocks -g -O#{OPTZ_LEVEL} -Wall -Wno-deprecated-declarations -Werror #{archflags}"
+ @cxxflags = "-I. -I./include -fblocks -g -Wall -Wno-deprecated-declarations -Werror #{archflags}"
@ldflags = '-lpthread -ldl -lxml2 -lobjc -licucore -framework Foundation'
@ldflags << " -lauto" if has_libauto
if opt.delete(:static)
Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c 2010-12-18 00:14:02 UTC (rev 5048)
+++ MacRuby/trunk/string.c 2010-12-18 02:39:42 UTC (rev 5049)
@@ -23,6 +23,7 @@
#include "ruby/node.h"
#include "vm.h"
#include "class.h"
+#include "encoding_ucnv.h"
#include <unicode/unum.h>
#include <unicode/utrans.h>
@@ -138,7 +139,7 @@
str_update_flags_utf16(self);
}
else {
- self->encoding->methods.update_flags(self);
+ str_ucnv_update_flags(self);
}
}
@@ -387,7 +388,7 @@
return;
}
- self->encoding->methods.make_data_binary(self);
+ str_ucnv_make_data_binary(self);
}
static bool
@@ -418,7 +419,7 @@
return false;
}
- return self->encoding->methods.try_making_data_uchars(self);
+ return str_ucnv_try_making_data_uchars(self);
}
static void
@@ -469,11 +470,52 @@
return div_round_up(self->length_in_bytes, 2);
}
else {
- return self->encoding->methods.length(self, ucs2_mode);
+ return str_ucnv_length(self, ucs2_mode);
}
}
}
+static void // Calls `callback` once per character of `self`, in order, stopping as soon as the callback sets *stop.
+str_each_char(rb_str_t *self, each_char_callback_t callback) // callback gets: the code point, a pointer to the character's first byte, its length in bytes, and the stop flag
+{
+ if (str_is_stored_in_uchars(self)) { // data held as UChars: walk the array directly
+ bool stop = false;
+ long length = BYTES_TO_UCHARS(self->length_in_bytes);
+ for (long i = 0; i < length;) { // no increment here: U16_NEXT advances i itself
+ UChar32 c;
+ long old_i = i;
+ U16_NEXT(self->data.uchars, i, length, c);
+ callback(c, (const char *)&self->data.uchars[old_i],
+ UCHARS_TO_BYTES(i - old_i), &stop); // i is already past the character, so i - old_i (not old_i - i) is its positive length in UChars
+ if (stop) {
+ return;
+ }
+ }
+ }
+ else if (BINARY_ENC(self->encoding)
+ || (self->encoding == rb_encodings[ENCODING_ASCII])) { // binary and ASCII: every byte is reported as one character
+ const uint8_t *pos = (uint8_t*)self->data.bytes;
+ const uint8_t *end = pos + self->length_in_bytes;
+ bool stop = false;
+ for (; pos < end; ++pos) {
+ UChar32 c;
+ if (*pos > 127) { // bytes above 127 are reported as U_SENTINEL instead of a code point
+ c = U_SENTINEL;
+ }
+ else {
+ c = *pos;
+ }
+ callback(c, (const char *)pos, 1, &stop);
+ if (stop) {
+ return;
+ }
+ }
+ }
+ else { // any other byte-stored encoding is handed to the converter-based iterator
+ str_ucnv_each_char(self, callback);
+ }
+}
+
static UChar
str_get_uchar(rb_str_t *self, long pos, bool ucs2_mode)
{
@@ -494,7 +536,7 @@
return self->length_in_bytes;
}
else {
- return self->encoding->methods.bytesize(self);
+ return str_ucnv_bytesize(self);
}
}
else {
@@ -654,7 +696,7 @@
+ 2;
}
else {
- boundaries = self->encoding->methods.get_character_boundaries(self,
+ boundaries = str_ucnv_get_character_boundaries(self,
index, ucs2_mode);
}
}
@@ -1032,7 +1074,7 @@
return BYTES_TO_UCHARS(offset_in_bytes);
}
else {
- return self->encoding->methods.offset_in_bytes_to_index(self,
+ return str_ucnv_offset_in_bytes_to_index(self,
offset_in_bytes, ucs2_mode);
}
}
@@ -1362,7 +1404,7 @@
pos_in_src = self->length_in_bytes;
}
else {
- src_encoding_used->methods.transcode_to_utf16(src_encoding_used,
+ str_ucnv_transcode_to_utf16(src_encoding_used,
self, &pos_in_src, &utf16, &utf16_length);
}
@@ -1441,7 +1483,7 @@
for (;;) {
long bytes_length;
char *bytes;
- dst_encoding_used->methods.transcode_from_utf16(dst_encoding_used,
+ str_ucnv_transcode_from_utf16(dst_encoding_used,
utf16, utf16_length, &utf16_pos, &bytes, &bytes_length);
if (bytes_length > 0) {
str_concat_bytes(dst_str, bytes, bytes_length);
@@ -2785,7 +2827,8 @@
VALUE result;
if (len == 0) {
result = rb_str_new2("\"\"");
- goto bail;
+ OBJ_INFECT(result, str);
+ return result;
}
// Allocate an UTF-8 string with a good initial capacity.
@@ -2794,31 +2837,18 @@
BINARY_ENC(str->encoding) ? (len * 5) + 2 : len + 2;
result = rb_unicode_str_new(NULL, result_init_len);
-#define GET_UCHAR(pos) \
- ((uchars \
- ? str->data.uchars[pos] : (unsigned char)str->data.bytes[pos]))
-
inspect_append(result, '"', false);
- for (long i = 0; i < len; i++) {
- const UChar c = GET_UCHAR(i);
-
- bool print;
- if (uchars) {
- print = iswprint(c);
+ __block UChar32 prev = 0;
+ str_each_char(str, ^(UChar32 c, const char* char_start, long char_len, bool *stop) {
+ bool print = iswprint(c);
+ if (dump && prev == '#') {
+ inspect_append(result, prev, (c == '$' || c == '@' || c == '{'));
}
- else { // ASCII printable characters
- print = ((c >= 0x20) && (c <= 0x7E));
- }
if (print) {
if (c == '"' || c == '\\') {
inspect_append(result, c, true);
}
- else if (dump && c == '#' && i + 1 < len) {
- const UChar c2 = GET_UCHAR(i + 1);
- const bool need_escape = c2 == '$' || c2 == '@' || c2 == '{';
- inspect_append(result, c, need_escape);
- }
- else {
+ else if (c != '#' || !dump) {
inspect_append(result, c, false);
}
}
@@ -2848,19 +2878,23 @@
}
else {
char buf[10];
- snprintf(buf, sizeof buf, "\\x%02X", c);
- char *p = buf;
- while (*p != '\0') {
- inspect_append(result, *p, false);
- p++;
+ for (long i = 0; i < char_len; ++i) {
+ uint8_t byte = (uint8_t)char_start[i];
+ snprintf(buf, sizeof buf, "\\x%02X", byte);
+ char *p = buf;
+ while (*p != '\0') {
+ inspect_append(result, *p, false);
+ p++;
+ }
}
}
+ prev = c;
+ });
+ if (dump && prev == '#') {
+ inspect_append(result, prev, false);
}
inspect_append(result, '"', false);
-#undef GET_UCHAR
-
-bail:
OBJ_INFECT(result, str);
return result;
}
Modified: MacRuby/trunk/ucnv.c
===================================================================
--- MacRuby/trunk/ucnv.c 2010-12-18 00:14:02 UTC (rev 5048)
+++ MacRuby/trunk/ucnv.c 2010-12-18 02:39:42 UTC (rev 5049)
@@ -10,7 +10,7 @@
*/
#include "ruby/macruby.h"
-#include "encoding.h"
+#include "encoding_ucnv.h"
#include "unicode/ucnv.h"
// do not forget to close the converter
@@ -28,7 +28,7 @@
); \
ucnv_reset(cnv);
-static void
+void
str_ucnv_update_flags(rb_str_t *self)
{
assert(!str_is_stored_in_uchars(self));
@@ -73,7 +73,7 @@
str_set_ascii_only(self, ascii_only);
}
-static void
+void
str_ucnv_make_data_binary(rb_str_t *self)
{
assert(str_is_stored_in_uchars(self));
@@ -129,7 +129,7 @@
return approximation;
}
-static bool
+bool
str_ucnv_try_making_data_uchars(rb_str_t *self)
{
assert(!str_is_stored_in_uchars(self));
@@ -175,7 +175,7 @@
}
}
-static long
+long
str_ucnv_length(rb_str_t *self, bool ucs2_mode)
{
assert(!str_is_stored_in_uchars(self));
@@ -218,8 +218,59 @@
return len;
}
+
+void rb_ensure_b(void (^b_block)(void), void (^e_block)(void));
+
+void // Decodes a byte-stored string with the converter for its encoding and calls `callback` for each character found.
+str_ucnv_each_char(rb_str_t *self, each_char_callback_t callback)
+{
+ assert(!str_is_stored_in_uchars(self)); // only meant for strings whose data is stored as bytes
+
+ USE_CONVERTER(cnv, self->encoding); // cnv is closed by the second block passed to rb_ensure_b below
+
+ rb_ensure_b(^{ // first block: the iteration itself
+ const char *pos = self->data.bytes;
+ const char *end = pos + self->length_in_bytes;
+ bool stop = false;
+ for (;;) {
+ const char *char_start_pos = pos; // remembered so the bytes consumed by this step can be measured
+ // iterate through the string one Unicode code point at a time
+ UErrorCode err = U_ZERO_ERROR;
+ UChar32 c = ucnv_getNextUChar(cnv, &pos, end, &err);
+ if (err == U_INDEX_OUTOFBOUNDS_ERROR) {
+ // end of the string
+ break;
+ }
+ else if (U_FAILURE(err)) { // any other failure: the bytes consumed by the failed call are reported as invalid
+ long min_char_size = self->encoding->min_char_size;
+ while (char_start_pos < pos) { // NOTE(review): if a failing call leaves pos unchanged this loop is skipped and the outer loop repeats without progress - confirm that cannot happen
+ long char_len = pos - char_start_pos;
+ if (char_len > min_char_size) { // report the invalid bytes in pieces of at most min_char_size bytes
+ char_len = min_char_size;
+ }
+ callback(U_SENTINEL, char_start_pos, char_len, &stop); // U_SENTINEL is passed in place of a code point
+ if (stop) {
+ return; // leaves this block only, not str_ucnv_each_char
+ }
+ char_start_pos += char_len;
+ }
+ }
+ else {
+ long char_len = pos - char_start_pos; // number of bytes the converter consumed for c
+ callback(c, char_start_pos, char_len, &stop);
+ if (stop) {
+ return; // leaves this block only, not str_ucnv_each_char
+ }
+ }
+ }
+ }, ^{ // second block: releases the converter
+ ucnv_close(cnv);
+ });
+}
+
+
#define STACK_BUFFER_SIZE 1024
-static long
+long
str_ucnv_bytesize(rb_str_t *self)
{
assert(str_is_stored_in_uchars(self));
@@ -254,7 +305,7 @@
return len;
}
-static character_boundaries_t
+character_boundaries_t
str_ucnv_get_character_boundaries(rb_str_t *self, long index, bool ucs2_mode)
{
assert(!str_is_stored_in_uchars(self));
@@ -353,7 +404,7 @@
return boundaries;
}
-static long
+long
str_ucnv_offset_in_bytes_to_index(rb_str_t *self, long offset_in_bytes,
bool ucs2_mode)
{
@@ -410,7 +461,7 @@
return index;
}
-static void
+void
str_ucnv_transcode_to_utf16(struct rb_encoding *src_enc,
rb_str_t *self, long *pos,
UChar **utf16, long *utf16_length)
@@ -452,7 +503,7 @@
}
}
-static void
+void
str_ucnv_transcode_from_utf16(struct rb_encoding *dst_enc,
UChar *utf16, long utf16_length, long *utf16_pos,
char **bytes, long *bytes_length)
@@ -504,17 +555,4 @@
// fill the fields not filled yet
encoding->private_data = converter;
- encoding->methods.update_flags = str_ucnv_update_flags;
- encoding->methods.make_data_binary = str_ucnv_make_data_binary;
- encoding->methods.try_making_data_uchars = str_ucnv_try_making_data_uchars;
- encoding->methods.length = str_ucnv_length;
- encoding->methods.bytesize = str_ucnv_bytesize;
- encoding->methods.get_character_boundaries =
- str_ucnv_get_character_boundaries;
- encoding->methods.offset_in_bytes_to_index =
- str_ucnv_offset_in_bytes_to_index;
- encoding->methods.transcode_to_utf16 =
- str_ucnv_transcode_to_utf16;
- encoding->methods.transcode_from_utf16 =
- str_ucnv_transcode_from_utf16;
}
Modified: MacRuby/trunk/vm.cpp
===================================================================
--- MacRuby/trunk/vm.cpp 2010-12-18 00:14:02 UTC (rev 5048)
+++ MacRuby/trunk/vm.cpp 2010-12-18 02:39:42 UTC (rev 5049)
@@ -3539,6 +3539,21 @@
extern "C"
void
+rb_ensure_b(void (^b_block)(void), void (^e_block)(void)) // Runs b_block, then e_block.
+{
+ struct Finally { // local guard object that holds e_block
+ void (^e_block)(void);
+ Finally(void (^_e_block)(void)) {
+ e_block = _e_block;
+ }
+ ~Finally() { e_block(); } // e_block is invoked when the guard is destroyed
+ } finalizer(e_block);
+
+ b_block(); // finalizer leaves scope right after this call; NOTE(review): presumably the destructor is used so e_block also runs if b_block exits via a C++ exception - confirm
+}
+
+extern "C"
+void
rb_vm_break(VALUE val)
{
#if 0
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20101217/8ea6d6dc/attachment-0001.html>
More information about the macruby-changes
mailing list