[macruby-changes] [5077] MacRuby/trunk
source_changes at macosforge.org
source_changes at macosforge.org
Thu Dec 23 02:17:17 PST 2010
Revision: 5077
http://trac.macosforge.org/projects/ruby/changeset/5077
Author: vincent.isambart at gmail.com
Date: 2010-12-23 02:17:13 -0800 (Thu, 23 Dec 2010)
Log Message:
-----------
added many UTF-8 specific optimizations
Modified Paths:
--------------
MacRuby/trunk/encoding.h
MacRuby/trunk/string.c
Modified: MacRuby/trunk/encoding.h
===================================================================
--- MacRuby/trunk/encoding.h 2010-12-23 07:34:08 UTC (rev 5076)
+++ MacRuby/trunk/encoding.h 2010-12-23 10:17:13 UTC (rev 5077)
@@ -184,13 +184,6 @@
}
static inline bool
-str_known_to_have_an_invalid_encoding(rb_str_t *self)
-{
- return (self->flags & (STRING_VALID_ENCODING_SET
- | STRING_VALID_ENCODING)) == STRING_VALID_ENCODING_SET;
-}
-
-static inline bool
str_check_flag_and_update_if_needed(rb_str_t *self, str_flag_t flag_set,
str_flag_t flag)
{
Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c 2010-12-23 07:34:08 UTC (rev 5076)
+++ MacRuby/trunk/string.c 2010-12-23 10:17:13 UTC (rev 5077)
@@ -129,6 +129,26 @@
}
str_set_ascii_only(self, ascii_only);
}
+ else if (IS_UTF8_ENC(self->encoding)) {
+ bool ascii_only = true;
+ bool valid_encoding = true;
+
+ for (int i = 0; i < self->length_in_bytes; ) {
+ UChar32 c;
+ U8_NEXT(self->bytes, i, self->length_in_bytes, c);
+ if (c == U_SENTINEL) {
+ valid_encoding = false;
+ ascii_only = false;
+ break;
+ }
+ else if (c > 127) {
+ ascii_only = false;
+ }
+ }
+
+ str_set_valid_encoding(self, valid_encoding);
+ str_set_ascii_only(self, ascii_only);
+ }
else if (IS_UTF16_ENC(self->encoding)) {
str_update_flags_utf16(self);
}
@@ -357,7 +377,8 @@
static long
str_length(rb_str_t *self)
{
- if (self->encoding->single_byte_encoding) {
+ if (self->encoding->single_byte_encoding
+ || (self->encoding->ascii_compatible && str_is_ascii_only(self))) {
return self->length_in_bytes;
}
else if (IS_UTF8_ENC(self->encoding)) {
@@ -531,7 +552,8 @@
{
character_boundaries_t boundaries = {-1, -1};
- if (self->encoding->single_byte_encoding) {
+ if (self->encoding->single_byte_encoding
+ || (self->encoding->ascii_compatible && str_is_ascii_only(self))) {
if (index < 0) {
index += self->length_in_bytes;
if (index < 0) {
@@ -541,6 +563,54 @@
boundaries.start_offset_in_bytes = index;
boundaries.end_offset_in_bytes = boundaries.start_offset_in_bytes + 1;
}
+ else if (IS_UTF8_ENC(self->encoding)) {
+ long pos = 0;
+ int i = 0;
+ if (index < 0) {
+ index += str_length(self);
+ if (index < 0) {
+ return boundaries;
+ }
+ }
+ while (i < self->length_in_bytes) {
+ UChar32 c;
+ int old_i = i;
+ long new_pos = pos;
+ U8_NEXT(self->bytes, i, self->length_in_bytes, c);
+ if (c == U_SENTINEL) {
+ new_pos += i - old_i;
+ if (new_pos > index) {
+ boundaries.start_offset_in_bytes =
+ old_i + (index - pos);
+ boundaries.end_offset_in_bytes =
+ boundaries.start_offset_in_bytes + 1;
+ return boundaries;
+ }
+ }
+ else if (U_IS_BMP(c)) {
+ new_pos++;
+ if (new_pos > index) {
+ boundaries.start_offset_in_bytes = old_i;
+ boundaries.end_offset_in_bytes = i;
+ return boundaries;
+ }
+ }
+ else {
+ new_pos += 2;
+ if (new_pos > index) {
+ if (index == pos) {
+ boundaries.start_offset_in_bytes = old_i;
+ }
+ else {
+ assert(index == pos + 1);
+ boundaries.end_offset_in_bytes = i;
+ }
+ return boundaries;
+ }
+ }
+ pos = new_pos;
+ }
+ }
else if (IS_UTF16_ENC(self->encoding)) {
if (index < 0) {
index += div_round_up(self->length_in_bytes, 2);
@@ -1016,10 +1086,16 @@
if (range_length_in_uchars <= 0) {
return;
}
- if (IS_NATIVE_UTF16_ENC(self->encoding)) {
+ if (self->encoding->ascii_compatible && str_is_ascii_only(self)) {
+ char *source_bytes = &self->bytes[range_start_offset_in_uchars];
+ for (long i = 0; i < range_length_in_uchars; ++i) {
+ buffer[i] = source_bytes[i];
+ }
+ }
+ else if (IS_NATIVE_UTF16_ENC(self->encoding)) {
memcpy(buffer,
- &self->bytes[BYTES_TO_UCHARS(range_start_offset_in_uchars)],
- BYTES_TO_UCHARS(range_length_in_uchars));
+ &self->bytes[UCHARS_TO_BYTES(range_start_offset_in_uchars)],
+ UCHARS_TO_BYTES(range_length_in_uchars));
}
else {
__block long pos_in_src = 0;
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20101223/4e4c43b1/attachment-0001.html>
More information about the macruby-changes mailing list