[macruby-changes] [5051] MacRuby/trunk/string.c
source_changes at macosforge.org
source_changes at macosforge.org
Fri Dec 17 20:38:48 PST 2010
Revision: 5051
http://trac.macosforge.org/projects/ruby/changeset/5051
Author: vincent.isambart at gmail.com
Date: 2010-12-17 20:38:44 -0800 (Fri, 17 Dec 2010)
Log Message:
-----------
Fixed String#inspect on strings containing non-BMP characters and
cleaned up some checks
Modified Paths:
--------------
MacRuby/trunk/string.c
Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c 2010-12-18 03:52:38 UTC (rev 5050)
+++ MacRuby/trunk/string.c 2010-12-18 04:38:44 UTC (rev 5051)
@@ -280,13 +280,14 @@
static bool str_try_making_data_uchars(rb_str_t *self);
static void
-str_append_uchar(rb_str_t *self, UChar c)
+str_append_uchar32(rb_str_t *self, UChar32 c)
{
assert(str_try_making_data_uchars(self));
const long uchar_cap = BYTES_TO_UCHARS(self->capacity_in_bytes);
const long uchar_len = BYTES_TO_UCHARS(self->length_in_bytes);
- if (uchar_len + 1 >= uchar_cap) {
- assert(uchar_len + 1 < uchar_cap + 10);
+ int concat_len = U_IS_BMP(c) ? 1 : 2;
+ if (uchar_len + concat_len >= uchar_cap) {
+ assert(uchar_len + concat_len < uchar_cap + 10);
self->capacity_in_bytes += UCHARS_TO_BYTES(10);
UChar *uchars = (UChar *)xrealloc(self->data.uchars,
self->capacity_in_bytes);
@@ -294,8 +295,14 @@
GC_WB(&self->data.uchars, uchars);
}
}
- self->data.uchars[uchar_len] = c;
- self->length_in_bytes += UCHARS_TO_BYTES(1);
+ if (U_IS_BMP(c)) {
+ self->data.uchars[uchar_len] = c;
+ }
+ else {
+ self->data.uchars[uchar_len] = U16_LEAD(c);
+ self->data.uchars[uchar_len+1] = U16_TRAIL(c);
+ }
+ self->length_in_bytes += UCHARS_TO_BYTES(concat_len);
}
static void
@@ -653,16 +660,8 @@
character_boundaries_t last_boundaries =
str_get_character_boundaries(self, last);
- if (first_boundaries.start_offset_in_bytes == -1) {
- if (last_boundaries.end_offset_in_bytes == -1) {
- // you cannot cut a surrogate in an encoding that is not UTF-16
- str_cannot_cut_surrogate();
- }
- else {
- return NULL;
- }
- }
- else if (last_boundaries.end_offset_in_bytes == -1) {
+ if ((first_boundaries.start_offset_in_bytes == -1) ||
+ (last_boundaries.end_offset_in_bytes == -1)) {
// you cannot cut a surrogate in an encoding that is not UTF-16
str_cannot_cut_surrogate();
}
@@ -742,14 +741,13 @@
else {
// Positioning in the string.
beg = str_get_character_boundaries(self, pos);
-
- // TODO: probably call str_cannot_cut_surrogate()
- assert(beg.start_offset_in_bytes != -1);
-
end = str_get_character_boundaries(self, pos + len - 1);
- // TODO: probably call str_cannot_cut_surrogate()
- assert(end.end_offset_in_bytes != -1);
+ if ((beg.start_offset_in_bytes == -1) ||
+ (end.end_offset_in_bytes == -1)) {
+ // you cannot cut a surrogate in an encoding that is not UTF-16
+ str_cannot_cut_surrogate();
+ }
}
const long bytes_to_splice = end.end_offset_in_bytes
@@ -2698,12 +2696,12 @@
*/
static void
-inspect_append(VALUE result, UChar c, bool escape)
+inspect_append(VALUE result, UChar32 c, bool escape)
{
if (escape) {
- str_append_uchar(RSTR(result), '\\');
+ str_append_uchar32(RSTR(result), '\\');
}
- str_append_uchar(RSTR(result), c);
+ str_append_uchar32(RSTR(result), c);
}
static VALUE
@@ -3618,10 +3616,10 @@
break;
default:
- str_append_uchar(RSTR(val), '\\');
+ str_append_uchar32(RSTR(val), '\\');
// fall through
case '\\':
- str_append_uchar(RSTR(val), c);
+ str_append_uchar32(RSTR(val), c);
break;
}
@@ -6318,7 +6316,7 @@
rb_str_append_uchar(VALUE str, UChar c)
{
if (IS_RSTR(str)) {
- str_append_uchar(RSTR(str), c);
+ str_append_uchar32(RSTR(str), c);
}
else {
CFStringAppendCharacters((CFMutableStringRef)str, &c, 1);
@@ -6371,7 +6369,6 @@
}
else {
abort(); // TODO
-
}
return str;
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20101217/e67fd57e/attachment.html>
More information about the macruby-changes mailing list