[macruby-changes] [5055] MacRuby/trunk/string.c
source_changes at macosforge.org
source_changes at macosforge.org
Sun Dec 19 15:01:48 PST 2010
Revision: 5055
http://trac.macosforge.org/projects/ruby/changeset/5055
Author: vincent.isambart at gmail.com
Date: 2010-12-19 15:01:45 -0800 (Sun, 19 Dec 2010)
Log Message:
-----------
a much better each_codepoint and some DRI in yesterday's code
Modified Paths:
--------------
MacRuby/trunk/string.c
Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c 2010-12-19 06:13:10 UTC (rev 5054)
+++ MacRuby/trunk/string.c 2010-12-19 23:01:45 UTC (rev 5055)
@@ -986,6 +986,68 @@
? (rb_str_t *)str : str_new_from_cfstring((CFStringRef)str);
}
+static void
+str_extract_uchars_range(rb_str_t *self, long range_start_offset_in_uchars,
+ long range_length_in_uchars, UChar *buffer)
+{
+ if (range_length_in_uchars <= 0) {
+ return;
+ }
+ if (IS_NATIVE_UTF16_ENC(self->encoding)) {
+ memcpy(buffer,
+ &self->bytes[BYTES_TO_UCHARS(range_start_offset_in_uchars)],
+ BYTES_TO_UCHARS(range_length_in_uchars));
+ }
+ else {
+ __block long pos_in_src = 0;
+ __block long pos_in_dst = 0;
+ str_each_uchar32(self, ^(UChar32 c, long start_index, long char_len, bool *stop) {
+ if (pos_in_src >= range_start_offset_in_uchars) {
+ if (c == U_SENTINEL) {
+ if (char_len == 1) {
+ buffer[pos_in_dst++] = self->bytes[start_index];
+ }
+ else {
+ UChar accumulator = 0;
+ if (self->encoding->little_endian) {
+ for (long i = char_len-1; i >= 0; --i) {
+ accumulator = accumulator << 8
+ | self->bytes[start_index+i];
+ }
+ }
+ else {
+ for (long i = 0; i < char_len; ++i) {
+ accumulator = accumulator << 8
+ | self->bytes[start_index+i];
+ }
+ }
+ buffer[pos_in_dst++] = accumulator;
+ }
+ }
+ else if (U_IS_BMP(c)) {
+ buffer[pos_in_dst++] = c;
+ }
+ else {
+ buffer[pos_in_dst++] = U16_LEAD(c);
+ if (pos_in_dst < range_length_in_uchars) {
+ buffer[pos_in_dst++] = U16_TRAIL(c);
+ }
+ }
+ }
+ if ((c == U_SENTINEL) || U_IS_BMP(c)) {
+ pos_in_src++;
+ }
+ else {
+ pos_in_src += 2;
+ }
+ if (pos_in_dst >= range_length_in_uchars) {
+ *stop = true;
+ }
+ });
+ assert(pos_in_dst == range_length_in_uchars);
+ }
+}
+
void
rb_str_get_uchars(VALUE str, UChar **chars_p, long *chars_len_p,
bool *need_free_p)
@@ -1000,25 +1062,7 @@
chars_len = str_length(RSTR(str));
if (chars_len > 0) {
chars = (UChar *)malloc(sizeof(UChar) * chars_len);
- __block long pos = 0;
- str_each_uchar32(RSTR(str), ^(UChar32 c, long start_index, long char_len, bool *stop) {
- if (c == U_SENTINEL) {
- if (char_len == 1) {
- chars[pos++] = RSTR(str)->bytes[start_index];
- }
- else {
- abort(); // TODO
- }
- }
- else if (U_IS_BMP(c)) {
- chars[pos++] = c;
- }
- else {
- chars[pos++] = U16_LEAD(c);
- chars[pos++] = U16_TRAIL(c);
- }
- });
- assert(pos == chars_len);
+ str_extract_uchars_range(RSTR(str), 0, chars_len, chars);
need_free = true;
}
}
@@ -3798,10 +3842,9 @@
return changed;
}
else {
- if (!IS_UTF32_ENC(RSTR(str)->encoding)
- && !IS_UTF16_ENC(RSTR(str)->encoding)) {
- abort(); // should not happen
- }
+ assert(IS_UTF32_ENC(RSTR(str)->encoding)
+ || IS_UTF16_ENC(RSTR(str)->encoding));
+
__block bool changed = true;
str_each_uchar32(RSTR(str), ^(UChar32 c, long start_index, long char_len, bool *stop) {
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
@@ -4489,16 +4532,18 @@
static VALUE
rstr_each_codepoint(VALUE str, SEL sel)
{
- if (!str_is_valid_encoding(RSTR(str))) {
- rb_raise(rb_eArgError, "invalid byte sequence in %s",
- RSTR(str)->encoding->public_name);
- }
RETURN_ENUMERATOR(str, 0, 0);
- const long len = str_length(RSTR(str));
- for (int i = 0; i < len; i++) {
- rb_yield(INT2NUM(rb_str_get_uchar(str, i)));
- }
+ str_each_uchar32(RSTR(str), ^(UChar32 c, long start_index, long char_len, bool *stop) {
+ if (c == U_SENTINEL) {
+ rb_raise(rb_eArgError, "invalid byte sequence in %s",
+ RSTR(str)->encoding->public_name);
+ }
+ else {
+ rb_yield(INT2NUM(c));
+ }
+ });
+
return str;
}
@@ -5527,40 +5572,7 @@
rstr_imp_getCharactersRange(void *rcv, SEL sel, UniChar *buffer, CFRange range)
{
check_bounds(rcv, range.location + range.length, true);
- if (range.length > 0) {
- __block long pos_in_src = 0;
- __block long pos_in_dst = 0;
- str_each_uchar32(RSTR(rcv), ^(UChar32 c, long start_index, long char_len, bool *stop) {
- if (pos_in_src >= range.location) {
- if (c == U_SENTINEL) {
- if (char_len == 1) {
- buffer[pos_in_dst++] = RSTR(rcv)->bytes[start_index];
- }
- else {
- abort(); // TODO
- }
- }
- else if (U_IS_BMP(c)) {
- buffer[pos_in_dst++] = c;
- }
- else {
- buffer[pos_in_dst++] = U16_LEAD(c);
- if (pos_in_dst < range.length) {
- buffer[pos_in_dst++] = U16_TRAIL(c);
- }
- }
- }
- if ((c == U_SENTINEL) || U_IS_BMP(c)) {
- pos_in_src++;
- }
- else {
- pos_in_src += 2;
- }
- if (pos_in_dst >= range.length) {
- *stop = true;
- }
- });
- }
+ str_extract_uchars_range(RSTR(rcv), range.location, range.length, buffer);
}
static void
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20101219/56ff81c2/attachment.html>
More information about the macruby-changes
mailing list