[macruby-changes] [4975] MacRuby/trunk/string.c
source_changes at macosforge.org
source_changes at macosforge.org
Fri Dec 3 17:09:25 PST 2010
Revision: 4975
http://trac.macosforge.org/projects/ruby/changeset/4975
Author: vincent.isambart at gmail.com
Date: 2010-12-03 17:09:21 -0800 (Fri, 03 Dec 2010)
Log Message:
-----------
String#chop and String#reverse should now work properly with non-BMP characters
(bug #772)
Modified Paths:
--------------
MacRuby/trunk/string.c
Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c 2010-12-03 22:45:17 UTC (rev 4974)
+++ MacRuby/trunk/string.c 2010-12-04 01:09:21 UTC (rev 4975)
@@ -771,12 +771,10 @@
// TODO: probably call str_cannot_cut_surrogate()
assert(beg.start_offset_in_bytes != -1);
- assert(beg.end_offset_in_bytes != -1);
end = str_get_character_boundaries(self, pos + len - 1, ucs2_mode);
// TODO: probably call str_cannot_cut_surrogate()
- assert(end.start_offset_in_bytes != -1);
assert(end.end_offset_in_bytes != -1);
}
@@ -3564,9 +3562,18 @@
}
long to_del = 1;
- if (len >= 2 && rb_str_get_uchar(str, len - 1) == '\n'
- && rb_str_get_uchar(str, len - 2) == '\r') {
- to_del++;
+ if (len >= 2) {
+ // if the string ends with \r\n we have to remove both \r and \n
+ // if the string ends with a character not in the BMP,
+ // we have to remove the whole character
+ UChar last_char = rb_str_get_uchar(str, len - 1);
+ if ((last_char == '\n') || U16_IS_TRAIL(last_char)) {
+ UChar before_last = rb_str_get_uchar(str, len - 2);
+ if (((before_last == '\r') && (last_char == '\n'))
+ || (U16_IS_LEAD(before_last) && U16_IS_TRAIL(last_char))) {
+ to_del++;
+ }
+ }
}
str_delete(RSTR(str), len - to_del, to_del, true);
@@ -5013,11 +5020,29 @@
if (len <= 1) {
return str;
}
+ bool has_lead = false;
for (long i = 0; i < (len / 2); i++) {
UChar c = RSTR(str)->data.uchars[i];
+ if (U16_IS_LEAD(c)) {
+ has_lead = true;
+ }
RSTR(str)->data.uchars[i] = RSTR(str)->data.uchars[len - i - 1];
- RSTR(str)->data.uchars[len - i - 1] = c;
+ RSTR(str)->data.uchars[len - i - 1] = c;
}
+ if (has_lead) {
+ // if the string contained surrogates,
+ // we have to put them back in the correct order
+ for (long i = 0; i < len - 1; ++i) {
+ UChar c = RSTR(str)->data.uchars[i];
+ if (U16_IS_TRAIL(c)) {
+ UChar next = RSTR(str)->data.uchars[i+1];
+ if (U16_IS_LEAD(next)) {
+ RSTR(str)->data.uchars[i] = next;
+ RSTR(str)->data.uchars[i+1] = c;
+ }
+ }
+ }
+ }
}
else {
const long len = RSTR(str)->length_in_bytes;
@@ -5031,6 +5056,10 @@
}
}
+ // we modify it directly so the information stored
+ // in the facultative flags might be outdated
+ str_unset_facultative_flags(RSTR(str));
+
return str;
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20101203/e5919c27/attachment.html>
More information about the macruby-changes mailing list