[macruby-changes] [5210] MacRuby/trunk/string.c

source_changes at macosforge.org source_changes at macosforge.org
Sat Jan 29 22:18:39 PST 2011


Revision: 5210
          http://trac.macosforge.org/projects/ruby/changeset/5210
Author:   vincent.isambart at gmail.com
Date:     2011-01-29 22:18:39 -0800 (Sat, 29 Jan 2011)
Log Message:
-----------
UTF-8 specific version of str_extract_uchars_range

Modified Paths:
--------------
    MacRuby/trunk/string.c

Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c	2011-01-30 06:18:33 UTC (rev 5209)
+++ MacRuby/trunk/string.c	2011-01-30 06:18:39 UTC (rev 5210)
@@ -1263,6 +1263,49 @@
 	    buffer[i] = source_bytes[i];
 	}
     }
+    else if (IS_UTF8_ENC(self->encoding)) {
+	long pos_in_src = 0;
+	long pos_in_dst = 0;
+	for (int i = 0; i < self->length_in_bytes; ) {
+	    UChar32 c;
+	    int old_i = i;
+	    U8_NEXT(self->bytes, i, self->length_in_bytes, c);
+	    if (c == U_SENTINEL) {
+		int diff = i - old_i;
+		if (pos_in_src + diff > range_start_offset_in_uchars) {
+		    int start = range_start_offset_in_uchars - pos_in_src;
+		    if (start < 0) {
+			start = 0;
+		    }
+		    for (int j = start; j < diff && pos_in_dst < range_length_in_uchars; ++j) {
+			buffer[pos_in_dst++] = self->bytes[old_i+j];
+		    }
+		}
+		pos_in_src += diff;
+	    }
+	    else if (U_IS_BMP(c)) {
+		if (pos_in_src >= range_start_offset_in_uchars) {
+		    buffer[pos_in_dst++] = c;
+		}
+		++pos_in_src;
+	    }
+	    else {
+		if (pos_in_src >= range_start_offset_in_uchars) {
+		    buffer[pos_in_dst++] = U16_LEAD(c);
+		    if (pos_in_dst < range_length_in_uchars) {
+			buffer[pos_in_dst++] = U16_TRAIL(c);
+		    }
+		}
+		else if (pos_in_src + 1 >= range_length_in_uchars) {
+		    buffer[pos_in_dst++] = U16_TRAIL(c);
+		}
+		pos_in_src += 2;
+	    }
+	    if (pos_in_dst >= range_length_in_uchars) {
+		break;
+	    }
+	}
+    }
     else if (IS_NATIVE_UTF16_ENC(self->encoding)) {
 	memcpy(buffer,
 		&self->bytes[UCHARS_TO_BYTES(range_start_offset_in_uchars)],
@@ -1308,6 +1351,9 @@
 		pos_in_src++;
 	    }
 	    else {
+		if (pos_in_src + 1 == range_start_offset_in_uchars) {
+		    buffer[pos_in_dst++] = U16_TRAIL(c);
+		}
 		pos_in_src += 2;
 	    }
 	    if (pos_in_dst >= range_length_in_uchars) {
@@ -2586,7 +2632,7 @@
 	    return self;
     }
 
-    if (RSTR(self)->encoding == rb_encodings[ENCODING_UTF8]) {
+    if (IS_UTF8_ENC(RSTR(self)->encoding)) {
 	const int bytelen = U8_LENGTH(codepoint);
 	if (bytelen <= 0) {
 	    goto out_of_range;
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20110129/9d8e0900/attachment.html>


More information about the macruby-changes mailing list