[macruby-changes] [5199] MacRuby/trunk
source_changes at macosforge.org
source_changes at macosforge.org
Thu Jan 27 20:48:32 PST 2011
Revision: 5199
http://trac.macosforge.org/projects/ruby/changeset/5199
Author: vincent.isambart at gmail.com
Date: 2011-01-27 20:48:31 -0800 (Thu, 27 Jan 2011)
Log Message:
-----------
String#index faster and with fewer bugs
Modified Paths:
--------------
MacRuby/trunk/encoding_ucnv.h
MacRuby/trunk/string.c
MacRuby/trunk/ucnv.c
Modified: MacRuby/trunk/encoding_ucnv.h
===================================================================
--- MacRuby/trunk/encoding_ucnv.h 2011-01-28 04:39:20 UTC (rev 5198)
+++ MacRuby/trunk/encoding_ucnv.h 2011-01-28 04:48:31 UTC (rev 5199)
@@ -25,7 +25,9 @@
character_boundaries_t str_ucnv_get_character_boundaries(rb_str_t *self, long index, bool ucs2_mode);
void str_ucnv_transcode_to_utf16(struct rb_encoding *src_enc, rb_str_t *self, long *pos, UChar **utf16, long *utf16_length);
void str_ucnv_transcode_from_utf16(struct rb_encoding *dst_enc, UChar *utf16, long utf16_length, long *utf16_pos, char **bytes, long *bytes_length);
-void str_ucnv_each_uchar32(rb_str_t *self, each_uchar32_callback_t callback);
+void str_ucnv_each_uchar32_starting_from(rb_str_t *self,
+ long start_offset_in_bytes,
+ each_uchar32_callback_t callback);
#if defined(__cplusplus)
} // extern "C"
Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c 2011-01-28 04:39:20 UTC (rev 5198)
+++ MacRuby/trunk/string.c 2011-01-28 04:48:31 UTC (rev 5199)
@@ -442,12 +442,15 @@
// Note that each_uchar32 iterates on Unicode characters
// With a character not in the BMP the callback will only be called once!
+// start_offset_in_bytes MUST be at a character boundary
static void
-str_each_uchar32(rb_str_t *self, each_uchar32_callback_t callback)
+str_each_uchar32_starting_from(rb_str_t *self,
+ long start_offset_in_bytes,
+ each_uchar32_callback_t callback)
{
if (IS_BINARY_ENC(self->encoding) || IS_ASCII_ENC(self->encoding)) {
bool stop = false;
- for (long i = 0; i < self->length_in_bytes; ++i) {
+ for (long i = start_offset_in_bytes; i < self->length_in_bytes; ++i) {
UChar32 c = (uint8_t)self->bytes[i];
if (!IS_BINARY_ENC(self->encoding) && c > 127) {
c = U_SENTINEL;
@@ -460,7 +463,7 @@
}
else if (IS_UTF8_ENC(self->encoding)) {
bool stop = false;
- for (int i = 0; i < self->length_in_bytes; ) {
+ for (int i = start_offset_in_bytes; i < self->length_in_bytes; ) {
UChar32 c;
int old_i = i;
U8_NEXT(self->bytes, i, self->length_in_bytes, c);
@@ -482,6 +485,7 @@
};
}
else if (IS_NATIVE_UTF16_ENC(self->encoding)) {
+ assert(!ODD_NUMBER(start_offset_in_bytes));
bool stop = false;
long length = BYTES_TO_UCHARS(self->length_in_bytes);
UChar *uchars = (UChar *)self->bytes;
@@ -500,10 +504,17 @@
};
}
else {
- str_ucnv_each_uchar32(self, callback);
+ str_ucnv_each_uchar32_starting_from(self,
+ start_offset_in_bytes, callback);
}
}
+static void
+str_each_uchar32(rb_str_t *self, each_uchar32_callback_t callback)
+{
+ str_each_uchar32_starting_from(self, 0, callback);
+}
+
static UChar
str_get_uchar(rb_str_t *self, long pos)
{
@@ -1134,6 +1145,27 @@
end_offset_in_bytes = boundaries.end_offset_in_bytes;
}
+ if (!backward_search) {
+ __block long returned_index = -1;
+ __block long current_index = start_index;
+ str_each_uchar32_starting_from(self, start_offset_in_bytes,
+ ^(UChar32 c, long character_start_offset, long char_len, bool *stop) {
+ if (end_offset_in_bytes - character_start_offset < searched->length_in_bytes) {
+ // not enough characters left: we could not find the string
+ *stop = true;
+ return;
+ }
+ if (memcmp(self->bytes + character_start_offset,
+ searched->bytes, searched->length_in_bytes) == 0) {
+ returned_index = current_index;
+ *stop = true;
+ return;
+ }
+ ++current_index;
+ });
+ return returned_index;
+ }
+
const long offset_in_bytes = str_offset_in_bytes_for_string(self,
searched, start_offset_in_bytes, end_offset_in_bytes,
backward_search);
@@ -1179,8 +1211,8 @@
static bool
str_include_string(rb_str_t *self, rb_str_t *searched)
{
- return str_offset_in_bytes_for_string(self, searched, 0,
- self->length_in_bytes, true) != -1;
+ return str_index_for_string_with_cache(self, searched,
+ 0, -1, false, NULL) != -1;
}
rb_str_t *
Modified: MacRuby/trunk/ucnv.c
===================================================================
--- MacRuby/trunk/ucnv.c 2011-01-28 04:39:20 UTC (rev 5198)
+++ MacRuby/trunk/ucnv.c 2011-01-28 04:48:31 UTC (rev 5199)
@@ -138,12 +138,14 @@
void rb_ensure_b(void (^b_block)(void), void (^e_block)(void));
void
-str_ucnv_each_uchar32(rb_str_t *self, each_uchar32_callback_t callback)
+str_ucnv_each_uchar32_starting_from(rb_str_t *self,
+ long start_offset_in_bytes,
+ each_uchar32_callback_t callback)
{
USE_CONVERTER(cnv, self->encoding);
rb_ensure_b(^{
- const char *pos = self->bytes;
+ const char *pos = self->bytes + start_offset_in_bytes;
const char *end = pos + self->length_in_bytes;
bool stop = false;
for (;;) {
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20110127/3d7fcba7/attachment.html>
More information about the macruby-changes mailing list