[macruby-changes] [3673] MacRuby/branches/icu/string.c

source_changes at macosforge.org source_changes at macosforge.org
Tue Mar 2 18:42:34 PST 2010


Revision: 3673
          http://trac.macosforge.org/projects/ruby/changeset/3673
Author:   lsansonetti at apple.com
Date:     2010-03-02 18:42:32 -0800 (Tue, 02 Mar 2010)
Log Message:
-----------
added #start_with?, #end_with?, improved the substring find primitive to support search ranges

Modified Paths:
--------------
    MacRuby/branches/icu/string.c

Modified: MacRuby/branches/icu/string.c
===================================================================
--- MacRuby/branches/icu/string.c	2010-03-03 01:37:52 UTC (rev 3672)
+++ MacRuby/branches/icu/string.c	2010-03-03 02:42:32 UTC (rev 3673)
@@ -938,12 +938,13 @@
 
 static long
 str_offset_in_bytes_for_string(rb_str_t *self, rb_str_t *searched,
-	long start_offset_in_bytes, bool backward_search)
+	long start_offset_in_bytes, long end_offset_in_bytes,
+	bool backward_search)
 {
     if (start_offset_in_bytes >= self->length_in_bytes) {
 	return -1;
     }
-    if ((self == searched) && (start_offset_in_bytes == 0)) {
+    if (self == searched && start_offset_in_bytes == 0) {
 	return 0;
     }
     if (searched->length_in_bytes == 0) {
@@ -964,8 +965,8 @@
     }
 
     if (backward_search) {
-	for (long offset_in_bytes = start_offset_in_bytes;
-		offset_in_bytes >= 0;
+	for (long offset_in_bytes = end_offset_in_bytes;
+		offset_in_bytes >= start_offset_in_bytes;
 		offset_in_bytes -= increment) {
 	    if (memcmp(self->data.bytes+offset_in_bytes, searched->data.bytes,
 			searched->length_in_bytes) == 0) {
@@ -974,7 +975,7 @@
 	}
     }
     else {
-	const long max_offset_in_bytes = self->length_in_bytes
+	const long max_offset_in_bytes = end_offset_in_bytes
 	    - searched->length_in_bytes + 1;
 
 	for (long offset_in_bytes = start_offset_in_bytes;
@@ -991,7 +992,7 @@
 
 static long
 str_index_for_string(rb_str_t *self, rb_str_t *searched, long start_index,
-	bool backward_search, bool ucs2_mode)
+	long end_index, bool backward_search, bool ucs2_mode)
 {
     str_must_have_compatible_encoding(self, searched);
     str_make_same_format(self, searched);
@@ -1015,8 +1016,28 @@
 	start_offset_in_bytes = boundaries.start_offset_in_bytes;
     }
 
-    const long offset_in_bytes = str_offset_in_bytes_for_string(RSTR(self),
-	    searched, start_offset_in_bytes, backward_search);
+    long end_offset_in_bytes;
+    if (end_index < 0 || end_index == str_length(self, ucs2_mode)) {
+	end_offset_in_bytes = self->length_in_bytes;
+    }
+    else {
+	character_boundaries_t boundaries = str_get_character_boundaries(self,
+		end_index, ucs2_mode);
+	if (boundaries.start_offset_in_bytes == -1) {
+	    if (boundaries.end_offset_in_bytes == -1) {
+		return -1;
+	    }
+	    else {
+		// you cannot cut a surrogate in an encoding that is not UTF-16
+		str_cannot_cut_surrogate();
+	    }
+	}
+	end_offset_in_bytes = boundaries.end_offset_in_bytes;
+    }
+
+    const long offset_in_bytes = str_offset_in_bytes_for_string(self,
+	    searched, start_offset_in_bytes, end_offset_in_bytes,
+	    backward_search);
     if (offset_in_bytes == -1) {
 	return -1;
     }
@@ -1026,7 +1047,8 @@
 static bool
 str_include_string(rb_str_t *self, rb_str_t *searched)
 {
-    return (str_offset_in_bytes_for_string(self, searched, 0, false) != -1);
+    return str_offset_in_bytes_for_string(self, searched, 0,
+	    self->length_in_bytes, false) != -1;
 }
 
 static rb_str_t *
@@ -1659,7 +1681,7 @@
 	    // fall through
 	case T_STRING:
 	    pos = str_index_for_string(RSTR(self), str_need_string(sub),
-		    pos, false, true);
+		    pos, -1, false, true);
 	    break;
     }
 
@@ -1707,6 +1729,9 @@
 	if (pos >= len) {
 	    pos = len - 1;
 	}
+	else if (pos == 0) {
+	    return Qnil;
+	}
     }
     else {
 	pos = len - 1;
@@ -1722,7 +1747,7 @@
 	    // fall through
 	case T_STRING:
 	    pos = str_index_for_string(RSTR(self), str_need_string(sub),
-		    pos, true, true);
+		    0, pos - 1, true, true);
 	    break;
     }
 
@@ -1968,7 +1993,60 @@
 	? Qtrue : Qfalse;
 }
 
+/*
+ *  call-seq:
+ *     str.start_with?([prefix]+)   => true or false
+ *  
+ *  Returns true if <i>str</i> starts with one of the prefixes given.
+ */
+
 static VALUE
+rstr_start_with(VALUE str, SEL sel, int argc, VALUE *argv)
+{
+    for (int i = 0; i < argc; i++) {
+	VALUE tmp = rb_check_string_type(argv[i]);
+	if (NIL_P(tmp)) {
+	    continue;
+	}
+	const long pos = str_index_for_string(RSTR(str), str_need_string(tmp),
+		0, rb_str_chars_len(tmp), false, false);
+	if (pos == 0) {
+	    return Qtrue;
+	}
+    }
+    return Qfalse;
+}
+
+/*
+ *  call-seq:
+ *     str.end_with?([suffix]+)   => true or false
+ *  
+ *  Returns true if <i>str</i> ends with one of the suffixes given.
+ */
+
+static VALUE
+rstr_end_with(VALUE str, SEL sel, int argc, VALUE *argv)
+{
+    const long len = rb_str_chars_len(str);
+    for (int i = 0; i < argc; i++) {
+	VALUE tmp = rb_check_string_type(argv[i]);
+	if (NIL_P(tmp)) {
+	    continue;
+	}
+	const long sublen = rb_str_chars_len(tmp);
+	if (sublen > len) {
+	    continue;
+	}
+	const long pos = str_index_for_string(RSTR(str), str_need_string(tmp),
+		len - sublen, len, false, false);
+	if (pos == len - sublen) {
+	    return Qtrue;
+	}
+    }
+    return Qfalse;
+}
+
+static VALUE
 rstr_is_stored_in_uchars(VALUE self, SEL sel)
 {
     return str_is_stored_in_uchars(RSTR(self)) ? Qtrue : Qfalse;
@@ -2428,7 +2506,7 @@
 		rb_str_t *spat_str = str_need_string(spat);
 		do {
 		    const long pos = str_index_for_string(RSTR(str), spat_str,
-			    beg, false, false);
+			    beg, -1, false, false);
 		    if (pos == -1) {
 			break;
 		    }
@@ -2674,7 +2752,7 @@
     }
     else if (rslen <= len) {
 	if (str_index_for_string(RSTR(str), str_need_string(rs),
-		    len - rslen, false, false) >= 0) {
+		    len - rslen, -1, false, false) >= 0) {
 	    to_del += rslen;
 	}
     }
@@ -3555,7 +3633,7 @@
 
     long pos = 0;
     do {
-	const long off = str_index_for_string(RSTR(str), rs_str, pos,
+	const long off = str_index_for_string(RSTR(str), rs_str, pos, -1,
 		false, false);
 
 	long substr_len = 0;
@@ -3943,6 +4021,8 @@
     rb_objc_define_method(rb_cRubyString, "<=>", rstr_cmp, 1);
     rb_objc_define_method(rb_cRubyString, "eql?", rstr_eql, 1);
     rb_objc_define_method(rb_cRubyString, "include?", rstr_includes, 1);
+    rb_objc_define_method(rb_cRubyString, "start_with?", rstr_start_with, -1);
+    rb_objc_define_method(rb_cRubyString, "end_with?", rstr_end_with, -1);
     rb_objc_define_method(rb_cRubyString, "to_s", rstr_to_s, 0);
     rb_objc_define_method(rb_cRubyString, "to_str", rstr_to_s, 0);
     rb_objc_define_method(rb_cRubyString, "to_sym", rstr_intern, 0);
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20100302/4d1af87e/attachment-0001.html>


More information about the macruby-changes mailing list