[macruby-changes] [3697] MacRuby/branches/icu/string.c
source_changes at macosforge.org
source_changes at macosforge.org
Thu Mar 4 16:21:42 PST 2010
Revision: 3697
http://trac.macosforge.org/projects/ruby/changeset/3697
Author: lsansonetti at apple.com
Date: 2010-03-04 16:21:39 -0800 (Thu, 04 Mar 2010)
Log Message:
-----------
added #count, #delete, #squeeze (they only work in the ASCII range)
Modified Paths:
--------------
MacRuby/branches/icu/string.c
Modified: MacRuby/branches/icu/string.c
===================================================================
--- MacRuby/branches/icu/string.c 2010-03-04 17:59:47 UTC (rev 3696)
+++ MacRuby/branches/icu/string.c 2010-03-05 00:21:39 UTC (rev 3697)
@@ -4462,6 +4462,291 @@
return obj;
}
+/*
+ * call-seq:
+ * str.count([other_str]+) => fixnum
+ *
+ * Each <i>other_str</i> parameter defines a set of characters to count. The
+ * intersection of these sets defines the characters to count in
+ * <i>str</i>. Any <i>other_str</i> that starts with a caret (^) is
+ * negated. The sequence c1--c2 means all characters between c1 and c2.
+ *
+ * a = "hello world"
+ * a.count "lo" #=> 5
+ * a.count "lo", "o" #=> 2
+ * a.count "hello", "^l" #=> 4
+ * a.count "ej-m" #=> 4
+ */
+
+/*
+ * Narrows the character table `tbl' to the characters that are also selected
+ * by the pattern string `source'.
+ *
+ * tbl    - table of 0xff flags (one per character code below 0xff); an entry
+ *          stays non-zero only if it was non-zero before the call AND the
+ *          pattern selects that character.
+ * source - pattern; converted with StringValue() first.
+ *
+ * Pattern syntax handled here:
+ *   - a leading '^' followed by at least one more character negates the set;
+ *     a pattern consisting of a single "^" selects the caret itself;
+ *   - "c1-c2" selects every character from c1 to c2 inclusive;
+ *   - any other character selects itself.
+ * Characters at or above 0xff are ignored because the table cannot represent
+ * them.
+ *
+ * Raises ArgumentError when a range is reversed (c1 > c2).
+ */
+static void
+intersect_tr_table(char *tbl, VALUE source)
+{
+    StringValue(source);
+
+    UChar *chars = NULL;
+    long chars_len = 0;
+    bool need_free = false;
+    rb_str_get_uchars(source, &chars, &chars_len, &need_free);
+
+    // Only treat the caret as a negation marker when something follows it;
+    // otherwise "^" would negate the empty set and select every character.
+    long pos = 0;
+    bool negate = false;
+    if (chars_len > 1 && chars[0] == '^') {
+        pos++;
+        negate = true;
+    }
+
+    // Start from "nothing selected" (or "everything selected" when negated)
+    // and flip the entries named by the pattern.
+    char buf[0xff];
+    char cflag = negate ? 1 : 0;
+    for (int i = 0; i < 0xff; i++) {
+        buf[i] = cflag;
+    }
+
+    bool error = false;
+    cflag = negate ? 0 : 1;
+    while (pos < chars_len) {
+        UChar c = chars[pos];
+
+        if (pos + 2 < chars_len && chars[pos + 1] == '-') {
+            // Range "c-e".
+            UChar e = chars[pos + 2];
+            if (c > e) {
+                error = true;
+                break;
+            }
+
+            if (c < 0xff && e < 0xff) {
+                while (c <= e) {
+                    buf[c & 0xff] = cflag;
+                    c++;
+                }
+            }
+            // Consume the start, the dash and the end. Stopping on the end
+            // character would let it open a second range, so that "a-c-e"
+            // would select 'd' and "a-z-a" would be rejected as reversed.
+            pos += 3;
+        }
+        else {
+            if (c < 0xff) {
+                buf[c & 0xff] = cflag;
+            }
+            pos++;
+        }
+    }
+
+    // Release the buffer before raising so the error path does not skip it.
+    if (need_free) {
+        free(chars);
+    }
+
+    if (error) {
+        rb_raise(rb_eArgError, "invalid string transliteration");
+    }
+
+    // Intersect both tables.
+    for (int i = 0; i < 0xff; i++) {
+        tbl[i] = tbl[i] && buf[i];
+    }
+}
+
+/*
+ * Builds in `tbl' (0xff entries) the set of characters selected by ALL of the
+ * argc pattern strings in argv: the table starts fully selected and is then
+ * narrowed by each pattern in turn through intersect_tr_table().
+ *
+ * Raises ArgumentError when fewer than one pattern is supplied.
+ */
+static void
+create_tr_table(char *tbl, int argc, VALUE *argv)
+{
+    if (argc < 1) {
+        rb_raise(rb_eArgError, "wrong number of arguments");
+    }
+
+    // Every slot begins selected; the intersections can only clear entries.
+    for (char *slot = tbl; slot < tbl + 0xff; slot++) {
+        *slot = 1;
+    }
+
+    VALUE *pattern = argv;
+    for (int remaining = argc; remaining > 0; remaining--) {
+        intersect_tr_table(tbl, *pattern);
+        pattern++;
+    }
+}
+
+// Declares a local 0xff-entry table named __tbl__ and fills it by calling
+// create_tr_table() with the `argc' and `argv' variables of the enclosing
+// function. The expansion is a declaration followed by a call and already
+// ends with a semicolon, so it must be used where a declaration is allowed
+// and cannot be wrapped in an expression.
+#define TR_TABLE_CREATE() \
+ char __tbl__[0xff]; \
+ create_tr_table(__tbl__, argc, argv);
+
+// True when `c' is below 0xff and its __tbl__ entry is 1. Requires a prior
+// TR_TABLE_CREATE() in the same scope. `c' is evaluated more than once, so
+// pass an expression without side effects.
+#define TR_TABLE_INCLUDES(c) \
+ ((c) < 0xff && __tbl__[(c) & 0xff] == 1)
+
+/*
+ * Implementation of String#count: returns, as a Ruby integer, how many
+ * UChar units of `str' are selected by the intersection of the pattern
+ * strings in argv. Raises ArgumentError (via create_tr_table) when no
+ * pattern is given.
+ */
+static VALUE
+rstr_count(VALUE str, SEL sel, int argc, VALUE *argv)
+{
+    TR_TABLE_CREATE();
+
+    UChar *units = NULL;
+    long units_len = 0;
+    bool owns_units = false;
+    rb_str_get_uchars(str, &units, &units_len, &owns_units);
+
+    long matches = 0;
+    long idx = 0;
+    while (idx < units_len) {
+        const UChar unit = units[idx];
+        idx++;
+        matches += TR_TABLE_INCLUDES(unit) ? 1 : 0;
+    }
+
+    // rb_str_get_uchars() sets the flag when the caller must free the buffer.
+    if (owns_units) {
+        free(units);
+    }
+
+    return LONG2NUM(matches);
+}
+
+/*
+ * call-seq:
+ * str.delete!([other_str]+) => str or nil
+ *
+ * Performs a <code>delete</code> operation in place, returning <i>str</i>, or
+ * <code>nil</code> if <i>str</i> was not modified.
+ *
+ * The characters to remove are those selected by the intersection of all
+ * pattern arguments (same rules as <code>String#count</code>). Raises
+ * ArgumentError when no pattern is given.
+ */
+
+static VALUE
+rstr_delete_bang(VALUE str, SEL sel, int argc, VALUE *argv)
+{
+    rstr_modify(str);
+
+    TR_TABLE_CREATE();
+
+    UChar *chars = NULL;
+    long chars_len = 0;
+    bool need_free = false;
+    rb_str_get_uchars(str, &chars, &chars_len, &need_free);
+
+    // Single-pass compaction: copy every unit that survives down to the next
+    // free slot. This replaces shifting the whole tail left for each deleted
+    // unit, which was quadratic in the string length.
+    long kept = 0;
+    for (long i = 0; i < chars_len; i++) {
+        const UChar c = chars[i];
+        if (TR_TABLE_INCLUDES(c)) {
+            continue;
+        }
+        // Nothing has been dropped yet while kept == i; skip the self-copy so
+        // an unmodified string is never written to.
+        if (kept != i) {
+            chars[kept] = c;
+        }
+        kept++;
+    }
+
+    const bool modified = kept != chars_len;
+    chars_len = kept;
+
+    if (!modified) {
+        if (need_free) {
+            free(chars);
+        }
+        return Qnil;
+    }
+
+    if (need_free) {
+        // chars is a buffer we own (presumably a converted copy), so the
+        // result has to be written back into the string.
+        str_replace_with_uchars(RSTR(str), chars, chars_len);
+        free(chars);
+    }
+    else {
+        // Edited in place: only the recorded length has to shrink.
+        RSTR(str)->length_in_bytes = UCHARS_TO_BYTES(chars_len);
+    }
+
+    return str;
+}
+
+/*
+ * call-seq:
+ * str.delete([other_str]+) => new_str
+ *
+ * Non-destructive form of <code>delete!</code>: works on a copy of <i>str</i>
+ * and returns that copy with every character in the intersection of the
+ * arguments removed. The character sets are built with the same rules as
+ * <code>String#count</code>. The copy is returned even when nothing was
+ * removed.
+ *
+ * "hello".delete "l","lo" #=> "heo"
+ * "hello".delete "lo" #=> "he"
+ * "hello".delete "aeiou", "^e" #=> "hell"
+ * "hello".delete "ej-m" #=> "ho"
+ */
+
+static VALUE
+rstr_delete(VALUE str, SEL sel, int argc, VALUE *argv)
+{
+    VALUE copy = rb_str_new3(str);
+    // The nil "unchanged" result of delete! is deliberately ignored.
+    rstr_delete_bang(copy, 0, argc, argv);
+    return copy;
+}
+
+// Collapses, in place, each run of identical units in chars[0..chars_len)
+// down to a single unit and returns the new length. When `tbl' is NULL every
+// run is collapsed; otherwise only runs of a unit that is below 0xff and
+// whose `tbl' entry is 1 (a table filled by create_tr_table) are collapsed.
+static long
+squeeze_uchars(UChar *chars, long chars_len, const char *tbl)
+{
+    long kept = 0;
+    for (long i = 0; i < chars_len; i++) {
+        const UChar c = chars[i];
+        const bool squeezable = tbl == NULL || (c < 0xff && tbl[c] == 1);
+        // chars[kept - 1] is the last unit kept, which always equals the unit
+        // that preceded `c' in the input (dropped units are duplicates of it).
+        if (squeezable && kept > 0 && chars[kept - 1] == c) {
+            continue;
+        }
+        // Skip the self-copy so an unmodified buffer is never written to.
+        if (kept != i) {
+            chars[kept] = c;
+        }
+        kept++;
+    }
+    return kept;
+}
+
+/*
+ * call-seq:
+ * str.squeeze!([other_str]*) => str or nil
+ *
+ * Squeezes <i>str</i> in place, returning either <i>str</i>, or
+ * <code>nil</code> if no changes were made.
+ *
+ * With arguments, only characters selected by the intersection of the
+ * patterns (same rules as <code>String#count</code>) are squeezed. Without
+ * arguments, every run of identical characters is squeezed.
+ */
+
+static VALUE
+rstr_squeeze_bang(VALUE str, SEL sel, int argc, VALUE *argv)
+{
+    rstr_modify(str);
+
+    // Without arguments every run is squeezed, so no table is built. The
+    // receiver must not be reused as its own pattern: its contents would be
+    // parsed as pattern syntax, so a leading '^' would negate the set and a
+    // sequence such as "z-a" would raise ArgumentError.
+    char tbl[0xff];
+    const char *filter = NULL;
+    if (argc != 0) {
+        create_tr_table(tbl, argc, argv);
+        filter = tbl;
+    }
+
+    UChar *chars = NULL;
+    long chars_len = 0;
+    bool need_free = false;
+    rb_str_get_uchars(str, &chars, &chars_len, &need_free);
+
+    // One linear pass instead of shifting the tail for each removed unit.
+    const long new_len = squeeze_uchars(chars, chars_len, filter);
+    const bool modified = new_len != chars_len;
+    chars_len = new_len;
+
+    if (!modified) {
+        if (need_free) {
+            free(chars);
+        }
+        return Qnil;
+    }
+
+    if (need_free) {
+        // chars is a buffer we own (presumably a converted copy), so the
+        // result has to be written back into the string.
+        str_replace_with_uchars(RSTR(str), chars, chars_len);
+        free(chars);
+    }
+    else {
+        // Edited in place: only the recorded length has to shrink.
+        RSTR(str)->length_in_bytes = UCHARS_TO_BYTES(chars_len);
+    }
+
+    return str;
+}
+
+/*
+ * call-seq:
+ * str.squeeze([other_str]*) => new_str
+ *
+ * Non-destructive form of <code>squeeze!</code>. The <i>other_str</i>
+ * parameter(s) define a set of characters using the procedure described for
+ * <code>String#count</code>; the result is a new string in which each run of
+ * the same character from that set is reduced to a single character. With no
+ * arguments, all runs of identical characters are reduced.
+ *
+ * "yellow moon".squeeze #=> "yelow mon"
+ * "  now   is  the".squeeze(" ") #=> " now is the"
+ * "putters shoot balls".squeeze("m-z") #=> "puters shot balls"
+ */
+
+static VALUE
+rstr_squeeze(VALUE str, SEL sel, int argc, VALUE *argv)
+{
+    VALUE copy = rb_str_new3(str);
+    // The nil "unchanged" result of squeeze! is deliberately ignored.
+    rstr_squeeze_bang(copy, 0, argc, argv);
+    return copy;
+}
+
// NSString primitives.
static void
@@ -4632,6 +4917,11 @@
rb_objc_define_method(rb_cRubyString, "upto", rstr_upto, -1);
rb_objc_define_method(rb_cRubyString, "reverse", rstr_reverse, 0);
rb_objc_define_method(rb_cRubyString, "reverse!", rstr_reverse_bang, 0);
+ rb_objc_define_method(rb_cRubyString, "count", rstr_count, -1);
+ rb_objc_define_method(rb_cRubyString, "delete", rstr_delete, -1);
+ rb_objc_define_method(rb_cRubyString, "delete!", rstr_delete_bang, -1);
+ rb_objc_define_method(rb_cRubyString, "squeeze", rstr_squeeze, -1);
+ rb_objc_define_method(rb_cRubyString, "squeeze!", rstr_squeeze_bang, -1);
// MacRuby extensions.
rb_objc_define_method(rb_cRubyString, "transform", rstr_transform, 1);
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20100304/a3b1e787/attachment.html>
More information about the macruby-changes
mailing list