[macruby-changes] [3714] MacRuby/branches/icu/string.c
source_changes at macosforge.org
Mon Mar 8 17:07:18 PST 2010
Revision: 3714
http://trac.macosforge.org/projects/ruby/changeset/3714
Author: lsansonetti at apple.com
Date: 2010-03-08 17:07:18 -0800 (Mon, 08 Mar 2010)
Log Message:
-----------
added #tr, small refactoring
Modified Paths:
--------------
MacRuby/branches/icu/string.c
Modified: MacRuby/branches/icu/string.c
===================================================================
--- MacRuby/branches/icu/string.c 2010-03-08 22:33:58 UTC (rev 3713)
+++ MacRuby/branches/icu/string.c 2010-03-09 01:07:18 UTC (rev 3714)
@@ -4492,7 +4492,8 @@
*/
static void
-intersect_tr_table(char *tbl, VALUE source)
+fill_linear_charset_buffer(char *buf, long bufsize, long *lenp, bool *negatep,
+ VALUE source)
{
StringValue(source);
@@ -4502,20 +4503,19 @@
rb_str_get_uchars(source, &chars, &chars_len, &need_free);
long pos = 0;
- bool negate = false;
- if (chars_len > 0 && chars[0] == '^') {
- pos++;
- negate = true;
+ if (negatep != NULL) {
+ if (chars_len > 0 && chars[0] == '^') {
+ *negatep = true;
+ pos++;
+ }
+ else {
+ *negatep = false;
+ }
}
- char buf[0xff];
- char cflag = negate ? 1 : 0;
- for (int i = 0; i < 0xff; i++) {
- buf[i] = cflag;
- }
+ bool error = false;
+ long bufpos = 0;
- bool error = false;
- cflag = negate ? 0 : 1;
while (pos < chars_len) {
UChar c = chars[pos];
@@ -4524,12 +4524,16 @@
UChar e = chars[pos + 2];
if (c > e) {
error = true;
- break;
+ goto bail;
}
if (c < 0xff && e < 0xff) {
while (c <= e) {
- buf[c & 0xff] = cflag;
+ if (bufpos >= bufsize) {
+ error = true;
+ goto bail;
+ }
+ buf[bufpos++] = (char)c;
c++;
}
}
@@ -4537,12 +4541,19 @@
}
else {
if (c < 0xff) {
- buf[c & 0xff] = cflag;
+ if (bufpos >= bufsize) {
+ error = true;
+ goto bail;
+ }
+ buf[bufpos++] = (char)c;
}
pos++;
}
}
+ *lenp = bufpos;
+
+bail:
if (need_free) {
free(chars);
}
@@ -4550,15 +4561,37 @@
if (error) {
rb_raise(rb_eArgError, "invalid string transliteration");
}
+}
+static void
+intersect_charset_table(char *tbl, VALUE source)
+{
+ // Generate linear buffer based on source pattern.
+ char buf[0xff];
+ bool negate = false;
+ long buflen = 0;
+ fill_linear_charset_buffer(buf, sizeof buf, &buflen, &negate, source);
+
+ // Create character table based on linear buffer.
+ char source_tbl[0xff];
+ char cflag = negate ? 1 : 0;
+ for (int i = 0; i < 0xff; i++) {
+ source_tbl[i] = cflag;
+ }
+ cflag = negate ? 0 : 1;
+ for (long i = 0; i < buflen; i++) {
+ char c = buf[i];
+ source_tbl[(int)c] = cflag;
+ }
+
// Intersect both tables.
for (int i = 0; i < 0xff; i++) {
- tbl[i] = tbl[i] && buf[i];
+ tbl[i] = tbl[i] && source_tbl[i];
}
}
static void
-create_tr_table(char *tbl, int argc, VALUE *argv)
+create_intersected_charset_table(char *tbl, int argc, VALUE *argv)
{
if (argc < 1) {
rb_raise(rb_eArgError, "wrong number of arguments");
@@ -4570,21 +4603,54 @@
}
for (int i = 0; i < argc; i++) {
- intersect_tr_table(tbl, argv[i]);
+ intersect_charset_table(tbl, argv[i]);
}
}
-#define TR_TABLE_CREATE() \
+static void
+create_translate_charset_table(char *tbl, VALUE source, VALUE repl)
+{
+ // Generate linear buffer based on source pattern.
+ char source_buf[0xff];
+ bool negate = false;
+ long source_buflen = 0;
+ fill_linear_charset_buffer(source_buf, sizeof source_buf, &source_buflen,
+ &negate, source);
+
+ // Generate linear buffer based on repl pattern.
+ char repl_buf[0xff];
+ long repl_buflen = 0;
+ fill_linear_charset_buffer(repl_buf, sizeof repl_buf, &repl_buflen,
+ NULL, repl);
+ assert(repl_buflen > 0);
+
+ // Fill the table with 0s.
+ for (int i = 0; i < 0xff; i++) {
+ tbl[i] = 0;
+ }
+
+ // Now fill the table based on the linear buffer values.
+ long pos = 0;
+ while (pos < source_buflen) {
+ const char source_c = source_buf[pos];
+ const char repl_c = pos >= repl_buflen
+ ? repl_buf[repl_buflen - 1] : repl_buf[pos];
+ tbl[(int)source_c] = repl_c;
+ pos++;
+ }
+}
+
+#define INTERSECT_CHARSET_TABLE_CREATE() \
char __tbl__[0xff]; \
- create_tr_table(__tbl__, argc, argv);
+ create_intersected_charset_table(__tbl__, argc, argv);
-#define TR_TABLE_INCLUDES(c) \
+#define CHARSET_TABLE_INCLUDES(c) \
((c) < 0xff && __tbl__[(c) & 0xff] == 1)
static VALUE
rstr_count(VALUE str, SEL sel, int argc, VALUE *argv)
{
- TR_TABLE_CREATE();
+ INTERSECT_CHARSET_TABLE_CREATE();
UChar *chars = NULL;
long chars_len = 0;
@@ -4593,7 +4659,7 @@
long count = 0;
for (long i = 0; i < chars_len; i++) {
- if (TR_TABLE_INCLUDES(chars[i])) {
+ if (CHARSET_TABLE_INCLUDES(chars[i])) {
count++;
}
}
@@ -4618,7 +4684,7 @@
{
rstr_modify(str);
- TR_TABLE_CREATE();
+ INTERSECT_CHARSET_TABLE_CREATE();
UChar *chars = NULL;
long chars_len = 0;
@@ -4627,7 +4693,7 @@
bool modified = false;
for (long i = 0; i < chars_len; i++) {
- while (i < chars_len && TR_TABLE_INCLUDES(chars[i])) {
+ while (i < chars_len && CHARSET_TABLE_INCLUDES(chars[i])) {
for (long j = i; j < chars_len - 1; j++) {
chars[j] = chars[j + 1];
}
@@ -4698,7 +4764,7 @@
argc = 1;
}
- TR_TABLE_CREATE();
+ INTERSECT_CHARSET_TABLE_CREATE();
UChar *chars = NULL;
long chars_len = 0;
@@ -4708,7 +4774,7 @@
bool modified = false;
for (long i = 0; i < chars_len; i++) {
UChar c = chars[i];
- if (TR_TABLE_INCLUDES(c)) {
+ if (CHARSET_TABLE_INCLUDES(c)) {
while (i + 1 < chars_len && chars[i + 1] == c) {
for (long j = i + 1; j < chars_len - 1; j++) {
chars[j] = chars[j + 1];
@@ -4762,6 +4828,134 @@
/*
* call-seq:
+ * str.tr!(from_str, to_str) => str or nil
+ *
+ * Translates <i>str</i> in place, using the same rules as
+ * <code>String#tr</code>. Returns <i>str</i>, or <code>nil</code> if no
+ * changes were made.
+ */
+
+static VALUE
+translate(VALUE str, VALUE source, VALUE repl, bool sflag)
+{
+ StringValue(source);
+ StringValue(repl);
+
+ if (rb_str_chars_len(repl) == 0) {
+ return rstr_delete_bang(str, 0, 1, &source);
+ }
+
+ rstr_modify(str);
+
+ char tbl[0xff];
+ create_translate_charset_table(tbl, source, repl);
+
+ UChar *chars = NULL;
+ long chars_len = 0;
+ bool need_free = false;
+ rb_str_get_uchars(str, &chars, &chars_len, &need_free);
+
+ bool modified = false;
+ for (long i = 0; i < chars_len; i++) {
+ UChar c = chars[i];
+ if (c < 0xff) {
+ char repl = tbl[(c & 0xff)];
+ if (repl != 0) {
+ chars[i] = repl;
+ modified = true;
+// TODO
+// if (sflag) {
+// }
+ }
+ }
+ }
+
+ if (!modified) {
+ if (need_free) {
+ free(chars);
+ }
+ return Qnil;
+ }
+
+ if (need_free) {
+ str_replace_with_uchars(RSTR(str), chars, chars_len);
+ free(chars);
+ }
+// else {
+// RSTR(str)->length_in_bytes = UCHARS_TO_BYTES(chars_len);
+// }
+
+ return str;
+}
+
+static VALUE
+rstr_tr_bang(VALUE str, SEL sel, VALUE src, VALUE repl)
+{
+ return translate(str, src, repl, false);
+}
+
+/*
+ * call-seq:
+ * str.tr(from_str, to_str) => new_str
+ *
+ * Returns a copy of <i>str</i> with the characters in <i>from_str</i> replaced
+ * by the corresponding characters in <i>to_str</i>. If <i>to_str</i> is
+ * shorter than <i>from_str</i>, it is padded with its last character. Both
+ * strings may use the c1--c2 notation to denote ranges of characters, and
+ * <i>from_str</i> may start with a <code>^</code>, which denotes all
+ * characters except those listed.
+ *
+ * "hello".tr('aeiou', '*') #=> "h*ll*"
+ * "hello".tr('^aeiou', '*') #=> "*e**o"
+ * "hello".tr('el', 'ip') #=> "hippo"
+ * "hello".tr('a-y', 'b-z') #=> "ifmmp"
+ */
+
+static VALUE
+rstr_tr(VALUE str, SEL sel, VALUE src, VALUE repl)
+{
+ str = rb_str_new3(str);
+ rstr_tr_bang(str, 0, src, repl);
+ return str;
+}
+
+/*
+ * call-seq:
+ * str.tr_s!(from_str, to_str) => str or nil
+ *
+ * Performs <code>String#tr_s</code> processing on <i>str</i> in place,
+ * returning <i>str</i>, or <code>nil</code> if no changes were made.
+ */
+
+static VALUE
+rstr_tr_s_bang(VALUE str, SEL sel, VALUE src, VALUE repl)
+{
+ return translate(str, src, repl, true);
+}
+
+/*
+ * call-seq:
+ * str.tr_s(from_str, to_str) => new_str
+ *
+ * Processes a copy of <i>str</i> as described under <code>String#tr</code>,
+ * then removes duplicate characters in regions that were affected by the
+ * translation.
+ *
+ * "hello".tr_s('l', 'r') #=> "hero"
+ * "hello".tr_s('el', '*') #=> "h*o"
+ * "hello".tr_s('el', 'hx') #=> "hhxo"
+ */
+
+static VALUE
+rstr_tr_s(VALUE str, SEL sel, VALUE src, VALUE repl)
+{
+ str = rb_str_new3(str);
+ rstr_tr_s_bang(str, 0, src, repl);
+ return str;
+}
+
+/*
+ * call-seq:
* str.sum(n=16) => integer
*
* Returns a basic <em>n</em>-bit checksum of the characters in <i>str</i>,
@@ -5100,6 +5294,10 @@
rb_objc_define_method(rb_cRubyString, "delete!", rstr_delete_bang, -1);
rb_objc_define_method(rb_cRubyString, "squeeze", rstr_squeeze, -1);
rb_objc_define_method(rb_cRubyString, "squeeze!", rstr_squeeze_bang, -1);
+ rb_objc_define_method(rb_cRubyString, "tr", rstr_tr, 2);
+ rb_objc_define_method(rb_cRubyString, "tr!", rstr_tr_bang, 2);
+ rb_objc_define_method(rb_cRubyString, "tr_s", rstr_tr_s, 2);
+ rb_objc_define_method(rb_cRubyString, "tr_s!", rstr_tr_s_bang, 2);
rb_objc_define_method(rb_cRubyString, "sum", rstr_sum, -1);
rb_objc_define_method(rb_cRubyString, "hash", rstr_hash, 0);
rb_objc_define_method(rb_cRubyString, "partition", rstr_partition, 1);
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20100308/59eb115e/attachment-0001.html>
More information about the macruby-changes mailing list