[macruby-changes] [3663] MacRuby/branches/icu
source_changes at macosforge.org
source_changes at macosforge.org
Mon Mar 1 23:00:27 PST 2010
Revision: 3663
http://trac.macosforge.org/projects/ruby/changeset/3663
Author: lsansonetti at apple.com
Date: 2010-03-01 23:00:25 -0800 (Mon, 01 Mar 2010)
Log Message:
-----------
unicode string formats (a work in progress)
Modified Paths:
--------------
MacRuby/branches/icu/encoding.h
MacRuby/branches/icu/sprintf.c
MacRuby/branches/icu/string.c
Modified: MacRuby/branches/icu/encoding.h
===================================================================
--- MacRuby/branches/icu/encoding.h 2010-03-02 06:50:51 UTC (rev 3662)
+++ MacRuby/branches/icu/encoding.h 2010-03-02 07:00:25 UTC (rev 3663)
@@ -302,6 +302,8 @@
UChar rb_str_get_uchar(VALUE str, long pos);
void rb_str_append_uchar(VALUE str, UChar c);
unsigned long rb_str_hash_uchars(const UChar *chars, long chars_len);
+long rb_uchar_strtol(UniChar *chars, long chars_len, long pos,
+ long *end_offset);
// Return a string object appropriate for bstr_ calls. This does nothing for
// data/binary RubyStrings.
Modified: MacRuby/branches/icu/sprintf.c
===================================================================
--- MacRuby/branches/icu/sprintf.c 2010-03-02 06:50:51 UTC (rev 3662)
+++ MacRuby/branches/icu/sprintf.c 2010-03-02 07:00:25 UTC (rev 3663)
@@ -13,6 +13,7 @@
#include "ruby/ruby.h"
#include "ruby/encoding.h"
+#include "encoding.h"
/*
* call-seq:
@@ -289,13 +290,10 @@
VALUE
rb_enc_sprintf(rb_encoding *enc, const char *format, ...)
{
- VALUE result;
va_list ap;
-
va_start(ap, format);
- result = rb_enc_vsprintf(enc, format, ap);
+ VALUE result = rb_enc_vsprintf(enc, format, ap);
va_end(ap);
-
return result;
}
@@ -308,13 +306,10 @@
VALUE
rb_sprintf(const char *format, ...)
{
- VALUE result;
va_list ap;
-
va_start(ap, format);
- result = rb_vsprintf(format, ap);
+ VALUE result = rb_vsprintf(format, ap);
va_end(ap);
-
return result;
}
@@ -356,42 +351,50 @@
width -= slen;
do {
CFStringInsert((CFMutableStringRef)arg, start, pad);
- } while (--width > 0);
+ }
+ while (--width > 0);
}
static long
-cstr_update(char **str, unsigned long start, unsigned long num, char *replace)
+cstr_update(UChar **str, long *str_len, long start, long num, VALUE replace)
{
- unsigned long len = strlen(*str) + 1;
- unsigned long replace_len = strlen(replace);
+ const long len = *str_len;
+ long replace_len = replace == 0 ? 0 : rb_str_chars_len(replace);
if (start + num > len) {
num = len - start;
}
if (replace_len >= num) {
- char *new_str = (char *)xmalloc(len + replace_len - num);
- memcpy(new_str, *str, len);
- *str = new_str;
+ *str_len = len + replace_len - num;
+ *str = (UChar *)xrealloc(*str,
+ sizeof(UChar) * (len + replace_len - num));
}
if (replace_len != num) {
- bcopy(*str + start + num, *str + start + replace_len, len - start -
- num);
+ bcopy(*str + start + num, *str + start + replace_len,
+ sizeof(UChar) * (len - start - num));
}
if (replace_len > 0) {
- bcopy(replace, *str + start, replace_len);
+ UChar *replace_chars = NULL;
+ bool need_free = false;
+ rb_str_get_uchars(replace, &replace_chars, &replace_len, &need_free);
+ assert(replace_len > 0);
+ bcopy(replace_chars, *str + start, sizeof(UChar) * replace_len);
+ if (need_free) {
+ free(replace_chars);
+ }
}
return replace_len - num;
}
-VALUE
-get_named_arg(char *format_str, unsigned long format_len, unsigned long *i,
+static VALUE
+get_named_arg(UChar *format_str, long format_len, unsigned long *i,
VALUE hash)
{
if (TYPE(hash) != T_HASH) {
rb_raise(rb_eArgError,
"hash required for named references");
}
- char closing = format_str[(*i)++] + 2;
- char *str_ptr = format_str + *i;
+ UChar closing = format_str[(*i)++] + 2;
+ UChar *str_ptr = &format_str[*i];
while (*i < format_len && format_str[*i] != closing) {
(*i)++;
}
@@ -399,26 +402,32 @@
rb_raise(rb_eArgError,
"malformed name - unmatched parenthesis");
}
- format_str[*i] = '\0';
- hash = rb_hash_aref(hash, rb_name2sym(str_ptr));
- format_str[*i] = closing;
- return (hash);
+ VALUE substr = rb_unicode_str_new(str_ptr, str_ptr - format_str);
+ hash = rb_hash_aref(hash, ID2SYM(rb_intern_str(substr)));
+ return hash;
}
-// XXX
-// - this method uses strtol to read numbers from the format string, so
-// extremely large numbers get silently truncated. this should be fixed
-// - switch to a cfstring format string to allow for proper encoding support
-
// XXX look for arguments that are altered but not duped
VALUE
rb_str_format(int argc, const VALUE *argv, VALUE fmt)
{
bool tainted = OBJ_TAINTED(fmt);
- fmt = rb_str_new3(fmt);
- char *format_str = (char *)RSTRING_PTR(fmt);
- unsigned long format_len = strlen(format_str);
- long num;
+
+ UChar *format_str = NULL;
+ long format_len = 0;
+ bool need_free = false;
+ rb_str_get_uchars(fmt, &format_str, &format_len, &need_free);
+ if (format_len == 0) {
+ goto bail;
+ }
+ UChar *tmp = (UChar *)xmalloc(format_len * sizeof(UChar));
+ memcpy(tmp, format_str, format_len * sizeof(UChar));
+ if (need_free) {
+ free(format_str);
+ }
+ format_str = tmp;
+
+ long num, pos;
int j = 0;
int ref_type = 0;
@@ -427,7 +436,7 @@
continue;
}
if (format_str[i + 1] == '%') {
- cstr_update(&format_str, i, 1, (char *)"");
+ cstr_update(&format_str, &format_len, i, 1, 0);
continue;
}
@@ -444,7 +453,6 @@
int base = 0;
CFStringRef negative_pad = NULL;
CFStringRef sharp_pad = CFSTR("");
- char *str_ptr;
unsigned long start = i;
while (i++ < format_len) {
@@ -464,16 +472,17 @@
i--;
break;
}
- num = strtol(format_str + i, &str_ptr, 10);
- if (str_ptr == format_str + i--) {
+
+ num = rb_uchar_strtol(format_str, format_len, i, &pos);
+ if (pos == i--) {
SET_REF_TYPE(REL_REF);
width = NUM2LONG(rb_Integer(GETNTHARG(j)));
j++;
}
- else if (*str_ptr == '$') {
+ else if (format_str[pos] == '$') {
SET_REF_TYPE(ABS_REF);
width = NUM2LONG(rb_Integer(GETNTHARG(num - 1)));
- i = str_ptr - format_str;
+ i = pos;
}
}
if (width < 0) {
@@ -513,9 +522,9 @@
case '7':
case '8':
case '9':
- num = strtol(format_str + i, &str_ptr, 10);
- i = str_ptr - format_str;
- if (*str_ptr == '$') {
+ num = rb_uchar_strtol(format_str, format_len, i, &pos);
+ i = pos;
+ if (format_str[pos] == '$') {
if (num == 0) {
rb_raise(rb_eArgError, "invalid absolute argument");
}
@@ -543,23 +552,26 @@
i--;
break;
}
- num = strtol(format_str + i, &str_ptr, 10);
- if (str_ptr == format_str + i--) {
+
+ num = rb_uchar_strtol(format_str, format_len,
+ i, &pos);
+ if (num == i--) {
SET_REF_TYPE(REL_REF);
precision = NUM2LONG(rb_Integer(GETNTHARG(j)));
j++;
}
- else if (*str_ptr == '$') {
+ else if (format_str[pos] == '$') {
SET_REF_TYPE(ABS_REF);
precision = NUM2LONG(rb_Integer(GETNTHARG(
num - 1)));
- i = str_ptr - format_str;
+ i = pos;
}
}
}
else if (isdigit(format_str[i])) {
- precision = strtol(format_str + i, &str_ptr, 10);
- i = str_ptr - format_str - 1;
+ precision = rb_uchar_strtol(format_str, format_len,
+ i, &pos);
+ i = pos - 1;
}
else {
rb_raise(rb_eArgError, "invalid precision");
@@ -653,7 +665,7 @@
break;
}
- arg = rb_str_new(format_str + i, 1);
+ arg = rb_unicode_str_new(&format_str[i], 1);
if (precision_flag) {
rb_str_update(arg, 0, 0, rb_big2str(LONG2NUM(precision),
10));
@@ -677,9 +689,10 @@
}
rb_str_update(arg, 0, 0, (VALUE)CFSTR("%"));
- asprintf(&str_ptr, RSTRING_PTR(arg), value);
- arg = rb_str_new2(str_ptr);
- free(str_ptr);
+ char *ptr;
+ asprintf(&ptr, RSTRING_PTR(arg), value);
+ arg = rb_str_new2(ptr);
+ free(ptr);
break;
}
@@ -732,8 +745,10 @@
arg = rb_big2str(num, base);
if (!sign_pad && IS_NEG(num) && negative_pad != NULL) {
- char neg = *RSTRING_PTR(negative_pad);
- str_ptr = (char *)RSTRING_PTR(arg) + 1;
+ break; // TODO
+#if 0
+ UChar neg = CFStringGetCharacterAtIndex(negative_pad, 0);
+ char *str_ptr = (char *)RSTRING_PTR(arg) + 1;
if (base == 8) {
*str_ptr |= ((~0 << 3) >> ((3 * strlen(str_ptr)) %
(sizeof(BDIGIT) * 8))) & ~(~0 << 3);
@@ -744,11 +759,13 @@
rb_str_update(arg, 0, num_index, (VALUE)negative_pad);
rb_str_update(arg, 0, 0, (VALUE)CFSTR(".."));
num_index = 2;
+#endif
}
if (precision_flag) {
- pad_format_value(arg, num_index, precision + (IS_NEG(num) &&
- (sign_pad || negative_pad == NULL) ? 1 : 0),
+ pad_format_value(arg, num_index,
+ precision + (IS_NEG(num)
+ && (sign_pad || negative_pad == NULL) ? 1 : 0),
zero_pad);
}
if (sharp_flag && rb_cmpint(num, Qfalse, Qfalse) != 0) {
@@ -773,14 +790,17 @@
}
pad_format_value(arg, minus_flag ? -1 : 0, width, CFSTR(" "));
- num = cstr_update(&format_str, start, i - start + 1,
- (char *)RSTRING_PTR(arg));
- format_len += num;
+ num = cstr_update(&format_str, &format_len, start, i - start + 1,
+ arg);
i += num;
break;
}
}
- fmt = rb_str_new2(format_str);
- return tainted ? OBJ_TAINT(fmt) : fmt;
+bail:
+ fmt = rb_unicode_str_new(format_str, format_len);
+ if (tainted) {
+ OBJ_TAINT(fmt);
+ }
+ return fmt;
}
Modified: MacRuby/branches/icu/string.c
===================================================================
--- MacRuby/branches/icu/string.c 2010-03-02 06:50:51 UTC (rev 3662)
+++ MacRuby/branches/icu/string.c 2010-03-02 07:00:25 UTC (rev 3663)
@@ -22,6 +22,8 @@
#include "ruby/node.h"
#include "vm.h"
+#include <unicode/unum.h>
+
VALUE rb_cString;
VALUE rb_cNSString;
VALUE rb_cNSMutableString;
@@ -4152,6 +4154,25 @@
}
long
+rb_uchar_strtol(UniChar *chars, long chars_len, long pos, long *end_offset)
+{
+ assert(chars != NULL && chars_len > 0 && pos >= 0);
+
+ UErrorCode status = U_ZERO_ERROR;
+ UNumberFormat *nf = unum_open(UNUM_DEFAULT, NULL, -1, NULL, NULL, &status);
+ assert(nf != NULL);
+
+ int32_t parse_pos = (int32_t)pos;
+ int64_t val = unum_parseInt64(nf, chars, chars_len, &parse_pos, &status);
+ unum_close(nf);
+
+ if (end_offset != NULL) {
+ *end_offset = (long)parse_pos;
+ }
+ return val;
+}
+
+long
rb_memhash(const void *ptr, long len)
{
CFDataRef data = CFDataCreate(NULL, (const UInt8 *)ptr, len);
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20100301/166dd1b2/attachment-0001.html>
More information about the macruby-changes
mailing list