[macruby-changes] [3487] MacRuby/trunk
source_changes at macosforge.org
source_changes at macosforge.org
Wed Feb 10 18:36:17 PST 2010
Revision: 3487
http://trac.macosforge.org/projects/ruby/changeset/3487
Author: lsansonetti at apple.com
Date: 2010-02-10 18:36:16 -0800 (Wed, 10 Feb 2010)
Log Message:
-----------
new sprintf implementation (thanks Daniel Cavanagh)
Modified Paths:
--------------
MacRuby/trunk/include/ruby/encoding.h
MacRuby/trunk/include/ruby/ruby.h
MacRuby/trunk/numeric.c
MacRuby/trunk/sprintf.cpp
Modified: MacRuby/trunk/include/ruby/encoding.h
===================================================================
--- MacRuby/trunk/include/ruby/encoding.h 2010-02-11 02:16:52 UTC (rev 3486)
+++ MacRuby/trunk/include/ruby/encoding.h 2010-02-11 02:36:16 UTC (rev 3487)
@@ -221,6 +221,8 @@
//VALUE rb_locale_charmap(VALUE klass);
long rb_memsearch(const void*,long,const void*,long,rb_encoding*);
+VALUE rb_num_to_chr(VALUE, rb_encoding *);
+
RUBY_EXTERN VALUE rb_cEncoding;
#define ENC_UNINITIALIZED (&rb_cEncoding)
Modified: MacRuby/trunk/include/ruby/ruby.h
===================================================================
--- MacRuby/trunk/include/ruby/ruby.h 2010-02-11 02:16:52 UTC (rev 3486)
+++ MacRuby/trunk/include/ruby/ruby.h 2010-02-11 02:36:16 UTC (rev 3487)
@@ -939,6 +939,7 @@
ID rb_intern_str(VALUE str);
ID rb_to_id(VALUE);
VALUE rb_id2str(ID);
+VALUE rb_name2sym(const char *);
#if WITH_OBJC
# define rb_sym2name(sym) (RSYMBOL(sym)->str)
static inline
Modified: MacRuby/trunk/numeric.c
===================================================================
--- MacRuby/trunk/numeric.c 2010-02-11 02:16:52 UTC (rev 3486)
+++ MacRuby/trunk/numeric.c 2010-02-11 02:36:16 UTC (rev 3487)
@@ -1964,6 +1964,29 @@
return rb_vm_call(num, selMINUS, 1, &one, false);
}
+VALUE
+rb_num_to_chr(VALUE num, rb_encoding *enc)
+{
+ /* XXX completely broken
+  *
+  * Builds a one-character string from the integer `num`. The value is read
+  * with NUM2LONG and stored into a char buffer, so it is narrowed to a
+  * single char before any string is created.
+  *  - enc != NULL: returns rb_enc_str_new(c, 1, enc) with no range check.
+  *    NOTE(review): presumably this is the broken part, since values
+  *    outside 0..0xff are narrowed silently instead of being encoded or
+  *    rejected -- confirm.
+  *  - enc == NULL: raises RangeError unless 0 <= num <= 0xff, then returns
+  *    a US-ASCII string for values below 0x80 and an rb_str_new string
+  *    for 0x80..0xff.
+  */
+ long i = NUM2LONG(num);
+ char c[2] = {i, '\0'};
+
+ if (enc) {
+ return rb_enc_str_new(c, 1, enc);
+ }
+ else {
+ if (i < 0 || 0xff < i) {
+ rb_raise(rb_eRangeError, "%"PRIdVALUE " out of char range", i);
+ }
+ if (i < 0x80) {
+ return rb_usascii_str_new(c, 1);
+ }
+ else {
+ return rb_str_new(c, 1);
+ }
+ }
+}
+
/*
* call-seq:
* int.chr([encoding]) => string
@@ -1979,31 +2002,10 @@
static VALUE
int_chr(VALUE num, SEL sel, int argc, VALUE *argv)
{
- long i = NUM2LONG(num);
- char c[2] = {i, '\0'};
- rb_encoding *enc;
- VALUE str;
-
- switch (argc) {
- case 0:
- if (i < 0 || 0xff < i) {
- rb_raise(rb_eRangeError, "%"PRIdVALUE " out of char range", i);
- }
- if (i < 0x80) {
- return rb_usascii_str_new(c, 1);
- }
- else {
- return rb_str_new(c, 1);
- }
- case 1:
- break;
- default:
+ if (argc > 1) {
rb_raise(rb_eArgError, "wrong number of arguments (%d for 0 or 1)", argc);
- break;
}
- enc = rb_to_encoding(argv[0]);
- str = rb_enc_str_new(c, 1, enc);
- return str;
+ return rb_num_to_chr(num, (argc ? rb_to_encoding(argv[0]) : NULL));
}
static VALUE
Modified: MacRuby/trunk/sprintf.cpp
===================================================================
--- MacRuby/trunk/sprintf.cpp 2010-02-11 02:16:52 UTC (rev 3486)
+++ MacRuby/trunk/sprintf.cpp 2010-02-11 02:36:16 UTC (rev 3487)
@@ -266,7 +266,8 @@
*/
#define GETNTHARG(nth) \
- ((nth >= argc) ? (rb_raise(rb_eArgError, "too few arguments"), 0) : argv[nth])
+ ((nth >= argc) ? (rb_raise(rb_eArgError, "too few arguments"), 0) : \
+ argv[nth])
extern "C" {
@@ -323,177 +324,474 @@
return result;
}
+#define IS_NEG(num) RBIGNUM_NEGATIVE_P(num) /* rb_str_format applies this only after promoting Fixnums with rb_int2big */
+#define REL_REF 1 /* argument taken in order (GET_ARG, or a bare width/`*`) */
+#define ABS_REF 2 /* argument selected by position with `N$` */
+#define NAMED_REF 3 /* argument looked up in a hash with `<name>` or `{name}` */
+/* Human-readable name of a reference style, used in the mixed-reference error message. */
+#define REF_NAME(type) \
+ ((type) == REL_REF ? "relative" : (type) == ABS_REF ? "absolute" : "named")
+/* Records `type` in the local `ref_type` and raises ArgumentError when a
+ * different style was recorded earlier (ref_type == 0 means none yet).
+ * NOTE(review): expands to an if statement followed by an assignment, so it
+ * must not be used as the unbraced body of another if/else. */
+#define SET_REF_TYPE(type) \
+ if (ref_type != 0 && (type) != ref_type) { \
+ rb_raise(rb_eArgError, "can't mix %s references with %s references", \
+ REF_NAME(type), REF_NAME(ref_type)); \
+ } \
+ ref_type = (type);
+/* When the local `arg` is still 0, marks the directive as relative, loads
+ * GETNTHARG(j) into `arg` and advances the local index `j`; otherwise does
+ * nothing. NOTE(review): expands to a bare if statement, so a following
+ * else would bind to it. */
+#define GET_ARG() \
+ if (arg == 0) { \
+ SET_REF_TYPE(REL_REF); \
+ arg = GETNTHARG(j); \
+ j++; \
+ }
+/* True for the flag characters `-`, ` ` and `+`. */
+#define isprenum(ch) ((ch) == '-' || (ch) == ' ' || (ch) == '+')
+/* NOTE(review): these reuse the names of the standard math classification macros. isnan evaluates its unparenthesized argument twice; isinf is true for both infinities. */
+#define isnan(x) (x != x)
+#define isinf(x) (__builtin_fabs(x) == __builtin_inf())
+
static void
-get_types_for_format_str(std::string &octypes, const unsigned int len,
- VALUE *args, const char *format_str, char **new_fmt)
+pad_format_value(VALUE arg, long start, long width,
+ CFStringRef pad)
{
- size_t format_str_len = strlen(format_str);
- unsigned int i = 0, j = 0;
+ long slen = (long)CFStringGetLength((CFStringRef)arg);
+ if (width <= slen) {
+ return;
+ }
+ if (start < 0) {
+ start += slen + 1;
+ }
+ width -= slen;
+ do {
+ CFStringInsert((CFMutableStringRef)arg, start, pad);
+ } while (--width > 0);
+}
- while (i < format_str_len) {
- bool sharp_modifier = false;
- bool star_modifier = false;
- if (format_str[i++] != '%') {
+static long
+cstr_update(char **str, unsigned long start, unsigned long num, char *replace)
+{ /* Replaces the `num` bytes of the C string *str that begin at byte offset
+   * `start` with the C string `replace`.
+   *
+   * When the replacement is at least as long as the replaced span, the text
+   * is first copied into a newly xmalloc-ed buffer and *str is repointed at
+   * it (this also happens when both lengths are equal); otherwise the
+   * existing buffer is edited in place.
+   * NOTE(review): the previous buffer is not released here -- presumably it
+   * is owned or reclaimed elsewhere; confirm.
+   *
+   * Returns replace_len - num, i.e. the change in string length. The
+   * subtraction is done in unsigned long and converted to long on return.
+   */
+ unsigned long len = strlen(*str) + 1; /* byte count including the terminating NUL */
+ unsigned long replace_len = strlen(replace);
+ if (start + num > len) { /* NOTE(review): len counts the NUL, so a clamped
+   * span can cover the terminator and leave the result unterminated, and
+   * start > len would make len - start wrap around. Confirm that callers
+   * never pass such ranges. */
+ num = len - start;
+ }
+ if (replace_len >= num) {
+ char *new_str = (char *)xmalloc(len + replace_len - num);
+ memcpy(new_str, *str, len);
+ *str = new_str;
+ }
+ if (replace_len != num) { /* move the tail, NUL included, to its new offset; bcopy takes (src, dst, n) */
+ bcopy(*str + start + num, *str + start + replace_len, len - start -
+ num);
+ }
+ if (replace_len > 0) { /* drop the replacement into the gap, without its NUL */
+ bcopy(replace, *str + start, replace_len);
+ }
+ return replace_len - num;
+}
+
+VALUE
+get_named_arg(char *format_str, unsigned long format_len, unsigned long *i,
+ VALUE hash)
+{ /* Resolves a named format reference against `hash`.
+   *
+   * The callers in rb_str_format invoke this with format_str[*i] on the
+   * opening `<` or `{`. The name runs up to the matching closing delimiter,
+   * is converted with rb_name2sym, and the result of rb_hash_aref on `hash`
+   * is returned. On return *i indexes the closing delimiter.
+   *
+   * Raises ArgumentError when `hash` is not a T_HASH or when the closing
+   * delimiter is not found before format_len.
+   */
+ if (TYPE(hash) != T_HASH) {
+ rb_raise(rb_eArgError,
+ "hash required for named references");
+ }
+ char closing = format_str[(*i)++] + 2; /* in ASCII, `<` + 2 is `>` and `{` + 2 is `}` */
+ char *str_ptr = format_str + *i; /* first byte of the name */
+ while (*i < format_len && format_str[*i] != closing) {
+ (*i)++;
+ }
+ if (*i == format_len) {
+ rb_raise(rb_eArgError,
+ "malformed name - unmatched parenthesis");
+ }
+ format_str[*i] = '\0'; /* temporarily terminate the name in place for rb_name2sym */
+ hash = rb_hash_aref(hash, rb_name2sym(str_ptr));
+ format_str[*i] = closing; /* restore the delimiter so the format string is unchanged */
+ return (hash);
+}
+
+// XXX
+// - this function uses strtol to read numbers from the format string and
+// never checks for ERANGE, so out-of-range numbers are silently clamped to
+// LONG_MAX/LONG_MIN. this should be fixed
+// - switch to a CFString format string to allow for proper encoding support
+
+// XXX look for arguments that are altered but not duped
+VALUE
+rb_str_format(int argc, const VALUE *argv, VALUE fmt)
+{
+ bool tainted = OBJ_TAINTED(fmt);
+ fmt = rb_str_new3(fmt);
+ char *format_str = (char *)RSTRING_PTR(fmt);
+ unsigned long format_len = strlen(format_str);
+ long num;
+ int j = 0;
+ int ref_type = 0;
+
+ for (unsigned long i = 0; i < format_len; i++) {
+ if (format_str[i] != '%') {
continue;
}
- if (i < format_str_len && format_str[i] == '%') {
- i++;
+ if (format_str[i + 1] == '%') {
+ cstr_update(&format_str, i, 1, (char *)"");
continue;
}
- while (i < format_str_len) {
- char type = 0;
+
+ bool sharp_flag = false;
+ bool space_flag = false;
+ bool plus_flag = false;
+ bool minus_flag = false;
+ bool zero_flag = false;
+ bool precision_flag = false;
+ bool complete = false;
+ VALUE arg = 0;
+ long width = 0;
+ long precision = 0;
+ int base = 0;
+ CFStringRef negative_pad = NULL;
+ CFStringRef sharp_pad = CFSTR("");
+ char *str_ptr;
+
+ unsigned long start = i;
+ while (i++ < format_len) {
switch (format_str[i]) {
case '#':
- sharp_modifier = true;
+ sharp_flag = true;
break;
case '*':
- star_modifier = true;
- type = _C_INT;
+ if (format_str[++i] == '<' || format_str[i] == '{') {
+ SET_REF_TYPE(NAMED_REF);
+ width = NUM2LONG(rb_Integer(get_named_arg(format_str,
+ format_len, &i, GETNTHARG(0))));
+ }
+ else {
+ if (isprenum(format_str[i])) {
+ i--;
+ break;
+ }
+ num = strtol(format_str + i, &str_ptr, 10);
+ if (str_ptr == format_str + i--) {
+ SET_REF_TYPE(REL_REF);
+ width = NUM2LONG(rb_Integer(GETNTHARG(j)));
+ j++;
+ }
+ else if (*str_ptr == '$') {
+ SET_REF_TYPE(ABS_REF);
+ width = NUM2LONG(rb_Integer(GETNTHARG(num - 1)));
+ i = str_ptr - format_str;
+ }
+ }
+ if (width < 0) {
+ minus_flag = true;
+ width = -width;
+ }
break;
+ case ' ':
+ if (!plus_flag) {
+ space_flag = true;
+ }
+ break;
+
+ case '+':
+ plus_flag = true;
+ space_flag = false;
+ break;
+
+ case '-':
+ zero_flag = false;
+ minus_flag = true;
+ break;
+
+ case '0':
+ if (!precision_flag && !minus_flag) {
+ zero_flag = true;
+ }
+ break;
+
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ num = strtol(format_str + i, &str_ptr, 10);
+ i = str_ptr - format_str;
+ if (*str_ptr == '$') {
+ if (num == 0) {
+ rb_raise(rb_eArgError, "invalid absolute argument");
+ }
+ SET_REF_TYPE(ABS_REF);
+ arg = GETNTHARG(num - 1);
+ }
+ else {
+ SET_REF_TYPE(REL_REF);
+ width = num;
+ i--;
+ }
+ break;
+
+ case '.':
+ zero_flag = false;
+ precision_flag = true;
+ if (format_str[++i] == '*') {
+ if (format_str[++i] == '<' || format_str[i] == '{') {
+ SET_REF_TYPE(NAMED_REF);
+ precision = NUM2LONG(rb_Integer(get_named_arg(
+ format_str, format_len, &i, GETNTHARG(0))));
+ }
+ else {
+ if (isprenum(format_str[i])) {
+ i--;
+ break;
+ }
+ num = strtol(format_str + i, &str_ptr, 10);
+ if (str_ptr == format_str + i--) {
+ SET_REF_TYPE(REL_REF);
+ precision = NUM2LONG(rb_Integer(GETNTHARG(j)));
+ j++;
+ }
+ else if (*str_ptr == '$') {
+ SET_REF_TYPE(ABS_REF);
+ precision = NUM2LONG(rb_Integer(GETNTHARG(
+ num - 1)));
+ i = str_ptr - format_str;
+ }
+ }
+ }
+ else if (isdigit(format_str[i])) {
+ precision = strtol(format_str + i, &str_ptr, 10);
+ i = str_ptr - format_str - 1;
+ }
+ else {
+ rb_raise(rb_eArgError, "invalid precision");
+ }
+
+ if (precision < 0) {
+ precision = 0;
+ }
+ break;
+
+ case '<':
+ case '{':
+ SET_REF_TYPE(NAMED_REF);
+ arg = get_named_arg(format_str, format_len, &i,
+ GETNTHARG(0));
+ break;
+
case 'd':
+ case 'D':
case 'i':
- case 'o':
case 'u':
+ case 'U':
+ base = 10;
+ complete = true;
+ break;
+
case 'x':
case 'X':
- type = _C_INT;
+ base = 16;
+ negative_pad = CFSTR("f");
+ sharp_pad = CFSTR("0x");
+ complete = true;
break;
+ case 'o':
+ case 'O':
+ base = 8;
+ negative_pad = CFSTR("7");
+ sharp_pad = CFSTR("0");
+ complete = true;
+ break;
+
+ case 'B':
+ case 'b':
+ base = 2;
+ negative_pad = CFSTR("1");
+ sharp_pad = CFSTR("0b");
+ complete = true;
+ break;
+
case 'c':
case 'C':
- type = _C_CHR;
+ GET_ARG();
+ if (TYPE(arg) == T_STRING) {
+ arg = rb_str_substr(arg, 0, 1);
+ }
+ else {
+ // rb_num_to_chr is broken so leave out the
+ // enc or we don't get range checking
+ arg = rb_num_to_chr(arg, NULL /*rb_enc_get(fmt)*/);
+ }
+ complete = true;
break;
- case 'D':
- case 'O':
- case 'U':
- type = _C_LNG;
- break;
-
- case 'f':
+ case 'f':
case 'F':
- case 'e':
+ case 'e':
case 'E':
- case 'g':
+ case 'g':
case 'G':
case 'a':
case 'A':
- type = _C_DBL;
- break;
-
- case 's':
- case 'S':
- {
- if (i - 1 > 0) {
- unsigned long k = i - 1;
- while (k > 0 && format_str[k] == '0') {
- k--;
+ {
+ // here we construct a new format string and then hand it to C's
+ // asprintf. why? because formatting floats by hand is hard to get right
+ GET_ARG();
+ double value = RFLOAT_VALUE(rb_Float(arg));
+ complete = true;
+
+ if (isnan(value) || isinf(value)) {
+ arg = rb_str_new2((char *)(isnan(value) ? "NaN" :
+ value < 0 ? "-Inf" : "Inf"));
+ if (isnan(value) || value > 0) {
+ if (plus_flag) {
+ rb_str_update(arg, 0, 0, (VALUE)CFSTR("+"));
}
- if (k < i && format_str[k] == '.') {
- args[j] = (VALUE)CFSTR("");
+ else if (space_flag) {
+ rb_str_update(arg, 0, 0, (VALUE)CFSTR(" "));
}
}
-#if 1
- // In Ruby, '%s' is supposed to convert the argument
- // as a string, calling #to_s on it. In order to
- // support this behavior we are changing the format
- // to '@' which sends the -[NSObject description]
- // message, exactly what we want.
- if (*new_fmt == NULL) {
- *new_fmt = strdup(format_str);
- }
- (*new_fmt)[i] = '@';
- type = _C_ID;
-#else
- type = _C_CHARPTR;
-#endif
+ break;
}
+
+ arg = rb_str_new(format_str + i, 1);
+ if (precision_flag) {
+ rb_str_update(arg, 0, 0, rb_big2str(LONG2NUM(precision),
+ 10));
+ rb_str_update(arg, 0, 0, (VALUE)CFSTR("."));
+ }
+ rb_str_update(arg, 0, 0, rb_big2str(LONG2NUM(width), 10));
+ if (minus_flag) {
+ rb_str_update(arg, 0, 0, (VALUE)CFSTR("-"));
+ }
+ else if (zero_flag) {
+ rb_str_update(arg, 0, 0, (VALUE)CFSTR("0"));
+ }
+ if (plus_flag) {
+ rb_str_update(arg, 0, 0, (VALUE)CFSTR("+"));
+ }
+ else if (space_flag) {
+ rb_str_update(arg, 0, 0, (VALUE)CFSTR(" "));
+ }
+ if (sharp_flag) {
+ rb_str_update(arg, 0, 0, (VALUE)CFSTR("#"));
+ }
+ rb_str_update(arg, 0, 0, (VALUE)CFSTR("%"));
+
+ asprintf(&str_ptr, RSTRING_PTR(arg), value);
+ arg = rb_str_new2(str_ptr);
+ free(str_ptr);
break;
+ }
+ case 's':
+ case 'S':
case 'p':
case '@':
- type = _C_ID;
+ GET_ARG();
+ arg = (tolower(format_str[i]) != 's' ? rb_inspect(arg)
+ : TYPE(arg) == T_STRING ? rb_str_new3(arg)
+ : rb_obj_as_string(arg));
+ if (precision_flag && precision
+ < CFStringGetLength((CFStringRef)arg)) {
+ CFStringPad((CFMutableStringRef)arg, NULL, precision,
+ 0);
+ }
+ complete = true;
break;
- case 'B':
- case 'b':
- {
- VALUE arg = args[j];
- switch (TYPE(arg)) {
- case T_STRING:
- arg = rb_str_to_inum(arg, 0, Qtrue);
- break;
- }
- arg = rb_big2str(arg, 2);
- if (sharp_modifier) {
- VALUE prefix = format_str[i] == 'B'
- ? (VALUE)CFSTR("0B") : (VALUE)CFSTR("0b");
- rb_str_update(arg, 0, 0, prefix);
- }
- if (*new_fmt == NULL) {
- *new_fmt = strdup(format_str);
- }
- (*new_fmt)[i] = '@';
- args[j] = arg;
- type = _C_ID;
- }
- break;
+ default:
+ rb_raise(rb_eArgError, "malformed format string - %%%c",
+ format_str[i]);
}
+ if (!complete) {
+ continue;
+ }
- i++;
+ GET_ARG();
- if (type != 0) {
- if (len == 0 || j >= len) {
- rb_raise(rb_eArgError,
- "Too much tokens in the format string `%s' "\
- "for the given %d argument(s)", format_str, len);
+ if (base != 0) {
+ bool sign_pad = false;
+ unsigned long num_index = 0;
+ CFStringRef zero_pad = CFSTR("0");
+
+ VALUE num = rb_Integer(arg);
+ if (TYPE(num) == T_FIXNUM) {
+ num = rb_int2big(FIX2LONG(num));
}
- octypes.push_back(type);
- j++;
- if (!star_modifier) {
- break;
+ if (plus_flag || space_flag) {
+ sign_pad = 1;
}
- }
- }
- }
- for (; j < len; j++) {
- octypes.push_back(_C_ID);
- }
-}
+ if (IS_NEG(num)) {
+ num_index = 1;
+ if (!sign_pad && negative_pad != NULL) {
+ zero_pad = negative_pad;
+ num = rb_big_clone(num);
+ rb_big_2comp(num);
+ }
+ }
-VALUE
-rb_str_format(int argc, const VALUE *argv, VALUE fmt)
-{
- if (argc == 0) {
- return fmt;
- }
+ arg = rb_big2str(num, base);
+ if (!sign_pad && IS_NEG(num) && negative_pad != NULL) {
+ char neg = *RSTRING_PTR(negative_pad);
+ str_ptr = (char *)RSTRING_PTR(arg) + 1;
+ if (base == 8) {
+ *str_ptr |= ((~0 << 3) >> ((3 * strlen(str_ptr)) %
+ (sizeof(BDIGIT) * 8))) & ~(~0 << 3);
+ }
+ while (*str_ptr++ == neg) {
+ num_index++;
+ }
+ rb_str_update(arg, 0, num_index, (VALUE)negative_pad);
+ rb_str_update(arg, 0, 0, (VALUE)CFSTR(".."));
+ num_index = 2;
+ }
- char *new_fmt = NULL;
- std::string types("@@@@");
- get_types_for_format_str(types, (unsigned int)argc, (VALUE *)argv,
- RSTRING_PTR(fmt), &new_fmt);
+ if (precision_flag) {
+ pad_format_value(arg, num_index, precision + (IS_NEG(num) &&
+ (sign_pad || negative_pad == NULL) ? 1 : 0),
+ zero_pad);
+ }
+ if (sharp_flag && rb_cmpint(num, NULL, NULL) != 0) {
+ rb_str_update(arg, sign_pad, 0, (VALUE)sharp_pad);
+ num_index += 2;
+ }
+ if (sign_pad && RBIGNUM_POSITIVE_P(num)) {
+ rb_str_update(arg, 0, 0, (VALUE)(plus_flag ?
+ CFSTR("+") : CFSTR(" ")));
+ num_index++;
+ }
+ if (zero_flag) {
+ pad_format_value(arg, num_index, width, zero_pad);
+ }
+ if (ISUPPER(format_str[i])) {
+ CFStringUppercase((CFMutableStringRef)arg, NULL);
+ }
+ }
+
+ if (OBJ_TAINTED(arg)) {
+ tainted = true;
+ }
- if (new_fmt != NULL) {
- fmt = rb_str_new2(new_fmt);
- }
-
- VALUE *stub_args = (VALUE *)malloc(sizeof(VALUE) * (argc + 4));
- stub_args[0] = Qnil; // allocator
- stub_args[1] = Qnil; // format options
- stub_args[2] = fmt; // format string
- for (int i = 0; i < argc; i++) {
- stub_args[3 + i] = argv[i];
+ pad_format_value(arg, minus_flag ? -1 : 0, width, CFSTR(" "));
+ num = cstr_update(&format_str, start, i - start + 1,
+ (char *)RSTRING_PTR(arg));
+ format_len += num;
+ i += num;
+ break;
+ }
}
- rb_vm_c_stub_t *stub = (rb_vm_c_stub_t *)GET_CORE()->gen_stub(types,
- true, 3, false);
-
- VALUE str = (*stub)((IMP)&CFStringCreateWithFormat, argc + 3, stub_args);
- CFMakeCollectable((void *)str);
- free(stub_args);
- return str;
+ fmt = rb_str_new2(format_str);
+ return tainted ? OBJ_TAINT(fmt) : fmt;
}
} // extern "C"
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20100210/9b6df4d5/attachment-0001.html>
More information about the macruby-changes
mailing list