Revision: 3487 http://trac.macosforge.org/projects/ruby/changeset/3487 Author: lsansonetti@apple.com Date: 2010-02-10 18:36:16 -0800 (Wed, 10 Feb 2010) Log Message: ----------- new sprintf implementation (thanks Daniel Cavanagh) Modified Paths: -------------- MacRuby/trunk/include/ruby/encoding.h MacRuby/trunk/include/ruby/ruby.h MacRuby/trunk/numeric.c MacRuby/trunk/sprintf.cpp Modified: MacRuby/trunk/include/ruby/encoding.h =================================================================== --- MacRuby/trunk/include/ruby/encoding.h 2010-02-11 02:16:52 UTC (rev 3486) +++ MacRuby/trunk/include/ruby/encoding.h 2010-02-11 02:36:16 UTC (rev 3487) @@ -221,6 +221,8 @@ //VALUE rb_locale_charmap(VALUE klass); long rb_memsearch(const void*,long,const void*,long,rb_encoding*); +VALUE rb_num_to_chr(VALUE, rb_encoding *); + RUBY_EXTERN VALUE rb_cEncoding; #define ENC_UNINITIALIZED (&rb_cEncoding) Modified: MacRuby/trunk/include/ruby/ruby.h =================================================================== --- MacRuby/trunk/include/ruby/ruby.h 2010-02-11 02:16:52 UTC (rev 3486) +++ MacRuby/trunk/include/ruby/ruby.h 2010-02-11 02:36:16 UTC (rev 3487) @@ -939,6 +939,7 @@ ID rb_intern_str(VALUE str); ID rb_to_id(VALUE); VALUE rb_id2str(ID); +VALUE rb_name2sym(const char *); #if WITH_OBJC # define rb_sym2name(sym) (RSYMBOL(sym)->str) static inline Modified: MacRuby/trunk/numeric.c =================================================================== --- MacRuby/trunk/numeric.c 2010-02-11 02:16:52 UTC (rev 3486) +++ MacRuby/trunk/numeric.c 2010-02-11 02:36:16 UTC (rev 3487) @@ -1964,6 +1964,29 @@ return rb_vm_call(num, selMINUS, 1, &one, false); } +VALUE +rb_num_to_chr(VALUE num, rb_encoding *enc) +{ + // XXX completely broken + long i = NUM2LONG(num); + char c[2] = {i, '\0'}; + + if (enc) { + return rb_enc_str_new(c, 1, enc); + } + else { + if (i < 0 || 0xff < i) { + rb_raise(rb_eRangeError, "%"PRIdVALUE " out of char range", i); + } + if (i < 0x80) { + return rb_usascii_str_new(c, 1); + } + else { + return rb_str_new(c, 1); + } + } +} + /* * call-seq: * int.chr([encoding]) => string @@ -1979,31 +2002,10 @@ static VALUE int_chr(VALUE num, SEL sel, int argc, VALUE *argv) { - long i = NUM2LONG(num); - char c[2] = {i, '\0'}; - rb_encoding *enc; - VALUE str; - - switch (argc) { - case 0: - if (i < 0 || 0xff < i) { - rb_raise(rb_eRangeError, "%"PRIdVALUE " out of char range", i); - } - if (i < 0x80) { - return rb_usascii_str_new(c, 1); - } - else { - return rb_str_new(c, 1); - } - case 1: - break; - default: + if (argc > 1) { rb_raise(rb_eArgError, "wrong number of arguments (%d for 0 or 1)", argc); - break; } - enc = rb_to_encoding(argv[0]); - str = rb_enc_str_new(c, 1, enc); - return str; + return rb_num_to_chr(num, (argc ? rb_to_encoding(argv[0]) : NULL)); } static VALUE Modified: MacRuby/trunk/sprintf.cpp =================================================================== --- MacRuby/trunk/sprintf.cpp 2010-02-11 02:16:52 UTC (rev 3486) +++ MacRuby/trunk/sprintf.cpp 2010-02-11 02:36:16 UTC (rev 3487) @@ -266,7 +266,8 @@ */ #define GETNTHARG(nth) \ - ((nth >= argc) ? (rb_raise(rb_eArgError, "too few arguments"), 0) : argv[nth]) + ((nth >= argc) ? (rb_raise(rb_eArgError, "too few arguments"), 0) : \ + argv[nth]) extern "C" { @@ -323,177 +324,474 @@ return result; } +#define IS_NEG(num) RBIGNUM_NEGATIVE_P(num) +#define REL_REF 1 +#define ABS_REF 2 +#define NAMED_REF 3 + +#define REF_NAME(type) \ + ((type) == REL_REF ? "relative" : (type) == ABS_REF ? "absolute" : "named") + +#define SET_REF_TYPE(type) \ + if (ref_type != 0 && (type) != ref_type) { \ + rb_raise(rb_eArgError, "can't mix %s references with %s references", \ + REF_NAME(type), REF_NAME(ref_type)); \ + } \ + ref_type = (type); + +#define GET_ARG() \ + if (arg == 0) { \ + SET_REF_TYPE(REL_REF); \ + arg = GETNTHARG(j); \ + j++; \ + } + +#define isprenum(ch) ((ch) == '-' || (ch) == ' ' || (ch) == '+') + +#define isnan(x) (x != x) +#define isinf(x) (__builtin_fabs(x) == __builtin_inf()) + static void -get_types_for_format_str(std::string &octypes, const unsigned int len, - VALUE *args, const char *format_str, char **new_fmt) +pad_format_value(VALUE arg, long start, long width, + CFStringRef pad) { - size_t format_str_len = strlen(format_str); - unsigned int i = 0, j = 0; + long slen = (long)CFStringGetLength((CFStringRef)arg); + if (width <= slen) { + return; + } + if (start < 0) { + start += slen + 1; + } + width -= slen; + do { + CFStringInsert((CFMutableStringRef)arg, start, pad); + } while (--width > 0); +} - while (i < format_str_len) { - bool sharp_modifier = false; - bool star_modifier = false; - if (format_str[i++] != '%') { +static long +cstr_update(char **str, unsigned long start, unsigned long num, char *replace) +{ + unsigned long len = strlen(*str) + 1; + unsigned long replace_len = strlen(replace); + if (start + num > len) { + num = len - start; + } + if (replace_len >= num) { + char *new_str = (char *)xmalloc(len + replace_len - num); + memcpy(new_str, *str, len); + *str = new_str; + } + if (replace_len != num) { + bcopy(*str + start + num, *str + start + replace_len, len - start - + num); + } + if (replace_len > 0) { + bcopy(replace, *str + start, replace_len); + } + return replace_len - num; +} + +VALUE +get_named_arg(char *format_str, unsigned long format_len, unsigned long *i, + VALUE hash) +{ + if (TYPE(hash) != T_HASH) { + rb_raise(rb_eArgError, + "hash required for named references"); + } + char closing = format_str[(*i)++] + 2; + char *str_ptr = format_str + *i; + while (*i < format_len && format_str[*i] != closing) { + (*i)++; + } + if (*i == format_len) { + rb_raise(rb_eArgError, + "malformed name - unmatched parenthesis"); + } + format_str[*i] = '\0'; + hash = rb_hash_aref(hash, rb_name2sym(str_ptr)); + format_str[*i] = closing; + return (hash); +} + +// XXX +// - this method uses strtol to read numbers from the format string, so +// extremely large numbers get silently truncated. this should be fixed +// - switch to a cfstring format string to allow for proper encoding support + +// XXX look for arguments that are altered but not duped +VALUE +rb_str_format(int argc, const VALUE *argv, VALUE fmt) +{ + bool tainted = OBJ_TAINTED(fmt); + fmt = rb_str_new3(fmt); + char *format_str = (char *)RSTRING_PTR(fmt); + unsigned long format_len = strlen(format_str); + long num; + int j = 0; + int ref_type = 0; + + for (unsigned long i = 0; i < format_len; i++) { + if (format_str[i] != '%') { continue; } - if (i < format_str_len && format_str[i] == '%') { - i++; + if (format_str[i + 1] == '%') { + cstr_update(&format_str, i, 1, (char *)""); continue; } - while (i < format_str_len) { - char type = 0; + + bool sharp_flag = false; + bool space_flag = false; + bool plus_flag = false; + bool minus_flag = false; + bool zero_flag = false; + bool precision_flag = false; + bool complete = false; + VALUE arg = 0; + long width = 0; + long precision = 0; + int base = 0; + CFStringRef negative_pad = NULL; + CFStringRef sharp_pad = CFSTR(""); + char *str_ptr; + + unsigned long start = i; + while (i++ < format_len) { switch (format_str[i]) { case '#': - sharp_modifier = true; + sharp_flag = true; break; case '*': - star_modifier = true; - type = _C_INT; + if (format_str[++i] == '<' || format_str[i] == '{') { + SET_REF_TYPE(NAMED_REF); + width = NUM2LONG(rb_Integer(get_named_arg(format_str, + format_len, &i, GETNTHARG(0)))); + } + else { + if (isprenum(format_str[i])) { + i--; + break; + } + num = strtol(format_str + i, &str_ptr, 10); + if (str_ptr == format_str + i--) { + SET_REF_TYPE(REL_REF); + width = NUM2LONG(rb_Integer(GETNTHARG(j))); + j++; + } + else if (*str_ptr == '$') { + SET_REF_TYPE(ABS_REF); + width = NUM2LONG(rb_Integer(GETNTHARG(num - 1))); + i = str_ptr - format_str; + } + } + if (width < 0) { + minus_flag = true; + width = -width; + } break; + case ' ': + if (!plus_flag) { + space_flag = true; + } + break; + + case '+': + plus_flag = true; + space_flag = false; + break; + + case '-': + zero_flag = false; + minus_flag = true; + break; + + case '0': + if (!precision_flag && !minus_flag) { + zero_flag = true; + } + break; + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + num = strtol(format_str + i, &str_ptr, 10); + i = str_ptr - format_str; + if (*str_ptr == '$') { + if (num == 0) { + rb_raise(rb_eArgError, "invalid absolute argument"); + } + SET_REF_TYPE(ABS_REF); + arg = GETNTHARG(num - 1); + } + else { + SET_REF_TYPE(REL_REF); + width = num; + i--; + } + break; + + case '.': + zero_flag = false; + precision_flag = true; + if (format_str[++i] == '*') { + if (format_str[++i] == '<' || format_str[i] == '{') { + SET_REF_TYPE(NAMED_REF); + precision = NUM2LONG(rb_Integer(get_named_arg( + format_str, format_len, &i, GETNTHARG(0)))); + } + else { + if (isprenum(format_str[i])) { + i--; + break; + } + num = strtol(format_str + i, &str_ptr, 10); + if (str_ptr == format_str + i--) { + SET_REF_TYPE(REL_REF); + precision = NUM2LONG(rb_Integer(GETNTHARG(j))); + j++; + } + else if (*str_ptr == '$') { + SET_REF_TYPE(ABS_REF); + precision = NUM2LONG(rb_Integer(GETNTHARG( + num - 1))); + i = str_ptr - format_str; + } + } + } + else if (isdigit(format_str[i])) { + precision = strtol(format_str + i, &str_ptr, 10); + i = str_ptr - format_str - 1; + } + else { + rb_raise(rb_eArgError, "invalid precision"); + } + + if (precision < 0) { + precision = 0; + } + break; + + case '<': + case '{': + SET_REF_TYPE(NAMED_REF); + arg = get_named_arg(format_str, format_len, &i, + GETNTHARG(0)); + break; + case 'd': + case 'D': case 'i': - case 'o': case 'u': + case 'U': + base = 10; + complete = true; + break; + case 'x': case 'X': - type = _C_INT; + base = 16; + negative_pad = CFSTR("f"); + sharp_pad = CFSTR("0x"); + complete = true; break; + case 'o': + case 'O': + base = 8; + negative_pad = CFSTR("7"); + sharp_pad = CFSTR("0"); + complete = true; + break; + + case 'B': + case 'b': + base = 2; + negative_pad = CFSTR("1"); + sharp_pad = CFSTR("0b"); + complete = true; + break; + case 'c': case 'C': - type = _C_CHR; + GET_ARG(); + if (TYPE(arg) == T_STRING) { + arg = rb_str_substr(arg, 0, 1); + } + else { + // rb_num_to_chr is broken so leave out the + // enc or we don't get range checking + arg = rb_num_to_chr(arg, NULL /*rb_enc_get(fmt)*/); + } + complete = true; break; - case 'D': - case 'O': - case 'U': - type = _C_LNG; - break; - - case 'f': + case 'f': case 'F': - case 'e': + case 'e': case 'E': - case 'g': + case 'g': case 'G': case 'a': case 'A': - type = _C_DBL; - break; - - case 's': - case 'S': - { - if (i - 1 > 0) { - unsigned long k = i - 1; - while (k > 0 && format_str[k] == '0') { - k--; + { + // here we construct a new format str and then use + // c's sprintf. why? because floats are retarded + GET_ARG(); + double value = RFLOAT_VALUE(rb_Float(arg)); + complete = true; + + if (isnan(value) || isinf(value)) { + arg = rb_str_new2((char *)(isnan(value) ? "NaN" : + value < 0 ? "-Inf" : "Inf")); + if (isnan(value) || value > 0) { + if (plus_flag) { + rb_str_update(arg, 0, 0, (VALUE)CFSTR("+")); } - if (k < i && format_str[k] == '.') { - args[j] = (VALUE)CFSTR(""); + else if (space_flag) { + rb_str_update(arg, 0, 0, (VALUE)CFSTR(" ")); } } -#if 1 - // In Ruby, '%s' is supposed to convert the argument - // as a string, calling #to_s on it. In order to - // support this behavior we are changing the format - // to '@' which sends the -[NSObject description] - // message, exactly what we want. - if (*new_fmt == NULL) { - *new_fmt = strdup(format_str); - } - (*new_fmt)[i] = '@'; - type = _C_ID; -#else - type = _C_CHARPTR; -#endif + break; } + + arg = rb_str_new(format_str + i, 1); + if (precision_flag) { + rb_str_update(arg, 0, 0, rb_big2str(LONG2NUM(precision), + 10)); + rb_str_update(arg, 0, 0, (VALUE)CFSTR(".")); + } + rb_str_update(arg, 0, 0, rb_big2str(LONG2NUM(width), 10)); + if (minus_flag) { + rb_str_update(arg, 0, 0, (VALUE)CFSTR("-")); + } + else if (zero_flag) { + rb_str_update(arg, 0, 0, (VALUE)CFSTR("0")); + } + if (plus_flag) { + rb_str_update(arg, 0, 0, (VALUE)CFSTR("+")); + } + else if (space_flag) { + rb_str_update(arg, 0, 0, (VALUE)CFSTR(" ")); + } + if (sharp_flag) { + rb_str_update(arg, 0, 0, (VALUE)CFSTR("#")); + } + rb_str_update(arg, 0, 0, (VALUE)CFSTR("%")); + + asprintf(&str_ptr, RSTRING_PTR(arg), value); + arg = rb_str_new2(str_ptr); + free(str_ptr); break; + } + case 's': + case 'S': case 'p': case '@': - type = _C_ID; + GET_ARG(); + arg = (tolower(format_str[i]) != 's' ? rb_inspect(arg) + : TYPE(arg) == T_STRING ? rb_str_new3(arg) + : rb_obj_as_string(arg)); + if (precision_flag && precision + < CFStringGetLength((CFStringRef)arg)) { + CFStringPad((CFMutableStringRef)arg, NULL, precision, + 0); + } + complete = true; break; - case 'B': - case 'b': - { - VALUE arg = args[j]; - switch (TYPE(arg)) { - case T_STRING: - arg = rb_str_to_inum(arg, 0, Qtrue); - break; - } - arg = rb_big2str(arg, 2); - if (sharp_modifier) { - VALUE prefix = format_str[i] == 'B' - ? (VALUE)CFSTR("0B") : (VALUE)CFSTR("0b"); - rb_str_update(arg, 0, 0, prefix); - } - if (*new_fmt == NULL) { - *new_fmt = strdup(format_str); - } - (*new_fmt)[i] = '@'; - args[j] = arg; - type = _C_ID; - } - break; + default: + rb_raise(rb_eArgError, "malformed format string - %%%c", + format_str[i]); } + if (!complete) { + continue; + } - i++; + GET_ARG(); - if (type != 0) { - if (len == 0 || j >= len) { - rb_raise(rb_eArgError, - "Too much tokens in the format string `%s' "\ - "for the given %d argument(s)", format_str, len); + if (base != 0) { + bool sign_pad = false; + unsigned long num_index = 0; + CFStringRef zero_pad = CFSTR("0"); + + VALUE num = rb_Integer(arg); + if (TYPE(num) == T_FIXNUM) { + num = rb_int2big(FIX2LONG(num)); } - octypes.push_back(type); - j++; - if (!star_modifier) { - break; + if (plus_flag || space_flag) { + sign_pad = 1; } - } - } - } - for (; j < len; j++) { - octypes.push_back(_C_ID); - } -} + if (IS_NEG(num)) { + num_index = 1; + if (!sign_pad && negative_pad != NULL) { + zero_pad = negative_pad; + num = rb_big_clone(num); + rb_big_2comp(num); + } + } -VALUE -rb_str_format(int argc, const VALUE *argv, VALUE fmt) -{ - if (argc == 0) { - return fmt; - } + arg = rb_big2str(num, base); + if (!sign_pad && IS_NEG(num) && negative_pad != NULL) { + char neg = *RSTRING_PTR(negative_pad); + str_ptr = (char *)RSTRING_PTR(arg) + 1; + if (base == 8) { + *str_ptr |= ((~0 << 3) >> ((3 * strlen(str_ptr)) % + (sizeof(BDIGIT) * 8))) & ~(~0 << 3); + } + while (*str_ptr++ == neg) { + num_index++; + } + rb_str_update(arg, 0, num_index, (VALUE)negative_pad); + rb_str_update(arg, 0, 0, (VALUE)CFSTR("..")); + num_index = 2; + } - char *new_fmt = NULL; - std::string types("@@@@"); - get_types_for_format_str(types, (unsigned int)argc, (VALUE *)argv, - RSTRING_PTR(fmt), &new_fmt); + if (precision_flag) { + pad_format_value(arg, num_index, precision + (IS_NEG(num) && + (sign_pad || negative_pad == NULL) ? 1 : 0), + zero_pad); + } + if (sharp_flag && rb_cmpint(num, NULL, NULL) != 0) { + rb_str_update(arg, sign_pad, 0, (VALUE)sharp_pad); + num_index += 2; + } + if (sign_pad && RBIGNUM_POSITIVE_P(num)) { + rb_str_update(arg, 0, 0, (VALUE)(plus_flag ? + CFSTR("+") : CFSTR(" "))); + num_index++; + } + if (zero_flag) { + pad_format_value(arg, num_index, width, zero_pad); + } + if (ISUPPER(format_str[i])) { + CFStringUppercase((CFMutableStringRef)arg, NULL); + } + } + + if (OBJ_TAINTED(arg)) { + tainted = true; + } - if (new_fmt != NULL) { - fmt = rb_str_new2(new_fmt); - } - - VALUE *stub_args = (VALUE *)malloc(sizeof(VALUE) * (argc + 4)); - stub_args[0] = Qnil; // allocator - stub_args[1] = Qnil; // format options - stub_args[2] = fmt; // format string - for (int i = 0; i < argc; i++) { - stub_args[3 + i] = argv[i]; + pad_format_value(arg, minus_flag ? -1 : 0, width, CFSTR(" ")); + num = cstr_update(&format_str, start, i - start + 1, + (char *)RSTRING_PTR(arg)); + format_len += num; + i += num; + break; + } } - rb_vm_c_stub_t *stub = (rb_vm_c_stub_t *)GET_CORE()->gen_stub(types, - true, 3, false); - - VALUE str = (*stub)((IMP)&CFStringCreateWithFormat, argc + 3, stub_args); - CFMakeCollectable((void *)str); - free(stub_args); - return str; + fmt = rb_str_new2(format_str); + return tainted ? OBJ_TAINT(fmt) : fmt; } } // extern "C"
participants (1)
-
source_changes@macosforge.org