Revision: 3617 http://trac.macosforge.org/projects/ruby/changeset/3617 Author: lsansonetti@apple.com Date: 2010-02-25 00:17:43 -0800 (Thu, 25 Feb 2010) Log Message: ----------- a new Symbol class, unicode-aware + refactored/cleaned symbol generation Modified Paths: -------------- MacRuby/branches/icu/encoding.h MacRuby/branches/icu/id.c MacRuby/branches/icu/id.h MacRuby/branches/icu/include/ruby/intern.h MacRuby/branches/icu/include/ruby/ruby.h MacRuby/branches/icu/inits.c MacRuby/branches/icu/parse.y MacRuby/branches/icu/rakelib/builder/builder.rb MacRuby/branches/icu/string.c MacRuby/branches/icu/vm.cpp Added Paths: ----------- MacRuby/branches/icu/symbol.c MacRuby/branches/icu/symbol.h Modified: MacRuby/branches/icu/encoding.h =================================================================== --- MacRuby/branches/icu/encoding.h 2010-02-25 08:15:14 UTC (rev 3616) +++ MacRuby/branches/icu/encoding.h 2010-02-25 08:17:43 UTC (rev 3617) @@ -290,6 +290,8 @@ STRING_VALID_ENCODING); } +VALUE str_inspect(rb_str_t *str, bool dump); + // The following functions should always been prefered over anything else, // especially if this "else" is RSTRING_PTR and RSTRING_LEN. // They also work on CFStrings. @@ -299,6 +301,7 @@ long rb_str_chars_len(VALUE str); UChar rb_str_get_uchar(VALUE str, long pos); void rb_str_append_uchar(VALUE str, UChar c); +unsigned long rb_str_hash_uchars(UChar *chars, long chars_len); VALUE mr_enc_s_is_compatible(VALUE klass, SEL sel, VALUE str1, VALUE str2); Modified: MacRuby/branches/icu/id.c =================================================================== --- MacRuby/branches/icu/id.c 2010-02-25 08:15:14 UTC (rev 3616) +++ MacRuby/branches/icu/id.c 2010-02-25 08:17:43 UTC (rev 3617) @@ -61,6 +61,7 @@ selLength = sel_registerName("length"); selSucc = sel_registerName("succ"); selNot = sel_registerName("!"); + selNot2 = sel_registerName("!:"); selAlloc = sel_registerName("alloc"); selAllocWithZone = sel_registerName("allocWithZone:"); selCopyWithZone = sel_registerName("copyWithZone:"); Modified: MacRuby/branches/icu/id.h =================================================================== --- MacRuby/branches/icu/id.h 2010-02-25 08:15:14 UTC (rev 3616) +++ MacRuby/branches/icu/id.h 2010-02-25 08:17:43 UTC (rev 3617) @@ -71,6 +71,7 @@ extern SEL selLength; extern SEL selSucc; extern SEL selNot; +extern SEL selNot2; extern SEL selAlloc; extern SEL selAllocWithZone; extern SEL selCopyWithZone; Modified: MacRuby/branches/icu/include/ruby/intern.h =================================================================== --- MacRuby/branches/icu/include/ruby/intern.h 2010-02-25 08:15:14 UTC (rev 3616) +++ MacRuby/branches/icu/include/ruby/intern.h 2010-02-25 08:17:43 UTC (rev 3617) @@ -587,7 +587,7 @@ VALUE rb_str_append(VALUE, VALUE); VALUE rb_str_concat(VALUE, VALUE); VALUE rb_str_plus(VALUE str1, VALUE str2); -int rb_memhash(const void *ptr, long len); +long rb_memhash(const void *ptr, long len); int rb_str_hash(VALUE); int rb_str_hash_cmp(VALUE,VALUE); int rb_str_comparable(VALUE, VALUE); Modified: MacRuby/branches/icu/include/ruby/ruby.h =================================================================== --- MacRuby/branches/icu/include/ruby/ruby.h 2010-02-25 08:15:14 UTC (rev 3616) +++ MacRuby/branches/icu/include/ruby/ruby.h 2010-02-25 08:17:43 UTC (rev 3617) @@ -249,7 +249,8 @@ #else // voodoo_float must be a function // because the parameter must be converted to float -static inline VALUE voodoo_float(float f) +static inline VALUE +voodoo_float(float f) { return *(VALUE *)(&f); } @@ -258,15 +259,9 @@ #define FIXFLOAT_P(v) (((VALUE)v & IMMEDIATE_MASK) == FIXFLOAT_FLAG) #define FIXFLOAT2DBL(v) coerce_ptr_to_double((VALUE)v) -#if WITH_OBJC -# define SYMBOL_P(x) (TYPE(x) == T_SYMBOL) -# define ID2SYM(x) (rb_id2str((ID)x)) -# define SYM2ID(x) (RSYMBOL(x)->id) -#else -# define SYMBOL_P(x) (((VALUE)(x)&~(~(VALUE)0<<RUBY_SPECIAL_SHIFT))==SYMBOL_FLAG) -# define ID2SYM(x) (((VALUE)(x)<<RUBY_SPECIAL_SHIFT)|SYMBOL_FLAG) -# define SYM2ID(x) RSHIFT((unsigned long)x,RUBY_SPECIAL_SHIFT) -#endif +#define SYMBOL_P(x) (TYPE(x) == T_SYMBOL) +#define ID2SYM(x) (rb_id2str((ID)x)) +#define SYM2ID(x) (rb_sym2id((VALUE)x)) /* special contants - i.e. non-zero and non-fixnum constants */ enum ruby_special_consts { @@ -783,7 +778,6 @@ #define RFLOAT(obj) (R_CAST(RFloat)(obj)) #if WITH_OBJC # define RFIXNUM(obj) (R_CAST(RFixnum)(obj)) -# define RSYMBOL(obj) (R_CAST(RSymbol)(obj)) #endif #define RDATA(obj) (R_CAST(RData)(obj)) #define RSTRUCT(obj) (R_CAST(RStruct)(obj)) @@ -918,34 +912,19 @@ ID rb_intern2(const char*, long); ID rb_intern_str(VALUE str); ID rb_to_id(VALUE); +ID rb_sym2id(VALUE sym); VALUE rb_id2str(ID); VALUE rb_name2sym(const char *); -#if WITH_OBJC -# define rb_sym2name(sym) (RSYMBOL(sym)->str) +const char *rb_sym2name(VALUE sym); +VALUE rb_sym2str(VALUE sym); + static inline const char *rb_id2name(ID val) { VALUE s = rb_id2str(val); return s == 0 ? NULL : rb_sym2name(s); } -#else -const char *rb_id2name(ID); -#endif -#ifdef __GNUC__ -/* __builtin_constant_p and statement expression is available - * since gcc-2.7.2.3 at least. */ -#define rb_intern(str) \ - (__builtin_constant_p(str) ? \ - ({ \ - static ID rb_intern_id_cache; \ - if (!rb_intern_id_cache) \ - rb_intern_id_cache = rb_intern(str); \ - rb_intern_id_cache; \ - }) : \ - rb_intern(str)) -#endif - const char *rb_class2name(VALUE); const char *rb_obj_classname(VALUE); Modified: MacRuby/branches/icu/inits.c =================================================================== --- MacRuby/branches/icu/inits.c 2010-02-25 08:15:14 UTC (rev 3616) +++ MacRuby/branches/icu/inits.c 2010-02-25 08:17:43 UTC (rev 3617) @@ -36,7 +36,8 @@ void Init_Object(void); void Init_pack(void); void Init_Precision(void); -void Init_sym(void); +void Init_Symbol(void); +void Init_PreSymbol(void); void Init_id(void); void Init_process(void); void Init_Random(void); @@ -63,7 +64,7 @@ void rb_call_inits() { - Init_sym(); + Init_PreSymbol(); Init_id(); Init_var_tables(); Init_Object(); @@ -73,6 +74,7 @@ Init_Enumerable(); Init_Precision(); Init_String(); + Init_Symbol(); Init_Exception(); Init_eval(); Init_jump(); Modified: MacRuby/branches/icu/parse.y =================================================================== --- MacRuby/branches/icu/parse.y 2010-02-25 08:15:14 UTC (rev 3616) +++ MacRuby/branches/icu/parse.y 2010-02-25 08:17:43 UTC (rev 3617) @@ -22,6 +22,7 @@ #include "ruby/st.h" #include "ruby/encoding.h" #include "encoding.h" +#include "symbol.h" #include "id.h" #include "re.h" #include <stdio.h> @@ -38,31 +39,6 @@ #define calloc YYCALLOC #define free YYFREE -#define ID_SCOPE_SHIFT 3 -#define ID_SCOPE_MASK 0x07 -#define ID_LOCAL 0x00 -#define ID_INSTANCE 0x01 -#define ID_GLOBAL 0x03 -#define ID_ATTRSET 0x04 -#define ID_CONST 0x05 -#define ID_CLASS 0x06 -#define ID_JUNK 0x07 -#define ID_INTERNAL ID_JUNK - -#define is_notop_id(id) ((id)>tLAST_TOKEN) -#define is_local_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_LOCAL) -#define is_global_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_GLOBAL) -#define is_instance_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_INSTANCE) -#define is_attrset_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_ATTRSET) -#define is_const_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_CONST) -#define is_class_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_CLASS) -#define is_junk_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_JUNK) - -#define is_asgn_or_id(id) ((is_notop_id(id)) && \ - (((id)&ID_SCOPE_MASK) == ID_GLOBAL || \ - ((id)&ID_SCOPE_MASK) == ID_INSTANCE || \ - ((id)&ID_SCOPE_MASK) == ID_CLASS)) - enum lex_state_e { EXPR_BEG, /* ignore newline, +/- is a sign. */ EXPR_END, /* newline significant, +/- is a operator. */ @@ -511,7 +487,6 @@ #define reg_fragment_check(str,options) reg_fragment_check_gen(parser, str, options) static NODE *reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match); #define reg_named_capture_assign(regexp,match) reg_named_capture_assign_gen(parser,regexp,match) -int rb_enc_symname2_p(const char *, int, rb_encoding *); #else #define remove_begin(node) (node) #endif /* !RIPPER */ @@ -4912,10 +4887,6 @@ # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128) #endif -#if !WITH_OBJC -#define parser_mbclen() mbclen((lex_p-1),lex_pend,parser->enc) -#define parser_precise_mbclen() rb_enc_precise_mbclen((lex_p-1),lex_pend,parser->enc) -#endif #define is_identchar(p,e,enc) (rb_enc_isalnum(*p,enc) || (*p) == '_' || !ISASCII(*p)) #define parser_is_identchar() (!parser->eofp && is_identchar((lex_p-1),lex_pend,parser->enc)) @@ -8165,14 +8136,6 @@ } } -ID -rb_id_attrset(ID id) -{ - id &= ~ID_SCOPE_MASK; - id |= ID_ATTRSET; - return id; -} - static NODE * attrset_gen(struct parser_params *parser, NODE *recv, ID id) { @@ -9260,533 +9223,53 @@ return scope; } -static const struct { - ID token; - const char *name; -} op_tbl[] = { - {tDOT2, ".."}, - {tDOT3, "..."}, - {'+', "+"}, - {'-', "-"}, - {'+', "+(binary)"}, - {'-', "-(binary)"}, - {'*', "*"}, - {'/', "/"}, - {'%', "%"}, - {tPOW, "**"}, - {tUPLUS, "+@"}, - {tUMINUS, "-@"}, - {'|', "|"}, - {'^', "^"}, - {'&', "&"}, - {'!', "!"}, - {tCMP, "<=>"}, - {'>', ">"}, - {tGEQ, ">="}, - {'<', "<"}, - {tLEQ, "<="}, - {tEQ, "=="}, - {tEQQ, "==="}, - {tNEQ, "!="}, - {tMATCH, "=~"}, - {tNMATCH, "!~"}, - {'~', "~"}, - {'!', "!"}, - {tAREF, "[]"}, - {tASET, "[]="}, - {tLSHFT, "<<"}, - {tRSHFT, ">>"}, - {tCOLON2, "::"}, - {'`', "`"}, - {0, 0} -}; - -static struct symbols { - ID last_id; -#if WITH_OBJC - CFMutableDictionaryRef sym_id; - CFMutableDictionaryRef id_str; -#else - st_table *sym_id; - st_table *id_str; -#endif - VALUE op_sym[tLAST_TOKEN]; -} global_symbols = {tLAST_TOKEN >> ID_SCOPE_SHIFT}; - -static const struct st_hash_type symhash = { - rb_str_hash_cmp, - rb_str_hash, -}; - -struct ivar2_key { - ID id; - VALUE klass; -}; - -static int -ivar2_cmp(struct ivar2_key *key1, struct ivar2_key *key2) -{ - if (key1->id == key2->id && key1->klass == key2->klass) { - return 0; - } - return 1; -} - -static int -ivar2_hash(struct ivar2_key *key) -{ - return (key->id << 8) ^ (key->klass >> 2); -} - -static const struct st_hash_type ivar2_hash_type = { - ivar2_cmp, - ivar2_hash, -}; - -void -Init_sym(void) -{ -#if WITH_OBJC - global_symbols.sym_id = CFDictionaryCreateMutable(NULL, - 0, NULL, NULL); - GC_ROOT(&global_symbols.sym_id); - global_symbols.id_str = CFDictionaryCreateMutable(NULL, - 0, NULL, NULL); - GC_ROOT(&global_symbols.id_str); -#else - global_symbols.sym_id = st_init_table_with_size(&symhash, 1000); - global_symbols.id_str = st_init_numtable_with_size(1000); -#endif - rb_intern2("", 0); -} - -#if !WITH_OBJC -void -rb_gc_mark_symbols(void) -{ - rb_mark_tbl(global_symbols.id_str); - rb_gc_mark_locations(global_symbols.op_sym, - global_symbols.op_sym + tLAST_TOKEN); -} -#endif - // XXX not thread-safe static long internal_count = 0; static ID internal_id_gen(struct parser_params *parser) { -#if 1 char buf[100]; snprintf(buf, sizeof buf, "__internal_id_tmp_%ld__", internal_count++); return rb_intern(buf); -#else - ID id = (ID)vtable_size(lvtbl->args) + (ID)vtable_size(lvtbl->vars); - id += ((tLAST_TOKEN - ID_INTERNAL) >> ID_SCOPE_SHIFT) + 1; - return ID_INTERNAL | (id << ID_SCOPE_SHIFT); -#endif } -static int -is_special_global_name(const char *m, const char *e, rb_encoding *enc) -{ - int mb = 0; +struct rb_op_tbl_entry rb_op_tbl[] = { + {'+', "+"}, + {'-', "-"}, + {'*', "*"}, + {'/', "/"}, + {'%', "%"}, + {'|', "|"}, + {'^', "^"}, + {'&', "&"}, + {'!', "!"}, + {'>', ">"}, + {'<', "<"}, + {'~', "~"}, + {'!', "!"}, + {'`', "`"}, + {tDOT2, ".."}, + {tDOT3, "..."}, + {tPOW, "**"}, + {tUPLUS, "+@"}, + {tUMINUS, "-@"}, + {tCMP, "<=>"}, + {tGEQ, ">="}, + {tLEQ, "<="}, + {tEQ, "=="}, + {tEQQ, "==="}, + {tNEQ, "!="}, + {tMATCH, "=~"}, + {tNMATCH, "!~"}, + {tAREF, "[]"}, + {tASET, "[]="}, + {tLSHFT, "<<"}, + {tRSHFT, ">>"}, + {tCOLON2, "::"}, + {0, NULL} +}; - if (m >= e) return 0; - switch (*m) { - case '~': case '*': case '$': case '?': case '!': case '@': - case '/': case '\\': case ';': case ',': case '.': case '=': - case ':': case '<': case '>': case '\"': - case '&': case '`': case '\'': case '+': - case '0': - ++m; - break; - case '-': - ++m; - if (m < e && is_identchar(m, e, enc)) { - if (!ISASCII(*m)) mb = 1; -#if WITH_OBJC - m += 1; -#else - m += rb_enc_mbclen(m, e, enc); -#endif - } - break; - default: - if (!rb_enc_isdigit(*m, enc)) return 0; - do { - if (!ISASCII(*m)) mb = 1; - ++m; - } while (rb_enc_isdigit(*m, enc)); - } - return m == e ? mb + 1 : 0; -} - -int -rb_symname_p(const char *name) -{ -#if WITH_OBJC - return rb_enc_symname_p(name, NULL); -#else - return rb_enc_symname_p(name, rb_ascii8bit_encoding()); -#endif -} - -int -rb_enc_symname_p(const char *name, rb_encoding *enc) -{ - return rb_enc_symname2_p(name, strlen(name), enc); -} - -int -rb_enc_symname2_p(const char *name, int len, rb_encoding *enc) -{ - const char *m = name; - const char *e = m + len; - int localid = Qfalse; - - if (!m) return Qfalse; - switch (*m) { - case '\0': - return Qfalse; - - case '$': - if (is_special_global_name(++m, e, enc)) return Qtrue; - goto id; - - case '@': - if (*++m == '@') ++m; - goto id; - - case '<': - switch (*++m) { - case '<': ++m; break; - case '=': if (*++m == '>') ++m; break; - default: break; - } - break; - - case '>': - switch (*++m) { - case '>': case '=': ++m; break; - } - break; - - case '=': - switch (*++m) { - case '~': ++m; break; - case '=': if (*++m == '=') ++m; break; - default: return Qfalse; - } - break; - - case '*': - if (*++m == '*') ++m; - break; - - case '+': case '-': - if (*++m == '@') ++m; - break; - - case '|': case '^': case '&': case '/': case '%': case '~': case '`': - ++m; - break; - - case '[': - if (*++m != ']') return Qfalse; - if (*++m == '=') ++m; - break; - - case '!': - switch (*++m) { - case '\0': return Qtrue; - case '=': case '~': ++m; break; - default: return Qfalse; - } - break; - - default: - localid = !rb_enc_isupper(*m, enc); - id: - if (m >= e || (*m != '_' && !rb_enc_isalpha(*m, enc) && ISASCII(*m))) - return Qfalse; -#if WITH_OBJC - while (m < e && is_identchar(m, e, enc)) m += 1; -#else - while (m < e && is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc); -#endif - if (localid) { - switch (*m) { - case '!': case '?': case '=': ++m; - } - } - break; - } - return *m ? Qfalse : Qtrue; -} - -#if WITH_OBJC -static inline VALUE -rsymbol_new(const char *name, const int len, ID id) -{ - assert(rb_cSymbol != 0); - - struct RSymbol *sym = (struct RSymbol *)orig_malloc(sizeof(struct RSymbol)); - sym->klass = rb_cSymbol; - sym->str = orig_malloc(len + 1); - strcpy(sym->str, name); - sym->len = len; - sym->id = id; - - return (VALUE)sym; -} -#endif - -ID -rb_intern3(const char *name, long len, rb_encoding *enc) -{ - const char *m = name; - const char *e = m + len; - VALUE str; - ID id; - int last; - int mb; -#if !WITH_OBJC - struct RString fake_str; - fake_str.basic.isa = NULL; - fake_str.basic.flags = T_STRING|RSTRING_NOEMBED|FL_FREEZE; - fake_str.basic.klass = rb_cString; - fake_str.as.heap.len = len; - fake_str.as.heap.ptr = (char *)name; - fake_str.as.heap.aux.capa = len; - str = (VALUE)&fake_str; - rb_enc_associate(str, enc); - - if (st_lookup(global_symbols.sym_id, str, (st_data_t *)&id)) - return id; -#else - long sname = strlen(name); - assert(len <= sname); - if (sname != len) { - char *tmp = (char *)alloca(len + 1); - memcpy(tmp, name, len); - tmp[len] = '\0'; - m = name = tmp; - e = m + len; - } - SEL name_hash = sel_registerName(name); - if (name_hash == sel_ignored) { - if (strcmp(name, "retain") == 0) { - name_hash = (SEL)0x1000; - } - else if (strcmp(name, "release") == 0) { - name_hash = (SEL)0x2000; - } - else if (strcmp(name, "retainCount") == 0) { - name_hash = (SEL)0x3000; - } - else if (strcmp(name, "autorelease") == 0) { - name_hash = (SEL)0x4000; - } - else if (strcmp(name, "dealloc") == 0) { - name_hash = (SEL)0x5000; - } - else { - printf("unrecognized ignored sel %s\n", name); - abort(); - } - } - id = (ID)CFDictionaryGetValue((CFDictionaryRef)global_symbols.sym_id, - (const void *)name_hash); - if (id != 0) - return id; -#endif - - last = len-1; - id = 0; - switch (*m) { - case '$': - id |= ID_GLOBAL; - if ((mb = is_special_global_name(++m, e, enc)) != 0) { - if (!--mb) { -#if WITH_OBJC - enc = NULL; -#else - enc = rb_ascii8bit_encoding(); -#endif - } - goto new_id; - } - break; - case '@': - if (m[1] == '@') { - m++; - id |= ID_CLASS; - } - else { - id |= ID_INSTANCE; - } - m++; - break; - default: - if (m[0] != '_' && rb_enc_isascii((unsigned char)m[0], enc) - && !rb_enc_isalnum(m[0], enc)) { - /* operators */ - int i; - - for (i=0; op_tbl[i].token; i++) { - if (*op_tbl[i].name == *m && - strcmp(op_tbl[i].name, m) == 0) { - id = op_tbl[i].token; - goto id_register; - } - } - } - - if (m[last] == '=') { - /* attribute assignment */ - id = rb_intern3(name, last, enc); - if (id > tLAST_TOKEN && !is_attrset_id(id)) { - enc = rb_enc_get(rb_id2str(id)); - id = rb_id_attrset(id); - goto id_register; - } - id = ID_ATTRSET; - } - else if (rb_enc_isupper(m[0], enc)) { - id = ID_CONST; - } - else { - id = ID_LOCAL; - } - break; - } - mb = 0; - if (!rb_enc_isdigit(*m, enc)) { - while (m <= name + last && is_identchar(m, e, enc)) { - if (ISASCII(*m)) { - m++; - } - else { - mb = 1; -#if WITH_OBJC - m += 1; -#else - m += rb_enc_mbclen(m, e, enc); -#endif - } - } - } - if (m - name < len) id = ID_JUNK; -#if !WITH_OBJC - if (enc != rb_usascii_encoding()) { - /* - * this clause makes sense only when called from other than - * rb_intern_str() taking care of code-range. - */ - if (!mb) { - for (; m <= name + len; ++m) { - if (!ISASCII(*m)) goto mbstr; - } - enc = rb_usascii_encoding(); - } - mbstr:; - } -#endif - new_id: - id |= ++global_symbols.last_id << ID_SCOPE_SHIFT; - id_register: - str = rsymbol_new(name, len, id); -#if WITH_OBJC - CFDictionarySetValue(global_symbols.sym_id, (const void *)name_hash, - (const void *)id); - CFDictionarySetValue(global_symbols.id_str, (const void *)id, - (const void *)str); -#else - st_add_direct(global_symbols.sym_id, (st_data_t)str, id); - st_add_direct(global_symbols.id_str, id, (st_data_t)str); -#endif - return id; -} - -ID -rb_intern2(const char *name, long len) -{ -#if WITH_OBJC - return rb_intern3(name, len, NULL); -#else - return rb_intern3(name, len, rb_usascii_encoding()); -#endif -} - -#undef rb_intern -ID -rb_intern(const char *name) -{ - return rb_intern2(name, strlen(name)); -} - -ID -rb_intern_str(VALUE str) -{ - const char *s = RSTRING_PTR(str); - return rb_intern3(s, strlen(s), NULL); -} - -VALUE -rb_id2str(ID id) -{ - st_data_t data; - - if (id < tLAST_TOKEN) { - int i = 0; - - for (i=0; op_tbl[i].token; i++) { - if (op_tbl[i].token == id) { - VALUE str = global_symbols.op_sym[i]; - if (!str) { - str = rsymbol_new(op_tbl[i].name, strlen(op_tbl[i].name), op_tbl[i].token); - global_symbols.op_sym[i] = str; - } - return str; - } - } - } - - data = (VALUE)CFDictionaryGetValue( - (CFDictionaryRef)global_symbols.id_str, - (const void *)id); - if (data != 0) { - return data; - } - - if (is_attrset_id(id)) { - ID id2 = (id & ~ID_SCOPE_MASK) | ID_LOCAL; - VALUE str; - - while (!(str = rb_id2str(id2))) { - if (!is_local_id(id2)) return 0; - id2 = (id & ~ID_SCOPE_MASK) | ID_CONST; - } - str = rb_str_dup(str); - rb_str_cat(str, "=", 1); - rb_intern_str(str); - data = (VALUE)CFDictionaryGetValue( - (CFDictionaryRef)global_symbols.id_str, - (const void *)id); - if (data != 0) { - return data; - } - } - return 0; -} - -VALUE -rb_name2sym(const char *name) -{ - return rb_id2str(rb_intern(name)); -} - const char * ruby_node_name(int node) { @@ -9797,41 +9280,7 @@ return 0; } } - -/* - * call-seq: - * Symbol.all_symbols => array - * - * Returns an array of all the symbols currently in Ruby's symbol - * table. - * - * Symbol.all_symbols.size #=> 903 - * Symbol.all_symbols[1,20] #=> [:floor, :ARGV, :Binding, :symlink, - * :chown, :EOFError, :$;, :String, - * :LOCK_SH, :"setuid?", :$<, - * :default_proc, :compact, :extend, - * :Tms, :getwd, :$=, :ThreadGroup, - * :wait2, :$>] - */ -VALUE -rb_sym_all_symbols(void) -{ - const void **values; - long count; - VALUE ary; - - ary = rb_ary_new(); - count = CFDictionaryGetCount(global_symbols.id_str); - if (count == 0) { - return ary; - } - values = alloca(sizeof(void *) * count); - CFDictionaryGetKeysAndValues(global_symbols.id_str, NULL, values); - CFArrayReplaceValues((CFMutableArrayRef)ary, CFRangeMake(0, 0), values, count); - return ary; -} - int rb_is_const_id(ID id) { @@ -9867,6 +9316,14 @@ return Qfalse; } +ID +rb_id_attrset(ID id) +{ + id &= ~ID_SCOPE_MASK; + id |= ID_ATTRSET; + return id; +} + #endif /* !RIPPER */ static void Modified: MacRuby/branches/icu/rakelib/builder/builder.rb =================================================================== --- MacRuby/branches/icu/rakelib/builder/builder.rb 2010-02-25 08:15:14 UTC (rev 3616) +++ MacRuby/branches/icu/rakelib/builder/builder.rb 2010-02-25 08:17:43 UTC (rev 3617) @@ -6,7 +6,7 @@ random range rational re ruby signal sprintf st string struct time transcode util variable version thread id objc bs ucnv encoding main dln dmyext marshal gcd vm_eval prelude miniprelude gc-stub bridgesupport compiler dispatcher vm - debugger MacRuby MacRubyDebuggerConnector NSArray NSDictionary + symbol debugger MacRuby MacRubyDebuggerConnector NSArray NSDictionary } EXTENSIONS = %w{ Modified: MacRuby/branches/icu/string.c =================================================================== --- MacRuby/branches/icu/string.c 2010-02-25 08:15:14 UTC (rev 3616) +++ MacRuby/branches/icu/string.c 2010-02-25 08:17:43 UTC (rev 3617) @@ -22,9 +22,6 @@ #include "ruby/node.h" #include "vm.h" -VALUE rb_cSymbol; // XXX move me outside -VALUE rb_cByteString; // XXX remove all references about me, i'm dead - VALUE rb_cString; VALUE rb_cNSString; VALUE rb_cNSMutableString; @@ -706,8 +703,12 @@ // self[pos..pos+len] = str assert(pos >= 0 && len >= 0); + if (str != NULL) { + str_must_have_compatible_encoding(self, str); + str_make_same_format(self, str); + } + character_boundaries_t beg, end; - if (pos + len == 0) { // Positioning before the string. const long offset = 0; @@ -740,8 +741,6 @@ long bytes_to_add = 0; if (str != NULL) { - str_must_have_compatible_encoding(self, str); - str_make_same_format(self, str); if (str->length_in_bytes > bytes_to_splice) { str_resize_bytes(self, self->length_in_bytes + (str->length_in_bytes - bytes_to_splice)); @@ -1826,12 +1825,12 @@ str_append_uchar(RSTR(result), c); } -static VALUE -str_inspect(VALUE str, bool dump) +VALUE +str_inspect(rb_str_t *str, bool dump) { - const bool uchars = str_is_stored_in_uchars(RSTR(str)); + const bool uchars = str_is_stored_in_uchars(str); const long len = uchars - ? str_length(RSTR(str), true) : RSTR(str)->length_in_bytes; + ? str_length(str, true) : str->length_in_bytes; if (len == 0) { return rb_str_new2("\"\""); @@ -1840,12 +1839,12 @@ // Allocate an UTF-8 string with a good initial capacity. // Binary strings will likely have most bytes escaped. const long result_init_len = - BINARY_ENC(RSTR(str)->encoding) ? (len * 5) + 2 : len + 2; + BINARY_ENC(str->encoding) ? (len * 5) + 2 : len + 2; VALUE result = rb_unicode_str_new(NULL, result_init_len); #define GET_UCHAR(pos) \ ((uchars \ - ? RSTR(str)->data.uchars[pos] : (UChar)RSTR(str)->data.bytes[pos])) + ? str->data.uchars[pos] : (UChar)str->data.bytes[pos])) inspect_append(result, '"', false); for (long i = 0; i < len; i++) { @@ -1908,7 +1907,7 @@ static VALUE rstr_inspect(VALUE self, SEL sel) { - return str_inspect(self, false); + return str_inspect(RSTR(self), false); } /* @@ -1922,7 +1921,7 @@ static VALUE rstr_dump(VALUE self, SEL sel) { - return str_inspect(self, true); + return str_inspect(RSTR(self), true); } /* @@ -3005,10 +3004,6 @@ rb_fs = Qnil; rb_define_variable("$;", &rb_fs); rb_define_variable("$-F", &rb_fs); - - // rb_cSymbol is defined earlier in Init_PreVM(). - rb_set_class_path(rb_cSymbol, rb_cObject, "Symbol"); - rb_const_set(rb_cObject, rb_intern("Symbol"), rb_cSymbol); } bool @@ -3487,16 +3482,41 @@ return (VALUE)str_dup(RSTR(str)); } if (TYPE(str) == T_SYMBOL) { - return rb_str_new2(RSYMBOL(str)->str); + return rb_sym_to_s(str); } abort(); // TODO } -int +// Unicode characters hashing function, copied from CoreFoundation. +// This function might have some performance issues on large strings. +unsigned long +rb_str_hash_uchars(UChar *chars, long len) +{ +#define HashNextFourUniChars(accessStart, accessEnd, pointer) \ + {result = result * 67503105 + (accessStart 0 accessEnd) * 16974593 + (accessStart 1 accessEnd) * 66049 + (accessStart 2 accessEnd) * 257 + (accessStart 3 accessEnd); pointer += 4;} + +#define HashNextUniChar(accessStart, accessEnd, pointer) \ + {result = result * 257 + (accessStart 0 accessEnd); pointer++;} + + assert(len > 0); + unsigned long result = len; + const UChar *end4 = chars + (len & ~3); + const UChar *end = chars + len; + // First count in fours + while (chars < end4) HashNextFourUniChars(chars[, ], chars); + // Then for the last <4 chars, count in ones... + while (chars < end) HashNextUniChar(chars[, ], chars); + return result + (result << (len & 31)); + +#undef HashNextFourUniChars +#undef HashNextUniChar +} + +long rb_memhash(const void *ptr, long len) { CFDataRef data = CFDataCreate(NULL, (const UInt8 *)ptr, len); - int code = CFHash(data); + const long code = CFHash(data); CFRelease((CFTypeRef)data); return code; } @@ -3531,11 +3551,3 @@ { abort(); // TODO } - -// Symbols (TODO: move me outside) - -VALUE -rb_sym_to_s(VALUE sym) -{ - return rb_str_new2(RSYMBOL(sym)->str); -} Added: MacRuby/branches/icu/symbol.c =================================================================== --- MacRuby/branches/icu/symbol.c (rev 0) +++ MacRuby/branches/icu/symbol.c 2010-02-25 08:17:43 UTC (rev 3617) @@ -0,0 +1,362 @@ +/* + * MacRuby Symbols. + * + * This file is covered by the Ruby license. See COPYING for more details. + * + * Copyright (C) 2010, Apple Inc. All rights reserved. + */ + +#include <wctype.h> + +#include "ruby.h" +#include "ruby/encoding.h" +#include "encoding.h" +#include "symbol.h" +#include "ruby/node.h" +#include "vm.h" + +VALUE rb_cSymbol; + +static CFMutableDictionaryRef sym_id = NULL, id_str = NULL; +static long last_id = 0; + +typedef struct { + VALUE klass; + rb_str_t *str; + ID id; + SEL sel; +} rb_sym_t; + +#define RSYM(obj) ((rb_sym_t *)(obj)) + +static rb_sym_t * +sym_alloc(rb_str_t *str, ID id) +{ + rb_sym_t *sym = (rb_sym_t *)malloc(sizeof(rb_sym_t)); + assert(rb_cSymbol != 0); + sym->klass = rb_cSymbol; + GC_RETAIN(str); // never released + sym->str = str; + sym->id = id; + sym->sel = NULL; // lazy + return sym; +} + +ID +rb_intern_str(VALUE str) +{ + UChar *chars = NULL; + long chars_len = 0; + bool need_free = false; + rb_str_get_uchars(str, &chars, &chars_len, &need_free); + assert(chars_len > 0); + + const unsigned long name_hash = rb_str_hash_uchars(chars, chars_len); + ID id = (ID)CFDictionaryGetValue(sym_id, (const void *)name_hash); + if (id != 0) { + goto return_id; + } + + rb_sym_t *sym = NULL; + + switch (chars[0]) { + case '$': + id = ID_GLOBAL; + break; + + case '@': + if (chars_len > 1 && chars[1] == '@') { + id = ID_CLASS; + } + else { + id = ID_INSTANCE; + } + break; + + default: + if (chars_len > 1 && chars[chars_len - 1] == '=') { + // Attribute assignment. + id = rb_intern_str(rb_str_substr(str, 0, chars_len - 1)); + if (!is_attrset_id(id)) { + id = rb_id_attrset(id); + goto id_register; + } + id = ID_ATTRSET; + } + else if (iswupper(chars[0])) { + id = ID_CONST; + } + else { + id = ID_LOCAL; + } + break; + } + + id |= ++last_id << ID_SCOPE_SHIFT; + +id_register: +//printf("register %s hash %ld id %ld\n", RSTRING_PTR(str), name_hash, id); + sym = sym_alloc(RSTR(str), id); + CFDictionarySetValue(sym_id, (const void *)name_hash, (const void *)id); + CFDictionarySetValue(id_str, (const void *)id, (const void *)sym); + +return_id: + if (need_free && chars != NULL) { + free(chars); + } + return id; +} + +VALUE +rb_id2str(ID id) +{ + VALUE sym = (VALUE)CFDictionaryGetValue(id_str, (const void *)id); + if (sym != 0) { +//printf("lookup %ld -> %s\n", id, rb_sym2name(sym)); + return sym; + } + + if (is_attrset_id(id)) { + // Attribute assignment. + ID id2 = (id & ~ID_SCOPE_MASK) | ID_LOCAL; + + while ((sym = rb_id2str(id2)) == 0) { + if (!is_local_id(id2)) { +//printf("lookup %ld -> FAIL\n", id); + return 0; + } + id2 = (id & ~ID_SCOPE_MASK) | ID_CONST; + } + + VALUE str = rb_str_dup((VALUE)RSYM(sym)->str); + rb_str_cat(str, "=", 1); + rb_intern_str(str); + + // Retry one more time. + sym = (VALUE)CFDictionaryGetValue(id_str, (const void *)id); + if (sym != 0) { +//printf("lookup %ld -> %s\n", id, rb_sym2name(sym)); + return sym; + } + } +//printf("lookup %ld -> FAIL\n", id); + return 0; +} + +ID +rb_intern3(const char *name, long len, rb_encoding *enc) +{ + VALUE str = rb_enc_str_new(name, len, enc); + return rb_intern_str(str); +} + +ID +rb_intern2(const char *name, long len) +{ + return rb_intern_str(rb_str_new(name, len)); +} + +ID +rb_intern(const char *name) +{ + return rb_intern_str(rb_str_new2(name)); +} + +ID +rb_sym2id(VALUE sym) +{ + return RSYM(sym)->id; +} + +VALUE +rb_name2sym(const char *name) +{ + return rb_id2str(rb_intern(name)); +} + +VALUE +rb_sym2str(VALUE sym) +{ + return (VALUE)RSYM(sym)->str; +} + +VALUE +rb_sym_to_s(VALUE sym) +{ + return rb_str_dup(rb_sym2str(sym)); +} + +const char * +rb_sym2name(VALUE sym) +{ + return RSTRING_PTR(RSYM(sym)->str); +} + +/* + * call-seq: + * Symbol.all_symbols => array + * + * Returns an array of all the symbols currently in Ruby's symbol + * table. + * + * Symbol.all_symbols.size #=> 903 + * Symbol.all_symbols[1,20] #=> [:floor, :ARGV, :Binding, :symlink, + * :chown, :EOFError, :$;, :String, + * :LOCK_SH, :"setuid?", :$<, + * :default_proc, :compact, :extend, + * :Tms, :getwd, :$=, :ThreadGroup, + * :wait2, :$>] + */ + +static VALUE +rsym_all_symbols(VALUE klass, SEL sel) +{ + VALUE ary = rb_ary_new(); + const long count = CFDictionaryGetCount(id_str); + if (count >= 0) { + const void **values = (const void **)malloc(sizeof(void *) * count); + CFDictionaryGetKeysAndValues(id_str, NULL, values); + for (long i = 0; i < count; i++) { + rb_ary_push(ary, (VALUE)values[i]); + } + free(values); + } + return ary; +} + +void +Init_PreSymbol(void) +{ + sym_id = CFDictionaryCreateMutable(NULL, 0, NULL, NULL); + id_str = CFDictionaryCreateMutable(NULL, 0, NULL, NULL); + last_id = 1000; + + // Pre-register parser symbols. + for (int i = 0; rb_op_tbl[i].token != 0; i++) { + VALUE str = rb_str_new2(rb_op_tbl[i].name); + + UChar *chars = NULL; + long chars_len = 0; + bool need_free = false; + rb_str_get_uchars(str, &chars, &chars_len, &need_free); + assert(chars_len > 0); + + ID id = rb_op_tbl[i].token; + rb_sym_t *sym = sym_alloc(RSTR(str), id); + unsigned long name_hash = rb_str_hash_uchars(chars, chars_len); + +//printf("pre-register %s hash %ld id %ld\n", RSTRING_PTR(str), name_hash, id); + + CFDictionarySetValue(sym_id, (const void *)name_hash, (const void *)id); + CFDictionarySetValue(id_str, (const void *)id, (const void *)sym); + + if (need_free && chars != NULL) { + free(chars); + } + } +} + +/* + * call-seq: + * sym == obj => true or false + * + * Equality---If <i>sym</i> and <i>obj</i> are exactly the same + * symbol, returns <code>true</code>. Otherwise, compares them + * as strings. + */ + +static VALUE +rsym_equal(VALUE sym, SEL sel, VALUE other) +{ + return sym == other ? Qtrue : Qfalse; +} + +/* + * call-seq: + * sym.inspect => string + * + * Returns the representation of <i>sym</i> as a symbol literal. + * + * :fred.inspect #=> ":fred" + */ + +static VALUE +rsym_inspect(VALUE sym, SEL sel) +{ + VALUE str = rb_str_new2(":"); + rb_str_concat(str, str_inspect(RSYM(sym)->str, true)); + return str; +} + +/* + * call-seq: + * sym.to_proc + * + * Returns a _Proc_ object which respond to the given method by _sym_. + * + * (1..3).collect(&:to_s) #=> ["1", "2", "3"] + */ + +static VALUE +rsym_to_proc(VALUE sym, SEL sel) +{ + SEL msel = sel_registerName(rb_id2name(SYM2ID(sym))); + rb_vm_block_t *b = rb_vm_create_block_calling_sel(msel); + return rb_proc_alloc_with_block(rb_cProc, b); +} + +/* + * call-seq: + * sym.id2name => string + * sym.to_s => string + * + * Returns the name or string corresponding to <i>sym</i>. + * + * :fred.id2name #=> "fred" + */ + +static VALUE +rsym_to_s(VALUE sym, SEL sel) +{ + return rb_sym_to_s(sym); +} + +/* + * call-seq: + * sym.to_sym => sym + * sym.intern => sym + * + * In general, <code>to_sym</code> returns the <code>Symbol</code> + * corresponding to an object. As <i>sym</i> is already a symbol, + * <code>self</code> is returned in this case. + */ + +static VALUE +rsym_to_sym(VALUE sym, SEL sel) +{ + return sym; +} + +void +Init_Symbol(void) +{ + // rb_cSymbol is defined earlier in Init_PreVM(). + rb_set_class_path(rb_cSymbol, rb_cObject, "Symbol"); + rb_const_set(rb_cObject, rb_intern("Symbol"), rb_cSymbol); + + rb_undef_alloc_func(rb_cSymbol); + rb_undef_method(*(VALUE *)rb_cSymbol, "new"); + rb_objc_define_method(*(VALUE *)rb_cSymbol, "all_symbols", + rsym_all_symbols, 0); + + rb_objc_define_method(rb_cSymbol, "==", rsym_equal, 1); + rb_objc_define_method(rb_cSymbol, "eql?", rsym_equal, 1); + //rb_objc_define_method(rb_cSymbol, "<=>", rsym_cmp, 1); + rb_objc_define_method(rb_cSymbol, "inspect", rsym_inspect, 0); + rb_objc_define_method(rb_cSymbol, "to_proc", rsym_to_proc, 0); + rb_objc_define_method(rb_cSymbol, "to_s", rsym_to_s, 0); + rb_objc_define_method(rb_cSymbol, "id2name", rsym_to_s, 0); + rb_objc_define_method(rb_cSymbol, "description", rsym_to_s, 0); + rb_objc_define_method(rb_cSymbol, "intern", rsym_to_sym, 0); + rb_objc_define_method(rb_cSymbol, "to_sym", rsym_to_sym, 0); +} Added: MacRuby/branches/icu/symbol.h =================================================================== --- MacRuby/branches/icu/symbol.h (rev 0) +++ MacRuby/branches/icu/symbol.h 2010-02-25 08:17:43 UTC (rev 3617) @@ -0,0 +1,54 @@ +/* + * MacRuby Symbols. + * + * This file is covered by the Ruby license. See COPYING for more details. + * + * Copyright (C) 2010, Apple Inc. All rights reserved. + */ + +#ifndef __SYMBOL_H_ +#define __SYMBOL_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +// For the parser. +#define ID_SCOPE_SHIFT 3 +#define ID_SCOPE_MASK 0x07 +#define ID_LOCAL 0x00 +#define ID_INSTANCE 0x01 +#define ID_GLOBAL 0x03 +#define ID_ATTRSET 0x04 +#define ID_CONST 0x05 +#define ID_CLASS 0x06 +#define ID_JUNK 0x07 +#define ID_INTERNAL ID_JUNK + +#define is_notop_id(id) (true) +#define is_local_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_LOCAL) +#define is_global_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_GLOBAL) +#define is_instance_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_INSTANCE) +#define is_attrset_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_ATTRSET) +#define is_const_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_CONST) +#define is_class_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_CLASS) +#define is_junk_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_JUNK) + +#define is_asgn_or_id(id) ((is_notop_id(id)) && \ + (((id)&ID_SCOPE_MASK) == ID_GLOBAL || \ + ((id)&ID_SCOPE_MASK) == ID_INSTANCE || \ + ((id)&ID_SCOPE_MASK) == ID_CLASS)) + +struct rb_op_tbl_entry { + ID token; + const char *name; +}; + +// Defined in parse.y. +extern struct rb_op_tbl_entry rb_op_tbl[]; + +#if defined(__cplusplus) +} // extern "C" +#endif + +#endif // __SYMBOL_H_ Modified: MacRuby/branches/icu/vm.cpp =================================================================== --- MacRuby/branches/icu/vm.cpp 2010-02-25 08:15:14 UTC (rev 3616) +++ MacRuby/branches/icu/vm.cpp 2010-02-25 08:17:43 UTC (rev 3617) @@ -2845,8 +2845,8 @@ int n = 0; VALUE args[3]; - args[n++] = rb_funcall(rb_cNameErrorMesg, '!', 3, rb_str_new2(format), - obj, meth); + VALUE not_args[3] = {rb_str_new2(format), obj, meth}; + args[n++] = rb_vm_call(rb_cNameErrorMesg, selNot2, 3, not_args, false); args[n++] = meth; if (exc == rb_eNoMethodError) { args[n++] = rb_ary_new4(argc - 1, argv + 1);
participants (1)
-
source_changes@macosforge.org