Revision: 192 http://trac.macosforge.org/projects/ruby/changeset/192 Author: lsansonetti@apple.com Date: 2008-05-15 01:06:22 -0700 (Thu, 15 May 2008) Log Message: ----------- removing the upstream encoding implementations and using CFString's instead + misc bug and memory leak fixes Modified Paths: -------------- MacRuby/trunk/bs.c MacRuby/trunk/debug.c MacRuby/trunk/encoding.c MacRuby/trunk/gc.c MacRuby/trunk/include/ruby/encoding.h MacRuby/trunk/io.c MacRuby/trunk/marshal.c MacRuby/trunk/numeric.c MacRuby/trunk/objc.m MacRuby/trunk/parse.y MacRuby/trunk/re.c MacRuby/trunk/ruby.c MacRuby/trunk/string.c MacRuby/trunk/time.c MacRuby/trunk/transcode.c Modified: MacRuby/trunk/bs.c =================================================================== --- MacRuby/trunk/bs.c 2008-05-11 20:36:13 UTC (rev 191) +++ MacRuby/trunk/bs.c 2008-05-15 08:06:22 UTC (rev 192) @@ -186,6 +186,7 @@ break; } } + free(type_modifier); } static inline bool @@ -921,6 +922,7 @@ ASSERT_ALLOC(bs_informal_method); bs_informal_method->name = sel_registerName(selector); + free(selector); bs_informal_method->class_method = get_boolean_attribute(reader, "class_method", false); bs_informal_method->type = method_type; Modified: MacRuby/trunk/debug.c =================================================================== --- MacRuby/trunk/debug.c 2008-05-11 20:36:13 UTC (rev 191) +++ MacRuby/trunk/debug.c 2008-05-15 08:06:22 UTC (rev 192) @@ -21,6 +21,7 @@ enum ruby_value_type value_type; enum node_type node_type; enum { +#if !WITH_OBJC RUBY_ENCODING_INLINE_MAX = ENCODING_INLINE_MAX, RUBY_ENCODING_SHIFT = ENCODING_SHIFT, RUBY_ENCODING_MASK = ENCODING_MASK, @@ -28,7 +29,8 @@ RUBY_ENC_CODERANGE_UNKNOWN = ENC_CODERANGE_UNKNOWN, RUBY_ENC_CODERANGE_7BIT = ENC_CODERANGE_7BIT, RUBY_ENC_CODERANGE_VALID = ENC_CODERANGE_VALID, - RUBY_ENC_CODERANGE_BROKEN = ENC_CODERANGE_BROKEN, + RUBY_ENC_CODERANGE_BROKEN = ENC_CODERANGE_BROKEN, +#endif RUBY_FL_MARK = FL_MARK, RUBY_FL_RESERVED = FL_RESERVED, RUBY_FL_FINALIZE = 
FL_FINALIZE, Modified: MacRuby/trunk/encoding.c =================================================================== --- MacRuby/trunk/encoding.c 2008-05-11 20:36:13 UTC (rev 191) +++ MacRuby/trunk/encoding.c 2008-05-15 08:06:22 UTC (rev 192) @@ -20,6 +20,100 @@ static ID id_encoding, id_base_encoding; static VALUE rb_cEncoding; +#if WITH_OBJC + +static CFMutableDictionaryRef __encodings = NULL; + +static VALUE +enc_new(const CFStringEncoding *enc) +{ + return Data_Wrap_Struct(rb_cEncoding, NULL, NULL, (void *)enc); +} + +static void +enc_init_db(void) +{ + const CFStringEncoding *e; + + __encodings = CFDictionaryCreateMutable(NULL, 0, NULL, NULL); + + e = CFStringGetListOfAvailableEncodings(); + while (e != NULL && *e != kCFStringEncodingInvalidId) { + VALUE iana; + VALUE encoding; + + encoding = enc_new(e); + + iana = (VALUE)CFStringConvertEncodingToIANACharSetName(*e); + if (iana != 0) { + const char *name; + char *p; + + name = RSTRING_CPTR(iana); + p = strchr(name, '-'); + if ((p = strchr(name, '-')) != NULL + || islower(*name)) { + char *tmp = alloca(strlen(name)); + strcpy(tmp, name); + if (p != NULL) { + p = tmp + (p - name); + do { + *p = '_'; + p++; + p = strchr(p, '-'); + } + while (p != NULL); + } + if (islower(*tmp)) + *tmp = toupper(*tmp); + name = tmp; + } + rb_define_const(rb_cEncoding, name, encoding); + } + CFDictionarySetValue(__encodings, (const void *)(*e), + (const void *)encoding); + e++; + } + + assert(CFDictionaryGetCount((CFDictionaryRef)__encodings) > 0); +} + +static VALUE +enc_make(const CFStringEncoding *enc) +{ + VALUE v; + v = (VALUE)CFDictionaryGetValue( (CFDictionaryRef)__encodings, + (const void *)(*enc)); + assert(v != 0); + return v; +} + +VALUE +rb_enc_from_encoding(rb_encoding *enc) +{ + return enc_make(enc); +} + +static inline CFStringEncoding +rb_enc_to_enc(VALUE v) +{ + return *(CFStringEncoding *)DATA_PTR(v); +} + +static inline CFStringEncoding * +rb_enc_to_enc_ptr(VALUE v) +{ + return (CFStringEncoding *)DATA_PTR(v); +} 
+ +rb_encoding * +rb_to_encoding(VALUE v) +{ + return rb_enc_to_enc_ptr(v); +} + +#else + struct rb_encoding_entry { const char *name; rb_encoding *enc; @@ -81,9 +175,6 @@ { VALUE enc = Data_Wrap_Struct(rb_cEncoding, enc_mark, 0, encoding); encoding->auxiliary_data = (void *)enc; -#if WITH_OBJC - rb_objc_retain(enc); -#endif return enc; } @@ -348,12 +439,17 @@ return index; } #endif +#endif // WITH_OBJC int rb_enc_dummy_p(rb_encoding *enc) { +#if WITH_OBJC + return Qfalse; +#else VALUE encoding = rb_enc_from_encoding(enc); return ENC_DUMMY_P(encoding); +#endif } /* @@ -375,6 +471,7 @@ return rb_enc_dummy_p(rb_to_encoding(enc)) ? Qtrue : Qfalse; } +#if !WITH_OBJC static int enc_alias(const char *alias, int idx) { @@ -576,6 +673,7 @@ rb_raise(rb_eTypeError, "wrong argument type %s (not encode capable)", etype); } } +#endif ID rb_id_encoding(void) @@ -586,12 +684,10 @@ return id_encoding; } +#if !WITH_OBJC int rb_enc_internal_get_index(VALUE obj) { -#if WITH_OBJC - return 0; -#else int i; i = ENCODING_GET_INLINED(obj); @@ -602,13 +698,11 @@ i = NUM2INT(iv); } return i; -#endif } void rb_enc_internal_set_index(VALUE obj, int idx) { -#if !WITH_OBJC if (idx < ENCODING_INLINE_MAX) { ENCODING_SET_INLINED(obj, idx); return; @@ -616,7 +710,6 @@ ENCODING_SET_INLINED(obj, ENCODING_INLINE_MAX); rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx)); return; -#endif } void @@ -648,31 +741,31 @@ rb_encoding* rb_enc_get(VALUE obj) { -#if WITH_OBJC - return rb_ascii8bit_encoding(); /* FIXME */ -#else return rb_enc_from_index(rb_enc_get_index(obj)); -#endif } rb_encoding* rb_enc_check(VALUE str1, VALUE str2) { -#if WITH_OBJC - return NULL; -#else rb_encoding *enc = rb_enc_compatible(str1, str2); if (!enc) rb_raise(rb_eArgError, "character encodings differ: %s and %s", rb_enc_name(rb_enc_get(str1)), rb_enc_name(rb_enc_get(str2))); return enc; +} #endif -} rb_encoding* rb_enc_compatible(VALUE str1, VALUE str2) { +#if WITH_OBJC + /* TODO */ + rb_encoding *enc = rb_enc_get(str1); + if (enc == 
rb_enc_get(str2)) + return enc; + return NULL; +#else int idx1, idx2; rb_encoding *enc1, *enc2; @@ -724,15 +817,17 @@ return enc2; } return 0; +#endif } +#if !WITH_OBJC void rb_enc_copy(VALUE obj1, VALUE obj2) { rb_enc_associate_index(obj1, rb_enc_get_index(obj2)); } +#endif - /* * call-seq: * obj.encoding => encoding @@ -743,26 +838,18 @@ VALUE rb_obj_encoding(VALUE obj) { -#if WITH_OBJC - /* TODO */ - return Qnil; -#else rb_encoding *enc = rb_enc_get(obj); if (!enc) { rb_raise(rb_eTypeError, "unknown encoding"); } return rb_enc_from_encoding(enc); -#endif } +#if !WITH_OBJC int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc) { -#if WITH_OBJC - int n = 1; -#else int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e); -#endif if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p) return MBCLEN_CHARFOUND_LEN(n); else { @@ -840,6 +927,7 @@ { return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_LOWER_CASE(c):(c)); } +#endif /* * call-seq: @@ -853,11 +941,24 @@ static VALUE enc_inspect(VALUE self) { +#if WITH_OBJC + char buffer[512]; + VALUE enc_name; + long n; + + enc_name = (VALUE)CFStringGetNameOfEncoding(rb_enc_to_enc(self)); + + n = snprintf(buffer, sizeof buffer, "#<%s:%s>", rb_obj_classname(self), + RSTRING_CPTR(enc_name)); + + return rb_str_new(buffer, n); +#else VALUE str = rb_sprintf("#<%s:%s%s>", rb_obj_classname(self), rb_enc_name((rb_encoding*)DATA_PTR(self)), (ENC_DUMMY_P(self) ? 
" (dummy)" : "")); ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT); return str; +#endif } /* @@ -871,7 +972,11 @@ static VALUE enc_name(VALUE self) { +#if WITH_OBJC + return (VALUE)CFStringConvertEncodingToIANACharSetName(rb_enc_to_enc(self)); +#else return rb_usascii_str_new2(rb_enc_name((rb_encoding*)DATA_PTR(self))); +#endif } static VALUE @@ -898,9 +1003,21 @@ * #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>] * */ + static VALUE enc_list(VALUE klass) { +#if WITH_OBJC + VALUE ary; + const CFStringEncoding *e; + + ary = rb_ary_new(); + e = CFStringGetListOfAvailableEncodings(); + while (e != NULL && *e != kCFStringEncodingInvalidId) { + rb_ary_push(ary, enc_make(e)); + e++; + } +#else VALUE ary = rb_ary_new2(enc_table.count); int i; for (i = 0; i < enc_table.count; ++i) { @@ -909,6 +1026,7 @@ rb_ary_push(ary, rb_enc_from_encoding(enc)); } } +#endif return ary; } @@ -925,8 +1043,25 @@ * */ static VALUE +enc_find2(VALUE enc) +{ + CFStringEncoding e; + + e = CFStringConvertIANACharSetNameToEncoding((CFStringRef)StringValue(enc)); + if (e == kCFStringEncodingInvalidId) + return Qnil; + return enc_make(&e); +} + +static VALUE enc_find(VALUE klass, VALUE enc) { +#if WITH_OBJC + VALUE e = enc_find2(enc); + if (e == Qnil) + rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc)); + return e; +#else int idx; StringValue(enc); @@ -938,6 +1073,7 @@ rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc)); } return rb_enc_from_encoding(rb_enc_from_index(idx)); +#endif } /* @@ -983,6 +1119,7 @@ return enc_find(klass, str); } +#if !WITH_OBJC rb_encoding * rb_ascii8bit_encoding(void) { @@ -1045,7 +1182,24 @@ { return rb_enc_from_encoding(rb_default_external_encoding()); } +#endif +#if WITH_OBJC +static rb_encoding *default_external; + +rb_encoding * +rb_default_external_encoding(void) +{ + return default_external; +} + +VALUE +rb_enc_default_external(void) +{ + return enc_make(default_external); +} +#endif + /* * 
call-seq: * Encoding.default_external => enc @@ -1063,7 +1217,11 @@ void rb_enc_set_default_external(VALUE encoding) { +#if WITH_OBJC + default_external = rb_enc_to_enc_ptr(encoding); +#else default_external_index = rb_enc_to_index(rb_to_encoding(encoding)); +#endif } /* @@ -1088,7 +1246,10 @@ VALUE rb_locale_charmap(VALUE klass) { -#if defined NO_LOCALE_CHARMAP +#if WITH_OBJC + CFStringEncoding enc = CFStringGetSystemEncoding(); + return (VALUE)CFStringConvertEncodingToIANACharSetName(enc); +#elif defined NO_LOCALE_CHARMAP return rb_usascii_str_new2("ASCII-8BIT"); #elif defined HAVE_LANGINFO_H char *codeset; @@ -1101,6 +1262,7 @@ #endif } +#if !WITH_OBJC static void set_encoding_const(const char *name, rb_encoding *enc) { @@ -1156,6 +1318,7 @@ rb_ary_push(ary, str); return ST_CONTINUE; } +#endif /* * call-seq: @@ -1176,11 +1339,22 @@ static VALUE rb_enc_name_list(VALUE klass) { +#if WITH_OBJC + VALUE ary, list; + long i, count; + + ary = rb_ary_new(); + list = enc_list(klass); + for (i = 0, count = RARRAY_LEN(list); i < count; i++) + rb_ary_push(ary, enc_name(RARRAY_AT(list, i))); +#else VALUE ary = rb_ary_new2(enc_table.names->num_entries); st_foreach(enc_table.names, rb_enc_name_list_i, (st_data_t)ary); +#endif return ary; } +#if !WITH_OBJC static int rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg) { @@ -1204,6 +1378,7 @@ rb_hash_aset(aliases, key, str); return ST_CONTINUE; } +#endif /* * call-seq: @@ -1220,13 +1395,78 @@ static VALUE rb_enc_aliases(VALUE klass) { +#if WITH_OBJC + /* TODO: the CFString IANA <-> charset code does support aliases, we should + * find a way to return them here. 
+ */ + return rb_hash_new(); +#else VALUE aliases[2]; aliases[0] = rb_hash_new(); aliases[1] = rb_ary_new(); st_foreach(enc_table.names, rb_enc_aliases_enc_i, (st_data_t)aliases); return aliases[0]; +#endif } +const char * +rb_enc_name(rb_encoding *enc) +{ + CFStringRef str; + if (enc != NULL + && (str = CFStringConvertEncodingToIANACharSetName(*enc)) != NULL) + return RSTRING_CPTR(str); + return NULL; +} + +long +rb_enc_mbminlen(rb_encoding *enc) +{ + return rb_enc_mbmaxlen(enc); +} + +long +rb_enc_mbmaxlen(rb_encoding *enc) +{ + return CFStringGetMaximumSizeForEncoding(1, *enc); +} + +rb_encoding * +rb_enc_find(const char *name) +{ + return rb_enc_find2(rb_str_new2(name)); +} + +rb_encoding * +rb_enc_find2(VALUE name) +{ + VALUE e = enc_find2(name); + return e == Qnil ? NULL : rb_enc_to_enc_ptr(e); +} + +rb_encoding * +rb_enc_get(VALUE obj) +{ + int type = TYPE(obj); + if (type == T_STRING) { + CFStringEncoding enc = CFStringGetFastestEncoding((CFStringRef)obj); + if (enc == kCFStringEncodingInvalidId) + return NULL; + return rb_enc_to_enc_ptr(enc_make(&enc)); + } + else { + /* TODO */ + return NULL; + } +} + +rb_encoding * +rb_locale_encoding(void) +{ + CFStringEncoding enc = CFStringGetSystemEncoding(); + return rb_enc_to_enc_ptr(enc_make(&enc)); +} + void Init_Encoding(void) { Modified: MacRuby/trunk/gc.c =================================================================== --- MacRuby/trunk/gc.c 2008-05-11 20:36:13 UTC (rev 191) +++ MacRuby/trunk/gc.c 2008-05-15 08:06:22 UTC (rev 192) @@ -2039,7 +2039,8 @@ if (ctx->class_of != 0) { if (ctx->class_of == rb_cClass) { /* Class is a special case. 
*/ - if (TYPE(r->address) != T_CLASS + if (rb_objc_is_non_native(r->address) + || TYPE(r->address) != T_CLASS || FL_TEST(r->address, FL_SINGLETON)) continue; } Modified: MacRuby/trunk/include/ruby/encoding.h =================================================================== --- MacRuby/trunk/include/ruby/encoding.h 2008-05-11 20:36:13 UTC (rev 191) +++ MacRuby/trunk/include/ruby/encoding.h 2008-05-15 08:06:22 UTC (rev 192) @@ -17,6 +17,13 @@ #else # include <varargs.h> #endif + +#if WITH_OBJC + +typedef CFStringEncoding rb_encoding; + +#else + #include "ruby/oniguruma.h" #define ENCODING_INLINE_MAX 1023 @@ -48,8 +55,6 @@ # define ENCODING_IS_ASCII8BIT(obj) (ENCODING_GET_INLINED(obj) == 0) #endif -#define ENCODING_MAXNAMELEN 42 - #define ENC_CODERANGE_MASK (FL_USER8|FL_USER9) #define ENC_CODERANGE_UNKNOWN 0 #define ENC_CODERANGE_7BIT FL_USER8 @@ -75,7 +80,10 @@ } while (0) typedef OnigEncodingType rb_encoding; +#endif +#define ENCODING_MAXNAMELEN 42 + int rb_enc_replicate(const char *, rb_encoding *); int rb_define_dummy_encoding(const char *); int rb_enc_dummy_p(rb_encoding *); @@ -108,12 +116,25 @@ /* name -> rb_encoding */ rb_encoding * rb_enc_find(const char *name); +#if WITH_OBJC +rb_encoding * rb_enc_find2(VALUE name); +#endif + /* encoding -> name */ +#if WITH_OBJC +const char *rb_enc_name(rb_encoding *); +#else #define rb_enc_name(enc) (enc)->name +#endif /* encoding -> minlen/maxlen */ +#if WITH_OBJC +long rb_enc_mbminlen(rb_encoding *); +long rb_enc_mbmaxlen(rb_encoding *); +#else #define rb_enc_mbminlen(enc) (enc)->min_enc_len #define rb_enc_mbmaxlen(enc) (enc)->max_enc_len +#endif /* -> mbclen (no error notification: 0 < ret <= e-p, no exception) */ int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc); @@ -148,6 +169,17 @@ /* ptr, ptr, encoding -> newline_or_not */ #define rb_enc_is_newline(p,end,enc) ONIGENC_IS_MBC_NEWLINE(enc,(UChar*)(p),(UChar*)(end)) +#if WITH_OBJC +#define rb_enc_isctype(c,t,enc) (iswctype(c,t)) +#define 
rb_enc_isascii(c,enc) (iswascii(c)) +#define rb_enc_isalpha(c,enc) (iswalpha(c)) +#define rb_enc_islower(c,enc) (iswlower(c)) +#define rb_enc_isupper(c,enc) (iswupper(c)) +#define rb_enc_isalnum(c,enc) (iswalnum(c)) +#define rb_enc_isprint(c,enc) (iswprint(c)) +#define rb_enc_isspace(c,enc) (iswspace(c)) +#define rb_enc_isdigit(c,enc) (iswdigit(c)) +#else #define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t) #define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c) #define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA(enc,c) @@ -157,6 +189,7 @@ #define rb_enc_isprint(c,enc) ONIGENC_IS_CODE_PRINT(enc,c) #define rb_enc_isspace(c,enc) ONIGENC_IS_CODE_SPACE(enc,c) #define rb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT(enc,c) +#endif #define rb_enc_asciicompat(enc) (!rb_enc_dummy_p(enc) && rb_enc_mbminlen(enc)==1) Modified: MacRuby/trunk/io.c =================================================================== --- MacRuby/trunk/io.c 2008-05-11 20:36:13 UTC (rev 191) +++ MacRuby/trunk/io.c 2008-05-15 08:06:22 UTC (rev 192) @@ -1351,6 +1351,7 @@ io_enc_str(VALUE str, rb_io_t *fptr) { OBJ_TAINT(str); +#if !WITH_OBJC if (fptr->enc2) { /* two encodings, so transcode from enc2 to enc */ /* the methods in transcode.c are static, so call indirectly */ @@ -1362,6 +1363,7 @@ /* just one encoding, so associate it with the string */ rb_enc_associate(str, io_read_encoding(fptr)); } +#endif return str; } @@ -1372,7 +1374,9 @@ long n; long pos = 0; rb_encoding *enc = io_input_encoding(fptr); +#if !WITH_OBJC int cr = fptr->enc2 ? 
ENC_CODERANGE_BROKEN : 0; +#endif if (siz == 0) siz = BUFSIZ; if (NIL_P(str)) { @@ -1388,17 +1392,21 @@ break; } bytes += n; +#if !WITH_OBJC if (cr != ENC_CODERANGE_BROKEN) pos = rb_str_coderange_scan_restartable(RSTRING_PTR(str) + pos, RSTRING_PTR(str) + bytes, enc, &cr); +#endif if (bytes < siz) break; siz += BUFSIZ; rb_str_resize(str, siz); } if (bytes != siz) rb_str_resize(str, bytes); str = io_enc_str(str, fptr); +#if !WITH_OBJC if (!fptr->enc2) { ENC_CODERANGE_SET(str, cr); } +#endif return str; } @@ -1738,6 +1746,7 @@ RSTRING_PTR(str)[last++] = c; } if (limit > 0 && limit == pending) { +#if !WITH_OBJC char *p = fptr->rbuf+fptr->rbuf_off; char *pp = p + limit; char *pl = rb_enc_left_char_head(p, pp, enc); @@ -1748,6 +1757,7 @@ limit = pending; rb_str_set_len(str, RSTRING_LEN(str)-diff); } +#endif } read_buffered_data(RSTRING_PTR(str) + last, pending, fptr); /* must not fail */ limit -= pending; @@ -1816,7 +1826,9 @@ int len = 0; long pos = 0; rb_encoding *enc = io_input_encoding(fptr); +#if !WITH_OBJC int cr = fptr->enc2 ? ENC_CODERANGE_BROKEN : 0; +#endif for (;;) { long pending = READ_DATA_PENDING_COUNT(fptr); @@ -1839,8 +1851,10 @@ read_buffered_data(RSTRING_PTR(str)+len, pending, fptr); } len += pending; +#if !WITH_OBJC if (cr != ENC_CODERANGE_BROKEN) pos = rb_str_coderange_scan_restartable(RSTRING_PTR(str) + pos, RSTRING_PTR(str) + len, enc, &cr); +#endif if (e) break; } rb_thread_wait_fd(fptr->fd); @@ -1853,7 +1867,9 @@ RSTRING_SYNC(str); str = io_enc_str(str, fptr); +#if !WITH_OBJC if (!fptr->enc2) ENC_CODERANGE_SET(str, cr); +#endif fptr->lineno++; ARGF.lineno = INT2FIX(fptr->lineno); return str; @@ -1884,6 +1900,7 @@ } } if (!NIL_P(rs)) { +#if !WITH_OBJC rb_encoding *enc_rs, *enc_io; GetOpenFile(io, fptr); @@ -1911,6 +1928,7 @@ rs = rs2; } } +#endif } *rsp = rs; *limit = NIL_P(lim) ? 
-1L : NUM2LONG(lim); @@ -1934,8 +1952,12 @@ else if (limit == 0) { return rb_enc_str_new(0, 0, io_read_encoding(fptr)); } - else if (rs == rb_default_rs && limit < 0 && - rb_enc_asciicompat(io_read_encoding(fptr))) { + else if (rs == rb_default_rs && limit < 0 +#if WITH_OBJC + ) { +#else + && rb_enc_asciicompat(io_read_encoding(fptr))) { +#endif return rb_io_getline_fast(fptr); } else { @@ -1964,8 +1986,10 @@ if (RSTRING_LEN(str) < rslen) continue; s = RSTRING_PTR(str); p = s + RSTRING_LEN(str) - rslen; +#if !WITH_OBJC pp = rb_enc_left_char_head(s, p, enc); if (pp != p) continue; +#endif if (!rspara) rscheck(rsptr, rslen, rs); if (memcmp(p, rsptr, rslen) == 0) break; } @@ -2296,6 +2320,11 @@ if (io_fillbuf(fptr) < 0) { return Qnil; } +#if WITH_OBJC + /* FIXME */ + if (0) { + } +#else r = rb_enc_precise_mbclen(fptr->rbuf+fptr->rbuf_off, fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc); if (MBCLEN_CHARFOUND_P(r) && (n = MBCLEN_CHARFOUND_LEN(r)) <= fptr->rbuf_len) { @@ -2317,6 +2346,7 @@ } } } +#endif else { str = rb_str_new(fptr->rbuf+fptr->rbuf_off, 1); fptr->rbuf_off++; @@ -2447,10 +2477,14 @@ enc = io_read_encoding(fptr); if (FIXNUM_P(c)) { int cc = FIX2INT(c); +#if WITH_OBJC + c = rb_str_new((char *)&cc, 1); +#else char buf[16]; rb_enc_mbcput(cc, buf, enc); c = rb_str_new(buf, rb_enc_codelen(cc, enc)); +#endif } else { SafeStringValue(c); @@ -3227,11 +3261,24 @@ { const char *p0, *p1; char *enc2name; +#if WITH_OBJC + rb_encoding *enc1, enc2; +#else int idx, idx2; - +#endif + p0 = strrchr(estr, ':'); if (!p0) p1 = estr; else p1 = p0 + 1; +#if WITH_OBJC + enc1 = rb_enc_find(p1); + if (enc1 != NULL) { + fptr->enc = enc1; + } + else { + rb_warn("Unsupported encoding %s ignored", p1); + } +#else idx = rb_enc_find_index(p1); if (idx >= 0) { fptr->enc = rb_enc_from_index(idx); @@ -3239,28 +3286,49 @@ else { rb_warn("Unsupported encoding %s ignored", p1); } +#endif if (p0) { int n = p0 - estr; if (n > ENCODING_MAXNAMELEN) { +#if WITH_OBJC + enc2 = NULL; +#else idx2 = -1; 
+#endif } else { enc2name = ALLOCA_N(char, n+1); memcpy(enc2name, estr, n); enc2name[n] = '\0'; estr = enc2name; +#if WITH_OBJC + enc2 = rb_enc_find(enc2name); +#else idx2 = rb_enc_find_index(enc2name); +#endif } +#if WITH_OBJC + if (enc2 == NULL) { +#else if (idx2 < 0) { +#endif rb_warn("Unsupported encoding %.*s ignored", n, estr); } +#if WITH_OBJC + else if (enc1 == enc2) { +#else else if (idx2 == idx) { +#endif rb_warn("Ignoring internal encoding %.*s: it is identical to external encoding %s", n, estr, p1); } else { +#if WITH_OBJC + fptr->enc2 = enc2; +#else fptr->enc2 = rb_enc_from_index(idx2); +#endif } } } Modified: MacRuby/trunk/marshal.c =================================================================== --- MacRuby/trunk/marshal.c 2008-05-11 20:36:13 UTC (rev 191) +++ MacRuby/trunk/marshal.c 2008-05-15 08:06:22 UTC (rev 192) @@ -469,8 +469,18 @@ static void w_encoding(VALUE obj, long num, struct dump_call_arg *arg) { + rb_encoding *enc = 0; +#if WITH_OBJC + const char *name; + + enc = rb_enc_get(obj); + if (enc == NULL) { + w_long(num, arg->arg); + return; + } + name = rb_enc_name(enc); +#else int encidx = rb_enc_get_index(obj); - rb_encoding *enc = 0; st_data_t name; if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) { @@ -487,6 +497,7 @@ name = (st_data_t)rb_str_new2(rb_enc_name(enc)); st_insert(arg->arg->encodings, (st_data_t)rb_enc_name(enc), name); } while (0); +#endif w_object(name, arg->arg, arg->limit); } @@ -1110,7 +1121,11 @@ while (len--) { ID id = r_symbol(arg); VALUE val = r_object(arg); +#if WITH_OBJC + if (0) { +#else if (id == rb_id_encoding()) { +#endif int idx = rb_enc_find_index(StringValueCStr(val)); if (idx > 0) rb_enc_associate_index(obj, idx); } Modified: MacRuby/trunk/numeric.c =================================================================== --- MacRuby/trunk/numeric.c 2008-05-11 20:36:13 UTC (rev 191) +++ MacRuby/trunk/numeric.c 2008-05-15 08:06:22 UTC (rev 192) @@ -1863,12 +1863,17 @@ rb_raise(rb_eArgError, "wrong number 
of arguments (%d for 0 or 1)", argc); break; } +#if WITH_OBJC + /* TODO */ + rb_notimplement(); +#else enc = rb_to_encoding(argv[0]); if (!enc) enc = rb_ascii8bit_encoding(); if (i < 0 || (n = rb_enc_codelen(i, enc)) <= 0) goto out_of_range; str = rb_enc_str_new(0, n, enc); rb_enc_mbcput(i, RSTRING_PTR(str), enc); return str; +#endif } /******************************************************************** Modified: MacRuby/trunk/objc.m =================================================================== --- MacRuby/trunk/objc.m 2008-05-11 20:36:13 UTC (rev 191) +++ MacRuby/trunk/objc.m 2008-05-15 08:06:22 UTC (rev 192) @@ -460,6 +460,7 @@ { char v = RTEST(rval); *(id *)ocval = (id)CFNumberCreate(NULL, kCFNumberCharType, &v); + CFMakeCollectable(*(id *)ocval); return true; } @@ -467,6 +468,7 @@ { double v = RFLOAT_VALUE(rval); *(id *)ocval = (id)CFNumberCreate(NULL, kCFNumberDoubleType, &v); + CFMakeCollectable(*(id *)ocval); return true; } @@ -487,6 +489,7 @@ *(id *)ocval = (id)CFNumberCreate(NULL, kCFNumberLongType, &v); #endif } + CFMakeCollectable(*(id *)ocval); return true; } @@ -495,6 +498,7 @@ ID name = SYM2ID(rval); *(id *)ocval = (id)CFStringCreateWithCString(NULL, rb_id2name(name), kCFStringEncodingASCII); /* XXX this is temporary */ + CFMakeCollectable(*(id *)ocval); return true; } } @@ -2120,7 +2124,7 @@ if (bs_find_path(framework_path, path, sizeof path)) { if (!bs_parse(path, 0, bs_parse_cb, NULL, &error)) rb_raise(rb_eRuntimeError, error); -#if 1 +#if 0 /* FIXME 'GC capability mismatch' with .dylib files */ p = strrchr(path, '.'); assert(p != NULL); Modified: MacRuby/trunk/parse.y =================================================================== --- MacRuby/trunk/parse.y 2008-05-11 20:36:13 UTC (rev 191) +++ MacRuby/trunk/parse.y 2008-05-15 08:06:22 UTC (rev 192) @@ -266,14 +266,23 @@ #endif }; -#define UTF8_ENC() (parser->utf8 ? parser->utf8 : \ +#if WITH_OBJC +# define UTF8_ENC() (NULL) +#else +# define UTF8_ENC() (parser->utf8 ? 
parser->utf8 : \ (parser->utf8 = rb_utf8_encoding())) +#endif #define STR_NEW(p,n) rb_enc_str_new((p),(n),parser->enc) #define STR_NEW0() rb_usascii_str_new(0,0) #define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc) #define STR_NEW3(p,n,e,func) parser_str_new((p),(n),(e),(func),parser->enc) -#define STR_ENC(m) ((m)?parser->enc:rb_usascii_encoding()) -#define ENC_SINGLE(cr) ((cr)==ENC_CODERANGE_7BIT) +#if WITH_OBJC +# define STR_ENC(m) (parser->enc) +# define ENC_SINGLE(cr) (1) +#else +# define STR_ENC(m) ((m)?parser->enc:rb_usascii_encoding()) +# define ENC_SINGLE(cr) ((cr)==ENC_CODERANGE_7BIT) +#endif #define TOK_INTERN(mb) rb_intern3(tok(), toklen(), STR_ENC(mb)) #ifdef YYMALLOC @@ -4641,8 +4650,10 @@ # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128) #endif +#if !WITH_OBJC #define parser_mbclen() mbclen((lex_p-1),lex_pend,parser->enc) #define parser_precise_mbclen() rb_enc_precise_mbclen((lex_p-1),lex_pend,parser->enc) +#endif #define is_identchar(p,e,enc) (rb_enc_isalnum(*p,enc) || (*p) == '_' || !ISASCII(*p)) #define parser_is_identchar() (!parser->eofp && is_identchar((lex_p-1),lex_pend,parser->enc)) @@ -4678,11 +4689,19 @@ if (len > max_line_margin * 2 + 10) { if (lex_p - p > max_line_margin) { +#if WITH_OBJC + p = lex_p - max_line_margin; +#else p = rb_enc_prev_char(p, lex_p - max_line_margin, rb_enc_get(lex_lastline)); +#endif pre = "..."; } if (pe - lex_p > max_line_margin) { +#if WITH_OBJC + pe = lex_p + max_line_margin; +#else pe = rb_enc_prev_char(lex_p, lex_p + max_line_margin, rb_enc_get(lex_lastline)); +#endif post = "..."; } len = pe - p; @@ -4956,6 +4975,7 @@ #endif str = rb_enc_str_new(p, n, enc); +#if !WITH_OBJC if (!(func & STR_FUNC_REGEXP) && rb_enc_asciicompat(enc)) { if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) { rb_enc_associate(str, rb_usascii_encoding()); @@ -4964,6 +4984,7 @@ rb_enc_associate(str, rb_ascii8bit_encoding()); } } +#endif return str; } @@ -5281,8 +5302,14 @@ static void parser_tokaddmbc(struct 
parser_params *parser, int c, rb_encoding *enc) { +#if WITH_OBJC + /* FIXME */ + char *buf = tokspace(1); + *(buf) = c; +#else int len = rb_enc_codelen(c, enc); rb_enc_mbcput(c, tokspace(len), enc); +#endif } static int @@ -5409,11 +5436,15 @@ static int parser_tokadd_mbchar(struct parser_params *parser, int c) { +#if WITH_OBJC + int len = 1; +#else int len = parser_precise_mbclen(); if (!MBCLEN_CHARFOUND_P(len)) { compile_error(PARSER_ARG "invalid multibyte char"); return -1; } +#endif tokadd(c); lex_p += --len; if (len > 0) tokcopy(len); @@ -5856,8 +5887,15 @@ static void parser_set_encode(struct parser_params *parser, const char *name) { + rb_encoding *enc; +#if WITH_OBJC + enc = rb_enc_find(name); + if (enc == NULL) { + rb_raise(rb_eArgError, "unknown encoding name: %s", name); + } + /* TODO should raise if the encoding is not ASCII compatible */ +#else int idx = rb_enc_find_index(name); - rb_encoding *enc; if (idx < 0) { rb_raise(rb_eArgError, "unknown encoding name: %s", name); @@ -5866,6 +5904,7 @@ if (!rb_enc_asciicompat(enc)) { rb_raise(rb_eArgError, "%s is not ASCII compatible", rb_enc_name(enc)); } +#endif parser->enc = enc; } @@ -6085,8 +6124,10 @@ } pushback(c); parser->enc = rb_enc_get(lex_lastline); +#if !WITH_OBJC if (parser->enc == NULL) parser->enc = rb_utf8_encoding(); +#endif } #define IS_ARG() (lex_state == EXPR_ARG || lex_state == EXPR_CMDARG) @@ -7248,9 +7289,13 @@ break; } +#if !WITH_OBJC mb = ENC_CODERANGE_7BIT; +#endif do { +#if !WITH_OBJC if (!ISASCII(c)) mb = ENC_CODERANGE_UNKNOWN; +#endif if (tokadd_mbchar(c) == -1) return 0; c = nextc(); } while (parser_is_identchar()); @@ -7303,7 +7348,11 @@ } } +#if WITH_OBJC + if (lex_state != EXPR_DOT) { +#else if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) { +#endif const struct kwtable *kw; /* See if it is a reserved word. 
*/ @@ -7557,11 +7606,13 @@ static void literal_concat0(struct parser_params *parser, VALUE head, VALUE tail) { +#if !WITH_OBJC if (!rb_enc_compatible(head, tail)) { compile_error(PARSER_ARG "string literal encodings differ (%s / %s)", rb_enc_name(rb_enc_get(head)), rb_enc_name(rb_enc_get(tail))); } +#endif RSTRING_SYNC(head); rb_str_buf_append(head, tail); } @@ -8629,6 +8680,9 @@ static void reg_fragment_setenc_gen(struct parser_params* parser, VALUE str, int options) { +#if WITH_OBJC + /* TODO */ +#else int c = RE_OPTION_ENCODING_IDX(options); if (c) { @@ -8663,6 +8717,7 @@ compile_error(PARSER_ARG "regexp encoding option '%c' differs from source encoding '%s'", c, rb_enc_name(rb_enc_get(str))); +#endif } static void @@ -8999,7 +9054,11 @@ ++m; if (m < e && is_identchar(m, e, enc)) { if (!ISASCII(*m)) mb = 1; +#if WITH_OBJC + m += e-m; +#else m += rb_enc_mbclen(m, e, enc); +#endif } break; default: @@ -9015,7 +9074,11 @@ int rb_symname_p(const char *name) { +#if WITH_OBJC + return rb_enc_symname_p(name, NULL); +#else return rb_enc_symname_p(name, rb_ascii8bit_encoding()); +#endif } int @@ -9096,7 +9159,11 @@ id: if (m >= e || (*m != '_' && !rb_enc_isalpha(*m, enc) && ISASCII(*m))) return Qfalse; +#if WITH_OBJC + while (m < e && is_identchar(m, e, enc)) m += e-m; +#else while (m < e && is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc); +#endif if (localid) { switch (*m) { case '!': case '?': case '=': ++m; @@ -9152,7 +9219,13 @@ case '$': id |= ID_GLOBAL; if ((mb = is_special_global_name(++m, e, enc)) != 0) { - if (!--mb) enc = rb_ascii8bit_encoding(); + if (!--mb) { +#if WITH_OBJC + enc = NULL; +#else + enc = rb_ascii8bit_encoding(); +#endif + } goto new_id; } break; @@ -9199,6 +9272,7 @@ } break; } +#if !WITH_OBJC mb = 0; if (!rb_enc_isdigit(*m, enc)) { while (m <= name + last && is_identchar(m, e, enc)) { @@ -9225,12 +9299,12 @@ } mbstr:; } +#endif new_id: id |= ++global_symbols.last_id << ID_SCOPE_SHIFT; id_register: str = rb_enc_str_new(name, len, enc); 
-// TODO -// OBJ_FREEZE(str); + OBJ_FREEZE(str); #if WITH_OBJC CFDictionarySetValue(global_symbols.sym_id, (const void *)name_hash, (const void *)id); @@ -9246,7 +9320,11 @@ ID rb_intern2(const char *name, long len) { +#if WITH_OBJC + return rb_intern3(name, len, NULL); +#else return rb_intern3(name, len, rb_usascii_encoding()); +#endif } #undef rb_intern @@ -9262,12 +9340,16 @@ rb_encoding *enc; ID id; +#if WITH_OBJC + enc = rb_enc_get(str); +#else if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) { enc = rb_usascii_encoding(); } else { enc = rb_enc_get(str); } +#endif id = rb_intern3(RSTRING_CPTR(str), RSTRING_CLEN(str), enc); RB_GC_GUARD(str); return id; @@ -9467,7 +9549,11 @@ #ifdef YYMALLOC parser->heap = NULL; #endif +#if WITH_OBJC + parser->enc = NULL; +#else parser->enc = rb_usascii_encoding(); +#endif } extern void rb_mark_source_filename(char *); Modified: MacRuby/trunk/re.c =================================================================== --- MacRuby/trunk/re.c 2008-05-11 20:36:13 UTC (rev 191) +++ MacRuby/trunk/re.c 2008-05-15 08:06:22 UTC (rev 192) @@ -184,6 +184,7 @@ case 'n': *kcode = -1; return (*option = ARG_ENCODING_NONE); +#if !WITH_OBJC case 'e': *kcode = rb_enc_find_index("EUC-JP"); break; @@ -193,6 +194,7 @@ case 'u': *kcode = rb_enc_find_index("UTF-8"); break; +#endif default: *kcode = -1; return (*option = char_to_option(c)); @@ -219,10 +221,16 @@ p = s; pend = p + len; while (p<pend) { +#if WITH_OBJC + c = *p; + clen = 1; + if (0) {} +#else c = rb_enc_ascget(p, pend, &clen, enc); if (c == -1) { p += mbclen(p, pend, enc); } +#endif else if (c != '/' && rb_enc_isprint(c, enc)) { p += clen; } @@ -237,9 +245,18 @@ else { p = s; while (p<pend) { +#if WITH_OBJC + c = *p; + clen = 1; +#else c = rb_enc_ascget(p, pend, &clen, enc); +#endif if (c == '\\' && p+clen < pend) { +#if WITH_OBJC + int n = clen + (pend - (p+clen)); +#else int n = clen + mbclen(p+clen, pend, enc); +#endif rb_str_buf_cat(str, p, n); p += n; continue; @@ -249,12 +266,14 @@ 
rb_str_buf_cat(str, &c, 1); rb_str_buf_cat(str, p, clen); } +#if !WITH_OBJC else if (c == -1) { int l = mbclen(p, pend, enc); rb_str_buf_cat(str, p, l); p += l; continue; } +#endif else if (rb_enc_isprint(c, enc)) { rb_str_buf_cat(str, p, clen); } @@ -277,7 +296,9 @@ { VALUE str = rb_str_buf_new2("/"); +#if !WITH_OBJC rb_enc_copy(str, re); +#endif rb_reg_expr_str(str, s, len); rb_str_buf_cat2(str, "/"); if (re) { @@ -374,7 +395,9 @@ rb_reg_check(re); +#if !WITH_OBJC rb_enc_copy(str, re); +#endif options = RREGEXP(re)->ptr->options; ptr = (UChar*)RREGEXP(re)->str; len = RREGEXP(re)->len; @@ -416,9 +439,17 @@ if (*ptr == ':' && ptr[len-1] == ')') { int r; Regexp *rp; + OnigEncoding oenc; + +#if WITH_OBJC + oenc = ONIG_ENCODING_ASCII; +#else + oenc = rb_enc_get(re); +#endif + r = onig_alloc_init(&rp, ONIG_OPTION_DEFAULT, ONIGENC_CASE_FOLD_DEFAULT, - rb_enc_get(re), + oenc, OnigDefaultSyntax); if (r == 0) { ++ptr; @@ -445,7 +476,9 @@ rb_str_buf_cat2(str, ":"); rb_reg_expr_str(str, (char*)ptr, len); rb_str_buf_cat2(str, ")"); +#if !WITH_OBJC rb_enc_copy(str, re); +#endif OBJ_INFECT(str, re); return str; @@ -465,7 +498,9 @@ char opts[6]; VALUE desc = rb_str_buf_new2(err); +#if !WITH_OBJC rb_enc_associate(desc, enc); +#endif rb_str_buf_cat2(desc, ": /"); rb_reg_expr_str(desc, s, len); opts[0] = '/'; @@ -629,6 +664,7 @@ Regexp *rp; int r; OnigErrorInfo einfo; + OnigEncoding oenc; /* Handle escaped characters first. */ @@ -637,8 +673,14 @@ from that. */ +#if WITH_OBJC + oenc = ONIG_ENCODING_ASCII; +#else + oenc = enc; +#endif + r = onig_alloc_init(&rp, flags, ONIGENC_CASE_FOLD_DEFAULT, - enc, OnigDefaultSyntax); + oenc, OnigDefaultSyntax); if (r) { onig_error_code_to_str((UChar*)err, r); return 0; @@ -742,7 +784,12 @@ c = 0; for (i = 0; i < num_pos; i++) { q = s + pairs[i].byte_pos; +#if WITH_OBJC + //long n = strlen(p); + c += q-p;//(n > (q-p) ? 
q-p : n); +#else c += rb_enc_strlen(p, q, enc); +#endif pairs[i].char_pos = c; p = q; } @@ -1052,20 +1099,25 @@ int need_recompile = 0; rb_encoding *enc; +#if WITH_OBJC + need_recompile = 0; +#else if (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) { rb_raise(rb_eArgError, "broken %s string", rb_enc_name(rb_enc_get(str))); } +#endif rb_reg_check(re); /* ignorecase status */ +#if !WITH_OBJC if (rb_reg_fixed_encoding_p(re) || !rb_enc_str_asciicompat_p(str)) { if (ENCODING_GET(re) != rb_enc_get_index(str) && rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) { rb_raise(rb_eArgError, "incompatible encoding regexp match (%s regexp with %s string)", - rb_enc_name(rb_enc_from_index(ENCODING_GET(re))), + rb_enc_name(rb_enc_get(re)), rb_enc_name(rb_enc_get(str))); } } @@ -1082,6 +1134,7 @@ rb_enc_name(enc)); } } +#endif if (need_recompile) { onig_errmsg_buffer err = ""; @@ -1104,10 +1157,14 @@ rb_raise(rb_eArgError, "regexp preprocess failed: %s", err); } +#if WITH_OBJC + enc = (rb_encoding *)ONIG_ENCODING_ASCII; +#endif + r = onig_new(&reg2, (UChar* )RSTRING_CPTR(unescaped), (UChar* )(RSTRING_CPTR(unescaped) + RSTRING_CLEN(unescaped)), - reg->options, enc, + reg->options, (OnigEncoding)enc, OnigDefaultSyntax, &einfo); if (r) { onig_error_code_to_str((UChar*)err, r, &einfo); @@ -1815,8 +1872,12 @@ } chbuf[chlen++] = byte; - while (chlen < chmaxlen && - MBCLEN_NEEDMORE_P(rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) { + while (chlen < chmaxlen +#if WITH_OBJC + && 1) { +#else + && MBCLEN_NEEDMORE_P(rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) { +#endif byte = read_escaped_byte(&p, end, err); if (byte == -1) { return -1; @@ -1824,11 +1885,13 @@ chbuf[chlen++] = byte; } +#if !WITH_OBJC l = rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc); if (MBCLEN_INVALID_P(l)) { strcpy(err, "invalid multibyte escape"); return -1; } +#endif if (1 < chlen || (chbuf[0] & 0x80)) { rb_str_buf_cat(buf, chbuf, chlen); @@ -1876,12 +1939,14 @@ len = rb_uv_to_utf8(utf8buf, uv); rb_str_buf_cat(buf,
utf8buf, len); +#if !WITH_OBJC if (*encp == 0) *encp = rb_utf8_encoding(); else if (*encp != rb_utf8_encoding()) { strcpy(err, "UTF-8 character in non UTF-8 regexp"); return -1; } +#endif } return 0; } @@ -1954,12 +2019,16 @@ char smallbuf[2]; while (p < end) { +#if WITH_OBJC + int chlen = 1; +#else int chlen = rb_enc_precise_mbclen(p, end, enc); if (!MBCLEN_CHARFOUND_P(chlen)) { strcpy(err, "invalid multibyte character"); return -1; } chlen = MBCLEN_CHARFOUND_LEN(chlen); +#endif if (1 < chlen || (*p & 0x80)) { rb_str_buf_cat(buf, p, chlen); p += chlen; @@ -2057,14 +2126,18 @@ *fixed_enc = 0; else { *fixed_enc = enc; +#if !WITH_OBJC rb_enc_associate(buf, enc); +#endif } if (unescape_nonascii(p, end, enc, buf, fixed_enc, err) != 0) return Qnil; if (*fixed_enc) { +#if !WITH_OBJC rb_enc_associate(buf, *fixed_enc); +#endif } return buf; @@ -2144,7 +2217,9 @@ rb_str_buf_append(result, str); } if (regexp_enc) { +#if !WITH_OBJC rb_enc_associate(result, regexp_enc); +#endif } return result; @@ -2157,7 +2232,11 @@ struct RRegexp *re = RREGEXP(obj); VALUE unescaped; rb_encoding *fixed_enc = 0; +#if WITH_OBJC + rb_encoding *a_enc = NULL; +#else rb_encoding *a_enc = rb_ascii8bit_encoding(); +#endif if (!OBJ_TAINTED(obj) && rb_safe_level() >= 4) rb_raise(rb_eSecurityError, "Insecure: can't modify regexp"); @@ -2184,11 +2263,15 @@ enc = fixed_enc; } } +#if !WITH_OBJC else if (!(options & ARG_ENCODING_FIXED)) { enc = rb_usascii_encoding(); } +#endif +#if !WITH_OBJC rb_enc_associate((VALUE)re, enc); +#endif if ((options & ARG_ENCODING_FIXED) || fixed_enc) { re->basic.flags |= KCODE_FIXED; } @@ -2214,6 +2297,8 @@ int ret; rb_encoding *enc = rb_enc_get(str); if (options & ARG_ENCODING_NONE) { +#if !WITH_OBJC + /* TODO */ rb_encoding *ascii8bit = rb_ascii8bit_encoding(); if (enc != ascii8bit) { if (rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) { @@ -2222,6 +2307,7 @@ } enc = ascii8bit; } +#endif } ret = rb_reg_initialize(obj, RSTRING_CPTR(str), RSTRING_CLEN(str), enc, options, 
err); @@ -2277,7 +2363,11 @@ VALUE rb_reg_new(const char *s, long len, int options) { +#if WITH_OBJC + return rb_enc_reg_new(s, len, NULL, options); +#else return rb_enc_reg_new(s, len, rb_ascii8bit_encoding(), options); +#endif } VALUE @@ -2302,7 +2392,11 @@ { volatile VALUE save_str = str; if (reg_cache && RREGEXP(reg_cache)->len == RSTRING_CLEN(str) +#if WITH_OBJC + && rb_enc_get(reg_cache) == rb_enc_get(str) +#else && ENCODING_GET(reg_cache) == ENCODING_GET(str) +#endif && memcmp(RREGEXP(reg_cache)->str, RSTRING_CPTR(str), RSTRING_CLEN(str)) == 0) return reg_cache; @@ -2359,7 +2453,11 @@ if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED)) return Qfalse; if (RREGEXP(re1)->ptr->options != RREGEXP(re2)->ptr->options) return Qfalse; if (RREGEXP(re1)->len != RREGEXP(re2)->len) return Qfalse; +#if WITH_OBJC + if (rb_enc_get(re1) != rb_enc_get(re2)) return Qfalse; +#else if (ENCODING_GET(re1) != ENCODING_GET(re2)) return Qfalse; +#endif if (memcmp(RREGEXP(re1)->str, RREGEXP(re2)->str, RREGEXP(re1)->len) == 0) { return Qtrue; } @@ -2643,7 +2741,9 @@ if (argc == 3 && !NIL_P(argv[2])) { char *kcode = StringValuePtr(argv[2]); if (kcode[0] == 'n' || kcode[1] == 'N') { +#if !WITH_OBJC enc = rb_ascii8bit_encoding(); +#endif flags |= ARG_ENCODING_FIXED; } else { @@ -2669,18 +2769,27 @@ char *t; VALUE tmp; int c, clen; +#if WITH_OBJC + int ascii_only = 0; +#else int ascii_only = rb_enc_str_asciionly_p(str); +#endif s = RSTRING_CPTR(str); if (s == NULL) return str; send = s + RSTRING_CLEN(str); while (s < send) { - c = rb_enc_ascget(s, send, &clen, enc); +#if WITH_OBJC + c = *s; + clen = 1; +#else + c = rb_enc_ascget(s, send, &clen, enc); if (c == -1) { s += mbclen(s, send, enc); continue; } +#endif switch (c) { case '[': case ']': case '{': case '}': case '(': case ')': case '|': case '-': @@ -2694,24 +2803,32 @@ } if (ascii_only) { str = rb_str_new3(str); +#if !WITH_OBJC rb_enc_associate(str, rb_usascii_encoding()); +#endif } return str; meta_found: tmp = 
rb_str_new(0, RSTRING_CLEN(str)*2); +#if !WITH_OBJC if (ascii_only) { rb_enc_associate(tmp, rb_usascii_encoding()); } else { rb_enc_copy(tmp, str); } +#endif t = RSTRING_PTR(tmp); /* copy upto metacharacter */ memcpy(t, RSTRING_CPTR(str), s - RSTRING_CPTR(str)); t += s - RSTRING_CPTR(str); while (s < send) { +#if WITH_OBJC + c = *s; + clen = 1; +#else c = rb_enc_ascget(s, send, &clen, enc); if (c == -1) { int n = mbclen(s, send, enc); @@ -2720,6 +2837,7 @@ *t++ = *s++; continue; } +#endif s += clen; switch (c) { case '[': case ']': case '{': case '}': @@ -2758,7 +2876,9 @@ } rb_str_resize(tmp, t - RSTRING_PTR(tmp)); RSTRING_SYNC(tmp); +#if !WITH_OBJC OBJ_INFECT(tmp, str); +#endif return tmp; } @@ -2894,9 +3014,11 @@ rb_raise(rb_eArgError, "incompatible encodings: %s and %s", rb_enc_name(has_ascii_incompat), rb_enc_name(enc)); } +#if !WITH_OBJC else if (rb_enc_str_asciionly_p(e)) { has_asciionly = 1; } +#endif else { if (!has_ascii_compat_fixed) has_ascii_compat_fixed = enc; @@ -2917,12 +3039,15 @@ } } +#if !WITH_OBJC if (i == 0) { rb_enc_copy(source, v); } +#endif rb_str_append(source, v); } +#if !WITH_OBJC if (has_ascii_incompat) { result_enc = has_ascii_incompat; } @@ -2934,6 +3059,7 @@ } rb_enc_associate(source, result_enc); +#endif return rb_class_new_instance(1, &source, rb_cRegexp); } } @@ -2998,18 +3124,24 @@ rb_encoding *str_enc = rb_enc_get(str); rb_encoding *src_enc = rb_enc_get(src); +#if !WITH_OBJC rb_enc_check(str, src); +#endif p = s = RSTRING_CPTR(str); e = s + RSTRING_CLEN(str); while (s < e) { + const char *ss; +#if WITH_OBJC + int c = *s; + clen = 1; +#else int c = rb_enc_ascget(s, e, &clen, str_enc); - const char *ss; - if (c == -1) { s += mbclen(s, e, str_enc); continue; } +#endif ss = s; s += clen; @@ -3020,6 +3152,10 @@ } rb_enc_str_buf_cat(val, p, ss-p, str_enc); +#if WITH_OBJC + c = *s; + clen = 1; +#else c = rb_enc_ascget(s, e, &clen, str_enc); if (c == -1) { s += mbclen(s, e, str_enc); @@ -3027,6 +3163,7 @@ p = s; continue; } +#endif s += 
clen; p = s; @@ -3042,14 +3179,26 @@ break; case 'k': +#if WITH_OBJC + clen = 1; + if (s < e && *s == '<') { +#else if (s < e && rb_enc_ascget(s, e, &clen, str_enc) == '<') { +#endif const char *name, *name_end; name_end = name = s + clen; while (name_end < e) { +#if WITH_OBJC + c = *name_end; + clen = 1; + if (c == '>') break; + name_end += clen; +#else c = rb_enc_ascget(name_end, e, &clen, str_enc); if (c == '>') break; name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen; +#endif } if (name_end < e) { no = name_to_backref_number(regs, regexp, name, name_end); Modified: MacRuby/trunk/ruby.c =================================================================== --- MacRuby/trunk/ruby.c 2008-05-11 20:36:13 UTC (rev 191) +++ MacRuby/trunk/ruby.c 2008-05-15 08:06:22 UTC (rev 192) @@ -91,7 +91,11 @@ struct { struct { VALUE name; +#if WITH_OBJC + rb_encoding *enc; +#else int index; +#endif } enc; } src, ext; }; @@ -926,6 +930,17 @@ Init_prelude(); } +#if WITH_OBJC +static rb_encoding * +opt_enc_find(VALUE enc_name) +{ + rb_encoding *enc = rb_enc_find2(enc_name); + if (enc == NULL) + rb_raise(rb_eRuntimeError, "unknown encoding name - %s", + RSTRING_CPTR(enc_name)); + return enc; +} +#else static int opt_enc_index(VALUE enc_name) { @@ -940,8 +955,13 @@ } return i; } +#endif +#if WITH_OBJC +static rb_encoding *src_encoding; +#else static int src_encoding_index = -1; /* TODO: VM private */ +#endif static VALUE process_options(VALUE arg) @@ -1059,15 +1079,30 @@ parser = rb_parser_new(); if (opt->yydebug) rb_parser_set_yydebug(parser, Qtrue); if (opt->ext.enc.name != 0) { +#if WITH_OBJC + opt->ext.enc.enc = opt_enc_find(opt->ext.enc.name); +#else opt->ext.enc.index = opt_enc_index(opt->ext.enc.name); +#endif } if (opt->src.enc.name != 0) { +#if WITH_OBJC + opt->src.enc.enc = opt_enc_find(opt->src.enc.name); + src_encoding = opt->src.enc.enc; +#else opt->src.enc.index = opt_enc_index(opt->src.enc.name); src_encoding_index = opt->src.enc.index; +#endif } +#if WITH_OBJC + if 
(opt->ext.enc.enc != NULL) { + enc = opt->ext.enc.enc; + } +#else if (opt->ext.enc.index >= 0) { enc = rb_enc_from_index(opt->ext.enc.index); } +#endif else { enc = rb_locale_encoding(); } @@ -1075,13 +1110,21 @@ if (opt->e_script) { rb_encoding *eenc; +#if WITH_OBJC + if (opt->src.enc.enc != NULL) { + eenc = opt->src.enc.enc; + } +#else if (opt->src.enc.index >= 0) { eenc = rb_enc_from_index(opt->src.enc.index); } +#endif else { eenc = rb_locale_encoding(); } +#if !WITH_OBJC rb_enc_associate(opt->e_script, eenc); +#endif require_libraries(); tree = rb_parser_compile_string(parser, opt->script, opt->e_script, 1); } @@ -1233,11 +1276,20 @@ } rb_io_ungetc(f, INT2FIX('#')); if (no_src_enc && opt->src.enc.name) { +#if WITH_OBJC + opt->src.enc.enc = opt_enc_find(opt->src.enc.name); + src_encoding = opt->src.enc.enc; +#else opt->src.enc.index = opt_enc_index(opt->src.enc.name); src_encoding_index = opt->src.enc.index; +#endif } if (no_ext_enc && opt->ext.enc.name) { +#if WITH_OBJC + opt->ext.enc.enc = opt_enc_find(opt->ext.enc.name); +#else opt->ext.enc.index = opt_enc_index(opt->ext.enc.name); +#endif } } else if (!NIL_P(c)) { @@ -1245,6 +1297,14 @@ } require_libraries(); /* Why here? 
unnatural */ } +#if WITH_OBJC + if (opt->src.enc.enc != NULL) { + enc = opt->src.enc.enc; + } + else { + enc = rb_locale_encoding(); + } +#else if (opt->src.enc.index >= 0) { enc = rb_enc_from_index(opt->src.enc.index); } @@ -1254,6 +1314,7 @@ else { enc = rb_usascii_encoding(); } +#endif rb_funcall(f, rb_intern("set_encoding"), 1, rb_enc_from_encoding(enc)); tree = (NODE *)rb_parser_compile_file(parser, fname, f, line_start); rb_funcall(f, rb_intern("set_encoding"), 1, rb_parser_encoding(parser)); @@ -1272,7 +1333,11 @@ struct cmdline_options opt; MEMZERO(&opt, opt, 1); +#if WITH_OBJC + opt.src.enc.enc = src_encoding; +#else opt.src.enc.index = src_encoding_index; +#endif return load_file(rb_parser_new(), fname, 0, &opt); } @@ -1504,8 +1569,13 @@ args.argc = argc; args.argv = argv; args.opt = &opt; +#if WITH_OBJC + opt.src.enc.enc = src_encoding; + opt.ext.enc.enc = NULL; +#else opt.src.enc.index = src_encoding_index; opt.ext.enc.index = -1; +#endif tree = (NODE *)rb_vm_call_cfunc(rb_vm_top_self(), process_options, (VALUE)&args, 0, rb_progname); Modified: MacRuby/trunk/string.c =================================================================== --- MacRuby/trunk/string.c 2008-05-11 20:36:13 UTC (rev 191) +++ MacRuby/trunk/string.c 2008-05-15 08:06:22 UTC (rev 192) @@ -215,15 +215,18 @@ } #endif -#define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) -#define is_broken_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) - #if WITH_OBJC -# define STR_ENC_GET(str) (rb_ascii8bit_encoding()) /* TODO */ +/* TODO */ +# define is_ascii_string(str) (1) +# define is_broken_string(str) (0) +# define STR_ENC_GET(str) (NULL) #else +# define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) +# define is_broken_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) # define STR_ENC_GET(str) rb_enc_from_index(ENCODING_GET(str)) #endif +#if !WITH_OBJC static int single_byte_optimizable(VALUE str) { @@ -240,9 +243,11 
@@ * "\xa1" in Shift_JIS for example. */ return 0; } +#endif VALUE rb_fs; +#if !WITH_OBJC static inline const char * search_nonascii(const char *p, const char *e) { @@ -420,9 +425,6 @@ int rb_enc_str_coderange(VALUE str) { -#if WITH_OBJC - return ENC_CODERANGE_VALID; -#else int cr = ENC_CODERANGE(str); if (cr == ENC_CODERANGE_UNKNOWN) { @@ -431,15 +433,11 @@ ENC_CODERANGE_SET(str, cr); } return cr; -#endif } int rb_enc_str_asciionly_p(VALUE str) { -#if WITH_OBJC - return Qtrue; -#else rb_encoding *enc = STR_ENC_GET(str); if (!rb_enc_asciicompat(enc)) @@ -447,8 +445,8 @@ else if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) return Qtrue; return Qfalse; +} #endif -} static inline void str_mod_check(VALUE s, const char *p, long len) @@ -585,7 +583,7 @@ { VALUE str = str_new(rb_cString, ptr, len); - ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT); + //ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT); return str; } @@ -908,6 +906,7 @@ return str; } +#if !WITH_OBJC long rb_enc_strlen(const char *p, const char *e, rb_encoding *enc) { @@ -990,6 +989,7 @@ if (!*cr) *cr = ENC_CODERANGE_7BIT; return c; } +#endif static long str_strlen(VALUE str, rb_encoding *enc) @@ -1399,6 +1399,7 @@ return rb_check_string_type(str); } +#if !WITH_OBJC char* rb_enc_nth(const char *p, const char *e, int nth, rb_encoding *enc) { @@ -1512,7 +1513,8 @@ if (!pp) return e - p; return pp - p; } -#endif +#endif /* NONASCII_MASK */ +#endif /* WITH_OBJC */ /* byte offset to char offset */ long @@ -2904,6 +2906,7 @@ NEIGHBOR_WRAPPED }; +#if !WITH_OBJC static enum neighbor_char enc_succ_char(char *p, int len, rb_encoding *enc) { @@ -2967,6 +2970,7 @@ } } } +#endif /* overwrite +p+ by succeeding letter in +enc+ and returns @@ -2980,6 +2984,10 @@ static enum neighbor_char enc_succ_alnum_char(char *p, int len, rb_encoding *enc, char *carry) { +#if WITH_OBJC + /* TODO rewrite me */ + return NEIGHBOR_NOT_CHAR; +#else enum neighbor_char ret; int c; int ctype; @@ 
-3031,6 +3039,7 @@ MEMCPY(carry, p, char, len); enc_succ_char(carry, len, enc); return NEIGHBOR_WRAPPED; +#endif } @@ -3062,6 +3071,69 @@ VALUE rb_str_succ(VALUE orig) { +#if WITH_OBJC + UniChar *buf; + UniChar carry; + long i, len; + bool modified; + + len = CFStringGetLength((CFStringRef)orig); + if (len == 0) + return orig; + + buf = (UniChar *)alloca(sizeof(UniChar) * (len + 1)); + buf++; + + CFStringGetCharacters((CFStringRef)orig, CFRangeMake(0, len), buf); + modified = false; + carry = 0; + + for (i = len - 1; i >= 0; i--) { + UniChar c = buf[i]; + if (iswdigit(c)) { + modified = true; + if (c != '9') { + buf[i]++; + carry = 0; + break; + } + else { + buf[i] = '0'; + carry = '1'; + } + } + else if (iswalpha(c)) { + bool lower = islower(c); + UniChar e = lower ? 'z' : 'Z'; + modified = true; + if (c != e) { + buf[i]++; + carry = 0; + break; + } + else { + carry = buf[i] = lower ? 'a' : 'A'; + } + } + } + + if (!modified) { + buf[len-1]++; + } + else if (carry != 0) { + buf--; + *buf = carry; + len++; + } + + CFMutableStringRef newstr; + + newstr = CFStringCreateMutable(NULL, 0); + CFStringAppendCharacters(newstr, buf, len); + CFMakeCollectable(newstr); + + return (VALUE)newstr; +#else rb_encoding *enc; VALUE str; char *sbeg, *s, *e; @@ -3133,6 +3205,7 @@ rb_enc_str_coderange(str); #endif return str; +#endif } @@ -3186,8 +3259,28 @@ rb_scan_args(argc, argv, "11", &end, &exclusive); excl = RTEST(exclusive); - succ = rb_intern("succ"); StringValue(end); +#if WITH_OBJC + if (RSTRING_CLEN(beg) == 1 && RSTRING_CLEN(end) == 1) { + UniChar c = CFStringGetCharacterAtIndex((CFStringRef)beg, 0); + UniChar e = CFStringGetCharacterAtIndex((CFStringRef)end, 0); + + if (c > e || (excl && c == e)) + return beg; + for (;;) { + CFMutableStringRef substr; + substr = CFStringCreateMutable(NULL, 0); + CFStringAppendCharacters(substr, &c, 1); + CFMakeCollectable(substr); + rb_yield((VALUE)substr); + if (!excl && c == e) + break; + c++; + if (excl && c == e) + break; + } + return 
beg; +#else enc = rb_enc_check(beg, end); if (RSTRING_CLEN(beg) == 1 && RSTRING_CLEN(end) == 1 && is_ascii_string(beg) && is_ascii_string(end)) { @@ -3202,10 +3295,12 @@ if (excl && c == e) break; } return beg; +#endif } n = rb_str_cmp(beg, end); if (n > 0 || (excl && n == 0)) return beg; + succ = rb_intern("succ"); after_end = rb_funcall(end, succ, 0, 0); current = beg; while (!rb_str_equal(current, after_end)) { @@ -3456,9 +3551,13 @@ end = END(nth); len = end - start; StringValue(val); +#if !WITH_OBJC enc = rb_enc_check(str, val); +#endif rb_str_splice_0(str, start, len, val); +#if !WITH_OBJC rb_enc_associate(str, enc); +#endif } static VALUE @@ -3681,7 +3780,9 @@ pat = get_pat(argv[0], 1); if (rb_reg_search(pat, str, 0, 0) >= 0) { rb_encoding *enc; +#if !WITH_OBJC int cr = ENC_CODERANGE(str); +#endif match = rb_backref_get(); regs = RMATCH_REGS(match); @@ -3859,9 +3960,7 @@ val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0))); val = rb_obj_as_string(val); } -#if !WITH_OBJC str_mod_check(str, sp, slen); -#endif if (bang) str_frozen_check(str); if (val == dest) { /* paranoid check [ruby-dev:24827] */ rb_raise(rb_eRuntimeError, "block should not cheat"); @@ -3888,7 +3987,11 @@ * in order to prevent infinite loops. 
*/ if (slen <= END(0)) break; +#if WITH_OBJC + len = 1; +#else len = rb_enc_mbclen(sp+END(0), sp+slen, str_enc); +#endif rb_enc_str_buf_cat(dest, sp+END(0), len, str_enc); offset = END(0) + len; } @@ -4340,11 +4443,16 @@ static void str_cat_char(VALUE str, int c, rb_encoding *enc) { +#if WITH_OBJC + CFStringAppendCharacters((CFMutableStringRef)str, + (const UniChar *)&c, 1); +#else char s[16]; int n = rb_enc_codelen(c, enc); rb_enc_mbcput(c, s, enc); rb_enc_str_buf_cat(str, s, n, enc); +#endif } static void @@ -4388,14 +4496,20 @@ p = RSTRING_PTR(str); pend = RSTRING_END(str); #endif result = rb_str_buf_new2(""); +#if !WITH_OBJC if (!rb_enc_asciicompat(enc)) enc = rb_usascii_encoding(); rb_enc_associate(result, enc); +#endif str_cat_char(result, '"', enc); while (p < pend) { int c; int n; int cc; +#if WITH_OBJC + c = *p; + n = 1; +#else n = rb_enc_precise_mbclen(p, pend, enc); if (!MBCLEN_CHARFOUND_P(n)) { p++; @@ -4406,13 +4520,18 @@ c = rb_enc_codepoint(p, pend, enc); n = rb_enc_codelen(c, enc); +#endif p += n; if (c == '"'|| c == '\\' || (c == '#' && p < pend && +#if WITH_OBJC + ((cc = *p), +#else MBCLEN_CHARFOUND_P(rb_enc_precise_mbclen(p,pend,enc)) && (cc = rb_enc_codepoint(p,pend,enc), +#endif (cc == '$' || cc == '@' || cc == '{')))) { prefix_escape(result, c, enc); } @@ -4522,7 +4641,7 @@ } if (!rb_enc_asciicompat(enc0)) { len += 19; /* ".force_encoding('')" */ - len += strlen(enc0->name); + len += strlen(rb_enc_name(enc0)); } result = rb_str_new5(str, 0, len); @@ -4584,13 +4703,17 @@ } *q++ = '"'; if (!rb_enc_asciicompat(enc0)) { - sprintf(q, ".force_encoding(\"%s\")", enc0->name); + sprintf(q, ".force_encoding(\"%s\")", rb_enc_name(enc0)); +#if !WITH_OBJC enc0 = rb_ascii8bit_encoding(); +#endif } OBJ_INFECT(result, str); /* result from dump is ASCII */ +#if !WITH_OBJC rb_enc_associate(result, enc0); +#endif RSTRING_SYNC(result); return result; } @@ -5512,7 +5635,9 @@ } if (modify) { +#if !WITH_OBJC rb_enc_associate(str, enc); +#endif return str; } return 
Qnil; @@ -6016,13 +6141,16 @@ VALUE spat; VALUE limit; int awk_split = Qfalse; + int spat_string = Qfalse; long beg, end, i = 0; int lim = 0; VALUE result, tmp; const char *cstr; long clen; +#if !WITH_OBJC cstr = RSTRING_CPTR(str); +#endif clen = RSTRING_CLEN(str); if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) { @@ -6036,11 +6164,7 @@ i = 1; } -#if WITH_OBJC - enc = rb_ascii8bit_encoding(); -#else enc = STR_ENC_GET(str); -#endif result = rb_ary_new(); if (NIL_P(spat)) { if (!NIL_P(rb_fs)) { @@ -6052,17 +6176,19 @@ else { fs_set: if (TYPE(spat) == T_STRING) { + spat_string = Qtrue; +#if WITH_OBJC + if (RSTRING_CLEN(spat) == 1 + && CFStringGetCharacterAtIndex((CFStringRef)spat, 0) == ' ') { + awk_split = Qtrue; + } +#else const char *spat_cstr; long spat_clen; -#if WITH_OBJC - rb_encoding *enc2 = rb_ascii8bit_encoding(); -#else rb_encoding *enc2 = STR_ENC_GET(spat); -#endif spat_cstr = RSTRING_CPTR(spat); spat_clen = RSTRING_CLEN(spat); - if (rb_enc_mbminlen(enc2) == 1) { if (spat_clen == 1 && spat_cstr[0] == ' '){ awk_split = Qtrue; @@ -6076,47 +6202,59 @@ } } if (!awk_split) { + spat = rb_reg_regcomp(rb_reg_quote(spat)); + } +#endif + } + else { + spat = get_pat(spat, 1); + } + } + + beg = 0; #if WITH_OBJC - CFRange search_range; - search_range = CFRangeMake(0, clen); - do { - CFRange result_range; - CFRange substr_range; - if (!CFStringFindWithOptions((CFStringRef)str, + if (awk_split || spat_string) { + CFRange search_range; + if (spat == Qnil) + spat = (VALUE)CFSTR(" "); + search_range = CFRangeMake(0, clen); + do { + CFRange result_range; + CFRange substr_range; + if (!CFStringFindWithOptions((CFStringRef)str, (CFStringRef)spat, search_range, 0, &result_range)) - break; + break; - substr_range.location = search_range.location; - substr_range.length = result_range.location - - search_range.location; + substr_range.location = search_range.location; + substr_range.length = result_range.location + - search_range.location; - rb_ary_push(result, - 
rb_str_subseq(str, substr_range.location, - substr_range.length)); + if (awk_split == Qfalse || substr_range.length > 0) { + VALUE substr; + + substr = rb_str_subseq(str, substr_range.location, + substr_range.length); - search_range.location = result_range.location - + result_range.length; - search_range.length = clen - search_range.location; + if (awk_split == Qtrue) { + CFStringTrimWhitespace((CFMutableStringRef)substr); + if (CFStringGetLength((CFStringRef)substr) > 0) + rb_ary_push(result, substr); } - while ((limit == Qnil || --lim > 1)); - rb_ary_push(result, - rb_str_subseq(str, search_range.location, - search_range.length)); - goto done; -#else - spat = rb_reg_regcomp(rb_reg_quote(spat)); -#endif + else { + rb_ary_push(result, substr); + } } + + search_range.location = result_range.location + + result_range.length; + search_range.length = clen - search_range.location; } - else { - spat = get_pat(spat, 1); - } - } - - beg = 0; + while ((limit == Qnil || --lim > 1)); + beg = search_range.location; +#else if (awk_split) { const char *ptr = cstr; const char *eptr = cstr+clen; @@ -6150,6 +6288,7 @@ } } } +#endif } else { long start = beg; @@ -6160,22 +6299,34 @@ while ((end = rb_reg_search(spat, str, start, 0)) >= 0) { regs = RMATCH_REGS(rb_backref_get()); if (start == end && BEG(0) == END(0)) { +#if WITH_OBJC + if (0) { +#else if (!cstr) { - //rb_ary_push(result, rb_str_new("", 0)); + rb_ary_push(result, rb_str_new("", 0)); +#endif break; } else if (last_null == 1) { +#if WITH_OBJC + rb_ary_push(result, rb_str_subseq(str, beg, 1)); +#else rb_ary_push(result, rb_str_subseq(str, beg, rb_enc_mbclen(cstr+beg, cstr+clen, enc))); +#endif beg = start; } else { - if (cstr+start == cstr+clen) + if (start == clen) start++; else +#if WITH_OBJC + start += 1; +#else start += rb_enc_mbclen(cstr+start,cstr+clen,enc); +#endif last_null = 1; continue; } @@ -6204,7 +6355,6 @@ tmp = rb_str_subseq(str, beg, clen-beg); rb_ary_push(result, tmp); } -done: if (NIL_P(limit) && lim == 
0) { while (RARRAY_LEN(result) > 0 && RSTRING_CLEN(RARRAY_AT(result, RARRAY_LEN(result)-1)) == 0) @@ -7885,7 +8035,7 @@ VALUE str; sym = rb_id2str(id); - if (!rb_enc_symname_p(RSTRING_CPTR(sym), rb_ascii8bit_encoding())) { + if (!rb_enc_symname_p(RSTRING_CPTR(sym), NULL)) { sym = rb_str_inspect(sym); } str = rb_str_new(":", 1); Modified: MacRuby/trunk/time.c =================================================================== --- MacRuby/trunk/time.c 2008-05-11 20:36:13 UTC (rev 191) +++ MacRuby/trunk/time.c 2008-05-15 08:06:22 UTC (rev 192) @@ -2097,7 +2097,7 @@ rb_str_cat(str, buf, len); p += strlen(p); if (buf != buffer) { - free(buf); + xfree(buf); buf = buffer; } for (fmt = p; p < pe && !*p; ++p); @@ -2109,8 +2109,10 @@ len = rb_strftime(&buf, RSTRING_CPTR(format), &tobj->tm); } str = rb_str_new(buf, len); - if (buf != buffer) free(buf); + if (buf != buffer) xfree(buf); +#if !WITH_OBJC rb_enc_copy(str, format); +#endif return str; } Modified: MacRuby/trunk/transcode.c =================================================================== --- MacRuby/trunk/transcode.c 2008-05-11 20:36:13 UTC (rev 191) +++ MacRuby/trunk/transcode.c 2008-05-15 08:06:22 UTC (rev 192) @@ -10,6 +10,9 @@ **********************************************************************/ #include "ruby/ruby.h" + +#if !WITH_OBJC + #include "ruby/encoding.h" #define PType (int) #include "transcode_data.h" @@ -441,15 +444,35 @@ return newstr; } +#else // WITH_OBJC + +static VALUE +rb_str_transcode(int argc, VALUE *argv, VALUE self) +{ + /* TODO */ + return self; +} + +static VALUE +rb_str_transcode_bang(int argc, VALUE *argv, VALUE self) +{ + /* TODO */ + return self; +} + +#endif + void Init_transcode(void) { +#if !WITH_OBJC transcoder_table = st_init_strcasetable(); transcoder_lib_table = st_init_strcasetable(); init_transcoder_table(); sym_invalid = ID2SYM(rb_intern("invalid")); sym_ignore = ID2SYM(rb_intern("ignore")); +#endif rb_define_method(rb_cString, "encode", rb_str_transcode, -1); 
rb_define_method(rb_cString, "encode!", rb_str_transcode_bang, -1);
participants (1)
-
source_changes@macosforge.org