Revision: 3622 http://trac.macosforge.org/projects/ruby/changeset/3622 Author: lsansonetti@apple.com Date: 2010-02-25 13:53:42 -0800 (Thu, 25 Feb 2010) Log Message: ----------- regexps are now AOT compilable Modified Paths: -------------- MacRuby/branches/icu/compiler.cpp MacRuby/branches/icu/compiler.h MacRuby/branches/icu/encoding.h MacRuby/branches/icu/include/ruby/ruby.h MacRuby/branches/icu/re.cpp MacRuby/branches/icu/re.h MacRuby/branches/icu/string.c Modified: MacRuby/branches/icu/compiler.cpp =================================================================== --- MacRuby/branches/icu/compiler.cpp 2010-02-25 21:13:29 UTC (rev 3621) +++ MacRuby/branches/icu/compiler.cpp 2010-02-25 21:53:42 UTC (rev 3622) @@ -18,12 +18,13 @@ #include "ruby/ruby.h" #include "ruby/encoding.h" #include "ruby/node.h" -#include "ruby/re.h" #include "id.h" #include "vm.h" #include "compiler.h" #include "objc.h" #include "version.h" +#include "encoding.h" +#include "re.h" extern "C" const char *ruby_node_name(int node); @@ -498,10 +499,12 @@ GlobalVariable * RoxorCompiler::compile_const_global_ustring(const UniChar *str, - const size_t len, CFHashCode hash) + const size_t len) { assert(len > 0); + const unsigned long hash = rb_str_hash_uchars(str, len); + std::map<CFHashCode, GlobalVariable *>::iterator iter = static_ustrings.find(hash); @@ -2797,22 +2800,18 @@ return CallInst::Create(newString3Func, "", bb); } else { - UniChar *buf = (UniChar *)CFStringGetCharactersPtr( - (CFStringRef)val); - bool free_buf = false; - if (buf == NULL) { - buf = (UniChar *)malloc(sizeof(UniChar) * str_len); - CFStringGetCharacters((CFStringRef)val, - CFRangeMake(0, str_len), buf); - free_buf = true; - } + UChar *chars = NULL; + long chars_len = 0; + bool need_free = false; - GlobalVariable *str_gvar = compile_const_global_ustring(buf, - str_len, CFHash((CFTypeRef)val)); + rb_str_get_uchars(val, &chars, &chars_len, &need_free); - if (free_buf) { - free(buf); - buf = NULL; + GlobalVariable *str_gvar = compile_const_global_ustring(chars, + chars_len); + + if (need_free) { + free(chars); + chars = NULL; } std::vector<Value *> idxs; @@ -5621,8 +5620,10 @@ RubyObjTy, PtrTy, NULL)); Function *newRegexp2Func = - cast<Function>(module->getOrInsertFunction("rb_reg_new_retained", - RubyObjTy, PtrTy, Int32Ty, Int32Ty, NULL)); + cast<Function>(module->getOrInsertFunction( + "rb_unicode_regex_new_retained", + RubyObjTy, PointerType::getUnqual(Int16Ty), Int32Ty, + Int32Ty, NULL)); Function *newBignumFunc = cast<Function>(module->getOrInsertFunction("rb_bignum_new_retained", @@ -5669,28 +5670,33 @@ case T_REGEXP: { - struct RRegexp *re = (struct RRegexp *)val; + const UChar *chars = NULL; + long chars_len = 0; + regexp_get_uchars(val, &chars, &chars_len); + Value *re_str; - if (re->len == 0) { + if (chars_len == 0) { + re_str = ConstantPointerNull::get( + PointerType::getUnqual(Int16Ty)); re_str = compile_const_pointer(NULL, NULL); } else { - GlobalVariable *rename_gvar = - compile_const_global_string(re->str, re->len); + GlobalVariable *re_name_gvar = + compile_const_global_ustring(chars, chars_len); std::vector<Value *> idxs; idxs.push_back(ConstantInt::get(Int32Ty, 0)); idxs.push_back(ConstantInt::get(Int32Ty, 0)); - re_str = GetElementPtrInst::Create(rename_gvar, + re_str = GetElementPtrInst::Create(re_name_gvar, idxs.begin(), idxs.end(), ""); } std::vector<Value *> params; params.push_back(re_str); - params.push_back(ConstantInt::get(Int32Ty, re->len)); + params.push_back(ConstantInt::get(Int32Ty, chars_len)); params.push_back(ConstantInt::get(Int32Ty, - re->ptr->options)); + rb_reg_options(val))); Instruction *call = CallInst::Create(newRegexp2Func, params.begin(), params.end(), ""); Modified: MacRuby/branches/icu/compiler.h =================================================================== --- MacRuby/branches/icu/compiler.h 2010-02-25 21:13:29 UTC (rev 3621) +++ MacRuby/branches/icu/compiler.h 2010-02-25 21:53:42 UTC (rev 3622) @@ -314,7 +314,7 @@ return compile_const_global_string(str, strlen(str)); } GlobalVariable *compile_const_global_ustring(const UniChar *str, - const size_t str_len, CFHashCode hash); + const size_t str_len); Value *compile_arity(rb_vm_arity_t &arity); Instruction *compile_range(Value *beg, Value *end, bool exclude_end, Modified: MacRuby/branches/icu/encoding.h =================================================================== --- MacRuby/branches/icu/encoding.h 2010-02-25 21:13:29 UTC (rev 3621) +++ MacRuby/branches/icu/encoding.h 2010-02-25 21:53:42 UTC (rev 3622) @@ -301,7 +301,7 @@ long rb_str_chars_len(VALUE str); UChar rb_str_get_uchar(VALUE str, long pos); void rb_str_append_uchar(VALUE str, UChar c); -unsigned long rb_str_hash_uchars(UChar *chars, long chars_len); +unsigned long rb_str_hash_uchars(const UChar *chars, long chars_len); VALUE mr_enc_s_is_compatible(VALUE klass, SEL sel, VALUE str1, VALUE str2); Modified: MacRuby/branches/icu/include/ruby/ruby.h =================================================================== --- MacRuby/branches/icu/include/ruby/ruby.h 2010-02-25 21:13:29 UTC (rev 3621) +++ MacRuby/branches/icu/include/ruby/ruby.h 2010-02-25 21:53:42 UTC (rev 3622) @@ -641,13 +641,6 @@ # define RARRAY_PTR(a) (rb_ary_ptr((VALUE)a)) #endif -struct RRegexp { - struct RBasic basic; - struct re_pattern_buffer *ptr; - long len; - char *str; -}; - #if !WITH_OBJC struct RHash { struct RBasic basic; Modified: MacRuby/branches/icu/re.cpp =================================================================== --- MacRuby/branches/icu/re.cpp 2010-02-25 21:13:29 UTC (rev 3621) +++ MacRuby/branches/icu/re.cpp 2010-02-25 21:53:42 UTC (rev 3622) @@ -1336,6 +1336,29 @@ "finalize", (IMP)match_finalize_imp); } +// Compiler primitives. + +void +regexp_get_uchars(VALUE re, const UChar **chars_p, long *chars_len_p) +{ + assert(chars_p != NULL && chars_len_p != NULL); + + UnicodeString *unistr = RREGEXP(re)->unistr; + assert(unistr != NULL); + + *chars_p = unistr->getBuffer(); + *chars_len_p = unistr->length(); +} + +VALUE +rb_unicode_regex_new_retained(UChar *chars, int chars_len, int options) +{ + VALUE str = rb_unicode_str_new(chars, chars_len); + VALUE re = rb_reg_new_str(str, options); + GC_RETAIN(re); + return re; +} + // MRI compatibility. VALUE Modified: MacRuby/branches/icu/re.h =================================================================== --- MacRuby/branches/icu/re.h 2010-02-25 21:13:29 UTC (rev 3621) +++ MacRuby/branches/icu/re.h 2010-02-25 21:53:42 UTC (rev 3622) @@ -23,6 +23,8 @@ VALUE rb_reg_regcomp(VALUE str); int rb_reg_search(VALUE re, VALUE str, int pos, bool reverse); +void regexp_get_uchars(VALUE re, const UChar **chars_p, long *chars_len_p); + typedef struct rb_match_result { unsigned int beg; unsigned int end; Modified: MacRuby/branches/icu/string.c =================================================================== --- MacRuby/branches/icu/string.c 2010-02-25 21:13:29 UTC (rev 3621) +++ MacRuby/branches/icu/string.c 2010-02-25 21:53:42 UTC (rev 3622) @@ -3731,7 +3731,7 @@ // Unicode characters hashing function, copied from CoreFoundation. // This function might have some performance issues on large strings. unsigned long -rb_str_hash_uchars(UChar *chars, long len) +rb_str_hash_uchars(const UChar *chars, long len) { #define HashNextFourUniChars(accessStart, accessEnd, pointer) \ {result = result * 67503105 + (accessStart 0 accessEnd) * 16974593 + (accessStart 1 accessEnd) * 66049 + (accessStart 2 accessEnd) * 257 + (accessStart 3 accessEnd); pointer += 4;}