Revision: 4161 http://trac.macosforge.org/projects/ruby/changeset/4161 Author: eloy.de.enige@gmail.com Date: 2010-05-26 04:41:43 -0700 (Wed, 26 May 2010) Log Message: ----------- Implement String#each_codepoint / String#codepoints. Modified Paths: -------------- MacRuby/trunk/string.c Removed Paths: ------------- MacRuby/trunk/spec/frozen/tags/macruby/core/string/codepoints_tags.txt MacRuby/trunk/spec/frozen/tags/macruby/core/string/each_codepoint_tags.txt Deleted: MacRuby/trunk/spec/frozen/tags/macruby/core/string/codepoints_tags.txt =================================================================== --- MacRuby/trunk/spec/frozen/tags/macruby/core/string/codepoints_tags.txt 2010-05-26 06:06:33 UTC (rev 4160) +++ MacRuby/trunk/spec/frozen/tags/macruby/core/string/codepoints_tags.txt 2010-05-26 11:41:43 UTC (rev 4161) @@ -1,11 +0,0 @@ -fails:String#codepoints returns an Enumerator when no block is given -fails:String#codepoints returns an Enumerator when no block is given even when self has an invalid encoding -fails:String#codepoints raises an ArgumentError when self has an invalid encoding and a method is called on the returned Enumerator -fails:String#codepoints yields each codepoint to the block if one is given -fails:String#codepoints raises an ArgumentError if self's encoding is invalid and a block is given -fails:String#codepoints returns codepoints as Fixnums -fails:String#codepoints returns one codepoint for each character -fails:String#codepoints works for multibyte characters -fails:String#codepoints returns the codepoint corresponding to the character's position in the String's encoding -fails:String#codepoints round-trips to the original String using Integer#chr -fails:String#codepoints is synonomous with #bytes for Strings which are single-byte optimisable Deleted: MacRuby/trunk/spec/frozen/tags/macruby/core/string/each_codepoint_tags.txt =================================================================== --- MacRuby/trunk/spec/frozen/tags/macruby/core/string/each_codepoint_tags.txt 2010-05-26 06:06:33 UTC (rev 4160) +++ MacRuby/trunk/spec/frozen/tags/macruby/core/string/each_codepoint_tags.txt 2010-05-26 11:41:43 UTC (rev 4161) @@ -1,11 +0,0 @@ -fails:String#each_codepoint returns an Enumerator when no block is given -fails:String#each_codepoint returns an Enumerator when no block is given even when self has an invalid encoding -fails:String#each_codepoint raises an ArgumentError when self has an invalid encoding and a method is called on the returned Enumerator -fails:String#each_codepoint yields each codepoint to the block if one is given -fails:String#each_codepoint raises an ArgumentError if self's encoding is invalid and a block is given -fails:String#each_codepoint returns codepoints as Fixnums -fails:String#each_codepoint returns one codepoint for each character -fails:String#each_codepoint works for multibyte characters -fails:String#each_codepoint returns the codepoint corresponding to the character's position in the String's encoding -fails:String#each_codepoint round-trips to the original String using Integer#chr -fails:String#each_codepoint is synonomous with #bytes for Strings which are single-byte optimisable Modified: MacRuby/trunk/string.c =================================================================== --- MacRuby/trunk/string.c 2010-05-26 06:06:33 UTC (rev 4160) +++ MacRuby/trunk/string.c 2010-05-26 11:41:43 UTC (rev 4161) @@ -4813,6 +4813,43 @@ /* * call-seq: + * str.codepoints {|integer| block } -> str + * str.codepoints -> an_enumerator + * + * str.each_codepoint {|integer| block } -> str + * str.each_codepoint -> an_enumerator + * + * Passes the <code>Integer</code> ordinal of each character in <i>str</i>, + * also known as a <i>codepoint</i> when applied to Unicode strings to the + * given block. + * + * If no block is given, an enumerator is returned instead. + * + * "hello\u0639".each_codepoint {|c| print c, ' ' } + * + * <em>produces:</em> + * + * 104 101 108 108 111 1593 + */ + +static VALUE +rstr_each_codepoint(VALUE str, SEL sel) +{ + if (!str_is_valid_encoding(RSTR(str))) { + rb_raise(rb_eArgError, "invalid byte sequence in %s", + RSTR(str)->encoding->public_name); + } + RETURN_ENUMERATOR(str, 0, 0); + + const long len = str_length(RSTR(str), true); + for (int i = 0; i < len; i++) { + rb_yield(INT2NUM(rb_str_get_uchar(str, i))); + } + return str; +} + +/* + * call-seq: * str.succ => new_str * str.next => new_str * @@ -5938,6 +5975,8 @@ rb_objc_define_method(rb_cRubyString, "each_char", rstr_each_char, 0); rb_objc_define_method(rb_cRubyString, "bytes", rstr_each_byte, 0); rb_objc_define_method(rb_cRubyString, "each_byte", rstr_each_byte, 0); + rb_objc_define_method(rb_cRubyString, "codepoints", rstr_each_codepoint, 0); + rb_objc_define_method(rb_cRubyString, "each_codepoint", rstr_each_codepoint, 0); rb_objc_define_method(rb_cRubyString, "succ", rstr_succ, 0); rb_objc_define_method(rb_cRubyString, "succ!", rstr_succ_bang, 0); rb_objc_define_method(rb_cRubyString, "next", rstr_succ, 0);