[macruby-changes] [4053] MacRuby/trunk
source_changes at macosforge.org
source_changes at macosforge.org
Sat May 8 23:45:51 PDT 2010
Revision: 4053
http://trac.macosforge.org/projects/ruby/changeset/4053
Author: vincent.isambart at gmail.com
Date: 2010-05-08 23:45:47 -0700 (Sat, 08 May 2010)
Log Message:
-----------
removed unused files, fixed bugs, added the crappy ICU EUC-JP support
Modified Paths:
--------------
MacRuby/trunk/encoding.c
MacRuby/trunk/encoding.h
MacRuby/trunk/inits.c
MacRuby/trunk/rakelib/builder/builder.rb
MacRuby/trunk/spec/frozen/tags/macruby/core/string/encode_tags.txt
MacRuby/trunk/string.c
Removed Paths:
-------------
MacRuby/trunk/transcode.c
MacRuby/trunk/transcode_data.h
Modified: MacRuby/trunk/encoding.c
===================================================================
--- MacRuby/trunk/encoding.c 2010-05-09 05:47:14 UTC (rev 4052)
+++ MacRuby/trunk/encoding.c 2010-05-09 06:45:47 UTC (rev 4053)
@@ -268,6 +268,7 @@
add_encoding(ENCODING_ISO8859_1, ENCODING_TYPE_UCNV, "ISO-8859-1", 1, true, true, "ISO8859-1", NULL);
add_encoding(ENCODING_MACROMAN, ENCODING_TYPE_UCNV, "macRoman", 1, true, true, NULL);
// FIXME: the ICU conversion tables do not seem to match Ruby's Japanese conversion tables
+ add_encoding(ENCODING_EUCJP, ENCODING_TYPE_UCNV, "EUC-JP", 1, false, true, "eucJP", NULL);
//add_encoding(ENCODING_EUCJP, ENCODING_TYPE_RUBY, "EUC-JP", 1, false, true, "eucJP", NULL);
//add_encoding(ENCODING_SJIS, ENCODING_TYPE_RUBY, "Shift_JIS", 1, false, true, "SJIS", NULL);
//add_encoding(ENCODING_CP932, ENCODING_TYPE_RUBY, "Windows-31J", 1, false, true, "CP932", "csWindows31J", NULL);
Modified: MacRuby/trunk/encoding.h
===================================================================
--- MacRuby/trunk/encoding.h 2010-05-09 05:47:14 UTC (rev 4052)
+++ MacRuby/trunk/encoding.h 2010-05-09 06:45:47 UTC (rev 4053)
@@ -145,7 +145,7 @@
ENCODING_UTF32LE,
ENCODING_ISO8859_1,
ENCODING_MACROMAN,
- //ENCODING_EUCJP,
+ ENCODING_EUCJP,
//ENCODING_SJIS,
//ENCODING_CP932,
Modified: MacRuby/trunk/inits.c
===================================================================
--- MacRuby/trunk/inits.c 2010-05-09 05:47:14 UTC (rev 4052)
+++ MacRuby/trunk/inits.c 2010-05-09 06:45:47 UTC (rev 4053)
@@ -16,7 +16,6 @@
void Init_Binding(void);
void Init_Comparable(void);
void Init_Complex(void);
-void Init_transcode(void);
void Init_Dir(void);
void Init_Enumerable(void);
void Init_Enumerator(void);
@@ -87,7 +86,6 @@
Init_Struct();
Init_Regexp();
Init_pack();
- Init_transcode();
Init_marshal();
Init_Range();
Init_IO();
Modified: MacRuby/trunk/rakelib/builder/builder.rb
===================================================================
--- MacRuby/trunk/rakelib/builder/builder.rb 2010-05-09 05:47:14 UTC (rev 4052)
+++ MacRuby/trunk/rakelib/builder/builder.rb 2010-05-09 06:45:47 UTC (rev 4053)
@@ -3,7 +3,7 @@
OBJS = %w{
array bignum class compar complex enum enumerator error eval file load proc
gc hash env inits io math numeric object pack parse prec dir process
- random range rational re ruby signal sprintf st string struct time transcode
+ random range rational re ruby signal sprintf st string struct time
util variable version thread id objc bs ucnv encoding main dln dmyext marshal
gcd vm_eval prelude miniprelude gc-stub bridgesupport compiler dispatcher vm
symbol debugger MacRuby MacRubyDebuggerConnector NSArray NSDictionary
Modified: MacRuby/trunk/spec/frozen/tags/macruby/core/string/encode_tags.txt
===================================================================
--- MacRuby/trunk/spec/frozen/tags/macruby/core/string/encode_tags.txt 2010-05-09 05:47:14 UTC (rev 4052)
+++ MacRuby/trunk/spec/frozen/tags/macruby/core/string/encode_tags.txt 2010-05-09 06:45:47 UTC (rev 4053)
@@ -1,19 +1,11 @@
-fails:String#encode! transcodes to the default internal encoding with no argument
-fails:String#encode! transcodes self to the given encoding
fails:String#encode! can convert between encodings where a multi-stage conversion path is needed
-fails:String#encode! raises an Encoding::InvalidByteSequenceError for invalid byte sequences
fails:String#encode! raises UndefinedConversionError if the String contains characters invalid for the target encoding
fails:String#encode! raises Encoding::ConverterNotFoundError for invalid target encodings
-fails:String#encode transcodes to the default internal encoding with no argument
fails:String#encode returns self when called with only a target encoding
-fails:String#encode transcodes self to the given encoding
fails:String#encode can convert between encodings where a multi-stage conversion path is needed
-fails:String#encode raises an Encoding::InvalidByteSequenceError for invalid byte sequences
fails:String#encode raises UndefinedConversionError if the String contains characters invalid for the target encoding
fails:String#encode raises Encoding::ConverterNotFoundError for invalid target encodings
-fails:String#encode! replaces undefined characters
fails:String#encode! replaces xml characters
fails:String#encode! replaces xml characters and quotes the result
-fails:String#encode replaces undefined characters
fails:String#encode replaces xml characters
fails:String#encode replaces xml characters and quotes the result
Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c 2010-05-09 05:47:14 UTC (rev 4052)
+++ MacRuby/trunk/string.c 2010-05-09 06:45:47 UTC (rev 4053)
@@ -1248,15 +1248,26 @@
TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_TEXT,
TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_ATTR
};
+
+
static rb_str_t *
str_transcode(rb_str_t *self, rb_encoding_t *src_encoding, rb_encoding_t *dst_encoding,
+ int behavior_for_invalid, int behavior_for_undefined, rb_str_t *replacement_str);
+static inline rb_str_t *
+str_simple_transcode(rb_str_t *self, rb_encoding_t *dst_encoding)
+{
+ return str_transcode(self, self->encoding, dst_encoding,
+ TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, NULL);
+}
+static rb_str_t *
+str_transcode(rb_str_t *self, rb_encoding_t *src_encoding, rb_encoding_t *dst_encoding,
int behavior_for_invalid, int behavior_for_undefined, rb_str_t *replacement_str)
{
if ((behavior_for_invalid == TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING)
|| (behavior_for_undefined == TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING)) {
assert(replacement_str != NULL);
assert(replacement_str->encoding != NULL);
- assert(replacement_str->encoding == dst_encoding);
+ assert((replacement_str->length_in_bytes == 0) || (replacement_str->encoding == dst_encoding));
}
rb_str_t *dst_str = str_alloc(rb_cRubyString);
@@ -1326,10 +1337,24 @@
rb_raise(rb_eUndefinedConversionError, "U+%04X from %s to %s", c, src_encoding->public_name, dst_encoding->public_name);
break;
case TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING:
- str_concat_bytes(dst_str, replacement_str->data.bytes, replacement_str->length_in_bytes);
+ if (replacement_str->length_in_bytes > 0) {
+ str_concat_bytes(dst_str, replacement_str->data.bytes, replacement_str->length_in_bytes);
+ }
break;
case TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_TEXT:
case TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_ATTR:
+ {
+ char xml[10];
+ snprintf(xml, 10, "&#x%X;", c);
+ if (dst_encoding->ascii_compatible) {
+ str_concat_bytes(dst_str, xml, strlen(xml));
+ }
+ else {
+ rb_str_t *xml_str = RSTR(rb_str_new2(xml));
+ xml_str = str_simple_transcode(xml_str, dst_encoding);
+ str_concat_bytes(dst_str, xml_str->data.bytes, xml_str->length_in_bytes);
+ }
+ }
break;
default:
abort();
@@ -1360,7 +1385,9 @@
}
break;
case TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING:
- str_concat_bytes(dst_str, replacement_str->data.bytes, replacement_str->length_in_bytes);
+ if (replacement_str->length_in_bytes > 0) {
+ str_concat_bytes(dst_str, replacement_str->data.bytes, replacement_str->length_in_bytes);
+ }
break;
default:
abort();
@@ -1817,10 +1844,8 @@
VALUE replacement = rb_hash_aref(opt, replace_sym);
if (!NIL_P(replacement)) {
replacement_str = str_need_string(replacement);
- if (replacement_str->encoding != dst_encoding) {
- replacement_str = str_transcode(replacement_str, replacement_str->encoding,
- dst_encoding, TRANSCODE_BEHAVIOR_RAISE_EXCEPTION,
- TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, NULL);
+ if ((replacement_str->encoding != dst_encoding) && (replacement_str->length_in_bytes > 0)) {
+ replacement_str = str_simple_transcode(replacement_str, dst_encoding);
}
if ((behavior_for_invalid != TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING)
&& (behavior_for_undefined == TRANSCODE_BEHAVIOR_RAISE_EXCEPTION)) {
@@ -1849,9 +1874,7 @@
}
else {
replacement_str = RSTR(rb_enc_str_new("?", 1, rb_encodings[ENCODING_ASCII]));
- replacement_str = str_transcode(replacement_str, replacement_str->encoding,
- dst_encoding, TRANSCODE_BEHAVIOR_RAISE_EXCEPTION,
- TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, NULL);
+ replacement_str = str_simple_transcode(replacement_str, dst_encoding);
}
}
Deleted: MacRuby/trunk/transcode.c
===================================================================
--- MacRuby/trunk/transcode.c 2010-05-09 05:47:14 UTC (rev 4052)
+++ MacRuby/trunk/transcode.c 2010-05-09 06:45:47 UTC (rev 4053)
@@ -1,474 +0,0 @@
-/**********************************************************************
-
- transcode.c -
-
- $Author: naruse $
- created at: Tue Oct 30 16:10:22 JST 2007
-
- Copyright (C) 2007 Martin Duerst
-
-**********************************************************************/
-
-#include "ruby/ruby.h"
-
-#if !WITH_OBJC
-
-#include "ruby/encoding.h"
-#define PType (int)
-#include "transcode_data.h"
-#include <ctype.h>
-
-static VALUE sym_invalid, sym_ignore;
-#define INVALID_IGNORE 0x1
-
-/*
- * Dispatch data and logic
- */
-
-static st_table *transcoder_table, *transcoder_lib_table;
-
-#define TRANSCODER_INTERNAL_SEPARATOR '\t'
-
-static char *
-transcoder_key(const char *from_e, const char *to_e)
-{
- int to_len = strlen(to_e);
- int from_len = strlen(from_e);
- char *const key = xmalloc(to_len + from_len + 2);
-
- memcpy(key, to_e, to_len);
- memcpy(key + to_len + 1, from_e, from_len + 1);
- key[to_len] = TRANSCODER_INTERNAL_SEPARATOR;
- return key;
-}
-
-void
-rb_register_transcoder(const rb_transcoder *tr)
-{
- st_data_t k, val = 0;
- const char *const from_e = tr->from_encoding;
- const char *const to_e = tr->to_encoding;
- char *const key = transcoder_key(from_e, to_e);
-
- if (st_lookup(transcoder_table, (st_data_t)key, &val)) {
- xfree(key);
- rb_raise(rb_eArgError, "transcoder from %s to %s has been already registered",
- from_e, to_e);
- }
- k = (st_data_t)key;
- if (st_delete(transcoder_lib_table, &k, &val)) {
- xfree((char *)k);
- }
- st_insert(transcoder_table, (st_data_t)key, (st_data_t)tr);
-}
-
-static void
-declare_transcoder(const char *to, const char *from, const char *lib)
-{
- const char *const key = transcoder_key(to, from);
- st_data_t k = (st_data_t)key, val;
-
- if (st_delete(transcoder_lib_table, &k, &val)) {
- xfree((char *)k);
- }
- st_insert(transcoder_lib_table, (st_data_t)key, (st_data_t)lib);
-}
-
-#define MAX_TRANSCODER_LIBNAME_LEN 64
-static const char transcoder_lib_prefix[] = "enc/trans/";
-
-void
-rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib)
-{
- if (!lib || strlen(lib) > MAX_TRANSCODER_LIBNAME_LEN) {
- rb_raise(rb_eArgError, "invalid library name - %s",
- lib ? lib : "(null)");
- }
- declare_transcoder(enc1, enc2, lib);
- declare_transcoder(enc2, enc1, lib);
-}
-
-#define encoding_equal(enc1, enc2) (STRCASECMP(enc1, enc2) == 0)
-
-static const rb_transcoder *
-transcode_dispatch(const char* from_encoding, const char* to_encoding)
-{
- char *const key = transcoder_key(from_encoding, to_encoding);
- st_data_t k, val = 0;
-
- while (!st_lookup(transcoder_table, (k = (st_data_t)key), &val) &&
- st_delete(transcoder_lib_table, &k, &val)) {
- const char *const lib = (const char *)val;
- int len = strlen(lib);
- char path[sizeof(transcoder_lib_prefix) + MAX_TRANSCODER_LIBNAME_LEN];
-
- xfree((char *)k);
- if (len > MAX_TRANSCODER_LIBNAME_LEN) return NULL;
- memcpy(path, transcoder_lib_prefix, sizeof(transcoder_lib_prefix) - 1);
- memcpy(path + sizeof(transcoder_lib_prefix) - 1, lib, len + 1);
- if (!rb_require(path)) return NULL;
- }
- if (!val) {
- if (!st_lookup(transcoder_table, (st_data_t)key, &val)) {
- /* multistep logic, via UTF-8 */
- if (!encoding_equal(from_encoding, "UTF-8") &&
- !encoding_equal(to_encoding, "UTF-8") &&
- transcode_dispatch("UTF-8", to_encoding)) { /* check that we have a second step */
- return transcode_dispatch(from_encoding, "UTF-8"); /* return first step */
- }
- return NULL;
- }
- }
- return (rb_transcoder *)val;
-}
-
-
-/*
- * Transcoding engine logic
- */
-static void
-transcode_loop(unsigned char **in_pos, unsigned char **out_pos,
- unsigned char *in_stop, unsigned char *out_stop,
- const rb_transcoder *my_transcoder,
- rb_transcoding *my_transcoding,
- const int opt)
-{
- unsigned char *in_p = *in_pos, *out_p = *out_pos;
- const BYTE_LOOKUP *conv_tree_start = my_transcoder->conv_tree_start;
- const BYTE_LOOKUP *next_table;
- unsigned char *char_start;
- unsigned int next_offset;
- VALUE next_info;
- unsigned char next_byte;
- int from_utf8 = my_transcoder->from_utf8;
- unsigned char *out_s = out_stop - my_transcoder->max_output + 1;
- while (in_p < in_stop) {
- char_start = in_p;
- next_table = conv_tree_start;
- if (out_p >= out_s) {
- int len = (out_p - *out_pos);
- int new_len = (len + my_transcoder->max_output) * 2;
- *out_pos = (*my_transcoding->flush_func)(my_transcoding, len, new_len);
- out_p = *out_pos + len;
- out_s = *out_pos + new_len - my_transcoder->max_output;
- }
- next_byte = (unsigned char)*in_p++;
- follow_byte:
- next_offset = next_table->base[next_byte];
- next_info = (VALUE)next_table->info[next_offset];
- follow_info:
- switch (next_info & 0x1F) {
- case NOMAP:
- *out_p++ = next_byte;
- continue;
- case 0x00: case 0x04: case 0x08: case 0x0C:
- case 0x10: case 0x14: case 0x18: case 0x1C:
- if (in_p >= in_stop) {
- /* todo: deal with the case of backtracking */
- /* todo: deal with incomplete input (streaming) */
- goto invalid;
- }
- next_byte = (unsigned char)*in_p++;
- if (from_utf8) {
- if ((next_byte&0xC0) == 0x80)
- next_byte -= 0x80;
- else {
- in_p--; /* may need to add more code later to revert other things */
- goto invalid;
- }
- }
- next_table = (const BYTE_LOOKUP *)next_info;
- goto follow_byte;
- /* maybe rewrite the following cases to use fallthrough???? */
- case ZERObt: /* drop input */
- continue;
- case ONEbt:
- *out_p++ = getBT1(next_info);
- continue;
- case TWObt:
- *out_p++ = getBT1(next_info);
- *out_p++ = getBT2(next_info);
- continue;
- case FOURbt:
- *out_p++ = getBT0(next_info);
- case THREEbt: /* fall through */
- *out_p++ = getBT1(next_info);
- *out_p++ = getBT2(next_info);
- *out_p++ = getBT3(next_info);
- continue;
- case FUNii:
- next_info = (VALUE)(*my_transcoder->func_ii)(next_info);
- goto follow_info;
- case FUNsi:
- next_info = (VALUE)(*my_transcoder->func_si)(char_start);
- goto follow_info;
- break;
- case FUNio:
- out_p += (VALUE)(*my_transcoder->func_io)(next_info, out_p);
- break;
- case FUNso:
- out_p += (VALUE)(*my_transcoder->func_so)(char_start, out_p);
- break;
- case INVALID:
- goto invalid;
- case UNDEF:
- /* todo: add code for alternate behaviors */
- rb_raise(rb_eRuntimeError /*@@@change exception*/, "conversion undefined for byte sequence (maybe invalid byte sequence)");
- continue;
- }
- continue;
- invalid:
- /* deal with invalid byte sequence */
- /* todo: add more alternative behaviors */
- if (opt&INVALID_IGNORE) {
- continue;
- }
- rb_raise(rb_eRuntimeError /*change exception*/, "invalid byte sequence");
- continue;
- }
- /* cleanup */
- *in_pos = in_p;
- *out_pos = out_p;
-}
-
-
-/*
- * String-specific code
- */
-
-static unsigned char *
-str_transcoding_resize(rb_transcoding *my_transcoding, int len, int new_len)
-{
- VALUE dest_string = my_transcoding->ruby_string_dest;
- rb_str_resize(dest_string, new_len);
- return (unsigned char *)RSTRING_BYTEPTR(dest_string);
-}
-
-static int
-str_transcode(int argc, VALUE *argv, VALUE *self)
-{
- VALUE dest;
- VALUE str = *self;
- long blen, slen;
- unsigned char *buf, *bp, *sp, *fromp;
- rb_encoding *from_enc, *to_enc;
- const char *from_e, *to_e;
- int from_encidx, to_encidx;
- VALUE from_encval, to_encval;
- const rb_transcoder *my_transcoder;
- rb_transcoding my_transcoding;
- int final_encoding = 0;
- VALUE opt;
- int options = 0;
-
- opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash");
- if (!NIL_P(opt)) {
- VALUE v;
-
- argc--;
- v = rb_hash_aref(opt, sym_invalid);
- if (NIL_P(v)) {
- rb_raise(rb_eArgError, "unknown value for invalid: setting");
- }
- else if (v==sym_ignore) {
- options |= INVALID_IGNORE;
- }
- }
- if (argc < 1 || argc > 2) {
- rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
- }
- if ((to_encidx = rb_to_encoding_index(to_encval = argv[0])) < 0) {
- to_enc = 0;
- to_encidx = 0;
- to_e = StringValueCStr(to_encval);
- }
- else {
- to_enc = rb_enc_from_index(to_encidx);
- to_e = rb_enc_name(to_enc);
- }
- if (argc==1) {
- from_encidx = rb_enc_get_index(str);
- from_enc = rb_enc_from_index(from_encidx);
- from_e = rb_enc_name(from_enc);
- }
- else if ((from_encidx = rb_to_encoding_index(from_encval = argv[1])) < 0) {
- from_enc = 0;
- from_e = StringValueCStr(from_encval);
- }
- else {
- from_enc = rb_enc_from_index(from_encidx);
- from_e = rb_enc_name(from_enc);
- }
-
- if (from_enc && from_enc == to_enc) {
- return -1;
- }
- if (from_enc && to_enc && rb_enc_asciicompat(from_enc) && rb_enc_asciicompat(to_enc)) {
- if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) {
- return to_encidx;
- }
- }
- if (encoding_equal(from_e, to_e)) {
- return -1;
- }
-
- while (!final_encoding) { /* loop for multistep transcoding */
- /* later, maybe use smaller intermediate strings for very long strings */
- if (!(my_transcoder = transcode_dispatch(from_e, to_e))) {
- rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_e, to_e);
- }
-
- my_transcoding.transcoder = my_transcoder;
-
- if (my_transcoder->preprocessor) {
- fromp = sp = (unsigned char *)RSTRING_BYTEPTR(str);
- slen = RSTRING_BYTELEN(str);
- blen = slen + 30; /* len + margin */
- dest = rb_str_tmp_new(blen);
- bp = (unsigned char *)RSTRING_BYTEPTR(dest);
- my_transcoding.ruby_string_dest = dest;
- (*my_transcoder->preprocessor)(&fromp, &bp, (sp+slen), (bp+blen), &my_transcoding);
- if (fromp != sp+slen) {
- rb_raise(rb_eArgError, "not fully converted, %td bytes left", sp+slen-fromp);
- }
- buf = (unsigned char *)RSTRING_BYTEPTR(dest);
- *bp = '\0';
- rb_str_set_len(dest, bp - buf);
- str = dest;
- }
- fromp = sp = (unsigned char *)RSTRING_BYTEPTR(str);
- slen = RSTRING_BYTELEN(str);
- blen = slen + 30; /* len + margin */
- dest = rb_str_tmp_new(blen);
- bp = (unsigned char *)RSTRING_BYTEPTR(dest);
- my_transcoding.ruby_string_dest = dest;
- my_transcoding.flush_func = str_transcoding_resize;
-
- transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), my_transcoder, &my_transcoding, options);
- if (fromp != sp+slen) {
- rb_raise(rb_eArgError, "not fully converted, %td bytes left", sp+slen-fromp);
- }
- buf = (unsigned char *)RSTRING_BYTEPTR(dest);
- *bp = '\0';
- rb_str_set_len(dest, bp - buf);
- if (my_transcoder->postprocessor) {
- str = dest;
- fromp = sp = (unsigned char *)RSTRING_BYTEPTR(str);
- slen = RSTRING_BYTELEN(str);
- blen = slen + 30; /* len + margin */
- dest = rb_str_tmp_new(blen);
- bp = (unsigned char *)RSTRING_BYTEPTR(dest);
- my_transcoding.ruby_string_dest = dest;
- (*my_transcoder->postprocessor)(&fromp, &bp, (sp+slen), (bp+blen), &my_transcoding);
- if (fromp != sp+slen) {
- rb_raise(rb_eArgError, "not fully converted, %td bytes left", sp+slen-fromp);
- }
- buf = (unsigned char *)RSTRING_BYTEPTR(dest);
- *bp = '\0';
- rb_str_set_len(dest, bp - buf);
- }
-
- if (encoding_equal(my_transcoder->to_encoding, to_e)) {
- final_encoding = 1;
- }
- else {
- from_e = my_transcoder->to_encoding;
- str = dest;
- }
- }
- /* set encoding */
- if (!to_enc) {
- to_encidx = rb_define_dummy_encoding(to_e);
- }
- *self = dest;
-
- return to_encidx;
-}
-
-/*
- * call-seq:
- * str.encode!(encoding [, options] ) => str
- * str.encode!(to_encoding, from_encoding [, options] ) => str
- *
- * The first form transcodes the contents of <i>str</i> from
- * str.encoding to +encoding+.
- * The second form transcodes the contents of <i>str</i> from
- * from_encoding to to_encoding.
- * The options Hash gives details for conversion. See String#encode
- * for details.
- * Returns the string even if no changes were made.
- */
-
-static VALUE
-str_encode_bang(VALUE str, SEL sel, int argc, VALUE *argv)
-{
- VALUE newstr = str;
- int encidx = str_transcode(argc, argv, &newstr);
- int cr = 0;
-
- if (encidx < 0) return str;
- rb_str_shared_replace(str, newstr);
- rb_enc_associate_index(str, encidx);
-
- /* transcoded string never be broken. */
- if (rb_enc_asciicompat(rb_enc_from_index(encidx))) {
- rb_str_coderange_scan_restartable(RSTRING_BYTEPTR(str), RSTRING_END(str), 0, &cr);
- }
- else {
- cr = ENC_CODERANGE_VALID;
- }
- ENC_CODERANGE_SET(str, cr);
- return str;
-}
-
-/*
- * call-seq:
- * str.encode(encoding [, options] ) => str
- * str.encode(to_encoding, from_encoding [, options] ) => str
- *
- * The first form returns a copy of <i>str</i> transcoded
- * to encoding +encoding+.
- * The second form returns a copy of <i>str</i> transcoded
- * from from_encoding to to_encoding.
- * The options Hash gives details for conversion. Details
- * to be added.
- */
-
-#else // WITH_OBJC
-
-static VALUE
-str_encode_bang(VALUE str, SEL sel, int argc, VALUE *argv)
-{
- /* TODO */
- return str;
-}
-
-#endif
-
-static VALUE
-str_encode(VALUE str, SEL sel, int argc, VALUE *argv)
-{
- str = rb_str_dup(str);
- return str_encode_bang(str, 0, argc, argv);
-}
-
-VALUE
-rb_str_transcode(VALUE str, VALUE to)
-{
- return str_encode(str, 0, 1, &to);
-}
-
-void
-Init_transcode(void)
-{
-#if !WITH_OBJC
- transcoder_table = st_init_strcasetable();
- transcoder_lib_table = st_init_strcasetable();
-
- sym_invalid = ID2SYM(rb_intern("invalid"));
- sym_ignore = ID2SYM(rb_intern("ignore"));
-#endif
-
- rb_objc_define_method(rb_cString, "encode", str_encode, -1);
- rb_objc_define_method(rb_cString, "encode!", str_encode_bang, -1);
-}
Deleted: MacRuby/trunk/transcode_data.h
===================================================================
--- MacRuby/trunk/transcode_data.h 2010-05-09 05:47:14 UTC (rev 4052)
+++ MacRuby/trunk/transcode_data.h 2010-05-09 06:45:47 UTC (rev 4053)
@@ -1,86 +0,0 @@
-/**********************************************************************
-
- transcode_data.h -
-
- $Author: nobu $
- created at: Mon 10 Dec 2007 14:01:47 JST 2007
-
- Copyright (C) 2007 Martin Duerst
-
-**********************************************************************/
-
-#include "ruby/ruby.h"
-
-#ifndef RUBY_TRANSCODE_DATA_H
-#define RUBY_TRANSCODE_DATA_H 1
-
-typedef unsigned char base_element;
-
-typedef struct byte_lookup {
- const base_element *base;
- const struct byte_lookup *const *info;
-} BYTE_LOOKUP;
-
-#ifndef PType
-/* data file needs to treat this as a pointer, to remove warnings */
-#define PType (const BYTE_LOOKUP *)
-#endif
-
-#define NOMAP (PType 0x01) /* single byte direct map */
-#define ONEbt (0x02) /* one byte payload */
-#define TWObt (0x03) /* two bytes payload */
-#define THREEbt (0x05) /* three bytes payload */
-#define FOURbt (0x06) /* four bytes payload, UTF-8 only, macros start at getBT0 */
-#define INVALID (PType 0x07) /* invalid byte sequence */
-#define UNDEF (PType 0x09) /* legal but undefined */
-#define ZERObt (PType 0x0A) /* zero bytes of payload, i.e. remove */
-#define FUNii (PType 0x0B) /* function from info to info */
-#define FUNsi (PType 0x0D) /* function from start to info */
-#define FUNio (PType 0x0E) /* function from info to output */
-#define FUNso (PType 0x0F) /* function from start to output */
-
-#define o1(b1) (PType((((unsigned char)(b1))<<8)|ONEbt))
-#define o2(b1,b2) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|TWObt))
-#define o3(b1,b2,b3) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|(((unsigned char)(b3))<<24)|THREEbt))
-#define o4(b0,b1,b2,b3) (PType((((unsigned char)(b1))<< 8)|(((unsigned char)(b2))<<16)|(((unsigned char)(b3))<<24)|((((unsigned char)(b0))&0x07)<<5)|FOURbt))
-
-#define getBT1(a) (((a)>> 8)&0xFF)
-#define getBT2(a) (((a)>>16)&0xFF)
-#define getBT3(a) (((a)>>24)&0xFF)
-#define getBT0(a) ((((a)>> 5)&0x07)|0xF0) /* for UTF-8 only!!! */
-
-#define o2FUNii(b1,b2) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|FUNii))
-
-/* do we need these??? maybe not, can be done with simple tables */
-#define ONETRAIL /* legal but undefined if one more trailing UTF-8 */
-#define TWOTRAIL /* legal but undefined if two more trailing UTF-8 */
-#define THREETRAIL /* legal but undefined if three more trailing UTF-8 */
-
-/* dynamic structure, one per conversion (similar to iconv_t) */
-/* may carry conversion state (e.g. for iso-2022-jp) */
-typedef struct rb_transcoding {
- const struct rb_transcoder *transcoder;
- VALUE ruby_string_dest; /* the String used as the conversion destination,
- or NULL if something else is being converted */
- unsigned char *(*flush_func)(struct rb_transcoding*, int, int);
-} rb_transcoding;
-
-/* static structure, one per supported encoding pair */
-typedef struct rb_transcoder {
- const char *from_encoding;
- const char *to_encoding;
- const BYTE_LOOKUP *conv_tree_start;
- int max_output;
- int from_utf8;
- void (*preprocessor)(unsigned char**, unsigned char**, unsigned char*, unsigned char*, struct rb_transcoding *);
- void (*postprocessor)(unsigned char**, unsigned char**, unsigned char*, unsigned char*, struct rb_transcoding *);
- VALUE (*func_ii)(VALUE); /* info -> info */
- VALUE (*func_si)(const unsigned char *); /* start -> info */
- int (*func_io)(VALUE, const unsigned char*); /* info -> output */
- int (*func_so)(const unsigned char*, unsigned char*); /* start -> output */
-} rb_transcoder;
-
-void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);
-void rb_register_transcoder(const rb_transcoder *);
-
-#endif /* RUBY_TRANSCODE_DATA_H */
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20100508/7c03a144/attachment-0001.html>
More information about the macruby-changes mailing list