[macruby-changes] [159] MacRuby/trunk/string.c
source_changes at macosforge.org
source_changes at macosforge.org
Mon Apr 21 14:43:26 PDT 2008
Revision: 159
http://trac.macosforge.org/projects/ruby/changeset/159
Author: lsansonetti at apple.com
Date: 2008-04-21 14:43:26 -0700 (Mon, 21 Apr 2008)
Log Message:
-----------
implementing #tr & #tr_s
Modified Paths:
--------------
MacRuby/trunk/string.c
Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c 2008-04-19 09:01:14 UTC (rev 158)
+++ MacRuby/trunk/string.c 2008-04-21 21:43:26 UTC (rev 159)
@@ -4884,10 +4884,344 @@
}
static VALUE rb_str_delete_bang(int,VALUE*,VALUE);
+#endif
+#if WITH_OBJC
+typedef void str_charset_find_cb /* callback type invoked by str_charset_find() once per matched run */
+(CFRange *, const CFRange *, CFStringRef, UniChar, void *); /* (search range, updatable by the callee; matched run; scanned string; first character of the run; caller context) */
+
+/*
+ * Scan `str` for characters that belong to the set described by `charsets`
+ * and invoke `cb` once for every run of consecutive matching characters.
+ *
+ * charsets / charset_count: Ruby strings describing character sets. The
+ *   first entry initializes the set and every further entry is intersected
+ *   with it. In each entry a leading '^' negates the set and "x-y" adds
+ *   the range x..y; a '-' that ends the entry is a literal '-'.
+ * squeeze_mode: when true, a run only contains repetitions of one character.
+ * cb / ctx: callback and its opaque context. The callback receives a
+ *   pointer to the current search range; if it mutates `str` it is expected
+ *   to update that range. When the end of the range drops below the known
+ *   length, the scan restarts from the new range location.
+ *
+ * Does nothing when no charset is given or when `str` is empty.
+ *
+ * NOTE(review): reversed ranges ("z-a"), a '-' opening an entry, backslash
+ * escapes, a lone "^" and non-ASCII bytes are not given special treatment
+ * here -- confirm the intended semantics before relying on them.
+ * NOTE(review): sptr is dereferenced without a NULL check; tr_trans()
+ * asserts that RSTRING_CPTR() is non-NULL, other callers should be checked.
+ */
+static void
+str_charset_find(CFStringRef str, VALUE *charsets, int charset_count,
+ bool squeeze_mode, str_charset_find_cb *cb, void *ctx)
+{
+ int i;
+ long n;
+ CFMutableCharacterSetRef charset;
+ CFRange search_range, result_range;
+
+ if (charset_count == 0)
+ return;
+
+ n = CFStringGetLength((CFStringRef)str);
+ if (n == 0)
+ return;
+
+ for (i = 0, charset = NULL; i < charset_count; i++) {
+ VALUE s = charsets[i];
+ bool exclude;
+ const char *sptr, *p;
+
+ StringValue(s);
+
+ sptr = RSTRING_CPTR(s);
+ exclude = sptr[0] == '^';
+
+ /* Always look for a range marker, so that negated specs such as
+ * "^a-z" get their ranges parsed too. */
+ p = strchr(sptr, '-');
+ if (exclude || p != NULL) {
+ CFMutableCharacterSetRef subset;
+ const char *b, *e;
+
+ /* b: start of the pending literal segment, e: last byte of the spec */
+ b = exclude ? sptr + 1 : sptr;
+ e = sptr + strlen(sptr) - 1;
+ subset = CFCharacterSetCreateMutable(NULL);
+ while (p != NULL) {
+ if (p > b && *(p - 1) != '\\' && *(p + 1) != '\0') {
+ CFCharacterSetAddCharactersInRange(subset,
+ CFRangeMake(*(p - 1), *(p + 1) - *(p - 1) + 1));
+ }
+ if (p > b) {
+ /* literal characters found before the '-' */
+ CFStringRef substr;
+ substr = CFStringCreateWithBytes(NULL,
+ (const UInt8 *)b,
+ (CFIndex)p - (CFIndex)b,
+ kCFStringEncodingUTF8,
+ false);
+ CFCharacterSetAddCharactersInString(subset, substr);
+ CFRelease(substr);
+ }
+
+ if (*(p + 1) == '\0') {
+ /* A '-' ending the spec is a literal: leave it to the tail
+ * handling below instead of stepping past the terminator. */
+ b = p;
+ break;
+ }
+ b = p + 2;
+ p = strchr(b, '-');
+ }
+ if (b <= e) {
+ /* remaining literal characters after the last range */
+ CFStringRef substr;
+ substr = CFStringCreateWithBytes(NULL,
+ (const UInt8 *)b,
+ (CFIndex)e - (CFIndex)b + 1,
+ kCFStringEncodingUTF8,
+ false);
+ CFCharacterSetAddCharactersInString(subset, substr);
+ CFRelease(substr);
+ }
+
+ if (exclude)
+ CFCharacterSetInvert(subset);
+
+ if (charset == NULL) {
+ charset = subset;
+ }
+ else {
+ CFCharacterSetIntersect(charset, subset);
+ CFRelease(subset);
+ }
+ }
+ else {
+ /* plain list of characters: no negation, no range */
+ if (charset == NULL) {
+ charset = CFCharacterSetCreateMutable(NULL);
+ CFCharacterSetAddCharactersInString(charset, (CFStringRef)s);
+ }
+ else {
+ CFCharacterSetRef subset;
+ subset = CFCharacterSetCreateWithCharactersInString(NULL,
+ (CFStringRef)s);
+ CFCharacterSetIntersect(charset, subset);
+ CFRelease(subset);
+ }
+ }
+ }
+
+ search_range = CFRangeMake(0, n);
+#if 0
+ /* Disabled earlier implementation; it still uses the old 4-argument
+ * callback signature. */
+ while (search_range.length != 0
+ && CFStringFindCharacterFromSet(
+ (CFStringRef)str,
+ (CFCharacterSetRef)charset,
+ search_range,
+ 0,
+ &result_range)) {
+ (*cb)(&search_range, (const CFRange *)&result_range, str, ctx);
+ }
+#else
+ CFStringInlineBuffer buf;
+ UniChar previous_char = 0;
+ CFStringInitInlineBuffer((CFStringRef)str, &buf, search_range);
+ do {
+ long i;
+ bool mutated = false;
+
+ /* The callback shrank the search range: adopt the new end as the
+ * known length and refresh the inline buffer. */
+ if (search_range.location + search_range.length < n) {
+ n = search_range.location + search_range.length;
+ CFStringInitInlineBuffer((CFStringRef)str, &buf, CFRangeMake(0, n));
+ }
+
+ result_range.length = 0;
+
+ for (i = search_range.location;
+ i < search_range.location + search_range.length;
+ i++) {
+
+ UniChar c;
+
+ c = CFStringGetCharacterFromInlineBuffer(&buf, i);
+ if (CFCharacterSetIsCharacterMember((CFCharacterSetRef)charset,
+ c)) {
+ if (result_range.length == 0) {
+ /* start a new run */
+ result_range.location = i;
+ result_range.length = 1;
+ previous_char = c;
+ }
+ else {
+ if (result_range.location + result_range.length == i
+ && (!squeeze_mode || previous_char == c)) {
+ /* contiguous (and, in squeeze mode, identical): extend */
+ result_range.length++;
+ }
+ else {
+ /* report the finished run, then start the next one */
+ (*cb)(&search_range, (const CFRange *)&result_range,
+ str, previous_char, ctx);
+ result_range.location = i;
+ result_range.length = 1;
+ previous_char = c;
+ if (search_range.location + search_range.length < n) {
+ /* the string got shorter: leave the loop and rescan */
+ result_range.location -= n
+ - (search_range.location + search_range.length);
+ mutated = true;
+ break;
+ }
+ }
+ }
+ }
+ }
+ if (!mutated) {
+ /* report the run still pending at the end of the scan */
+ if (result_range.length != 0) {
+ (*cb)(&search_range, (const CFRange *)&result_range, str,
+ previous_char, ctx);
+ result_range.length = 0;
+ previous_char = 0;
+ }
+ }
+ }
+ while (search_range.length != 0 && result_range.length != 0);
+#endif
+
+ CFRelease(charset);
+}
+
+struct tr_trans_cb_ctx { /* state shared between tr_trans() and rb_str_trans_cb() */
+ VALUE orepl; /* replacement string object as passed to tr_trans() */
+ const char *src; /* C bytes of the source character spec */
+ long src_len; /* strlen(src) */
+ const char *repl; /* C bytes of the replacement spec */
+ long repl_len; /* strlen(repl) */
+ int sflag; /* forwarded to trans_replace(): non-zero replaces a whole run by one character */
+ bool changed; /* set to true by the callback each time it runs */
+ CFStringRef opt; /* lazily created one-character replacement; released by tr_trans() */
+};
+
+/*
+ * Replace the matched run `*result_range` of `str` with `substr`.
+ *
+ * sflag == 0: every character of the run is replaced individually, so the
+ *   string keeps its length and `search_range` is left untouched.
+ * sflag != 0: the whole run is replaced by `substr` in one operation and
+ *   `search_range` is moved to start right after the replacement.
+ *
+ * NOTE(review): the range arithmetic assumes `substr` is exactly one
+ * character long, which is how the callers in this file build it.
+ */
+static inline void
+trans_replace(CFMutableStringRef str, const CFRange *result_range,
+ CFStringRef substr, CFRange *search_range, int sflag)
+{
+ if (sflag == 0) {
+ long n;
+ for (n = result_range->location;
+ n < result_range->location + result_range->length;
+ n++)
+ CFStringReplace(str, CFRangeMake(n, 1), substr);
+ }
+ else {
+ CFStringReplace(str, *result_range, substr);
+ /* The run collapsed to one character, so the string lost
+ * (run length - 1) characters. Searching resumes one past the run
+ * start; the remaining length is therefore the old length minus the
+ * skipped prefix and minus the run length. The previous "- 1" here
+ * left the range end one character too far, hiding one-character
+ * shrinks from str_charset_find(). */
+ search_range->length -= result_range->length
+ + (result_range->location - search_range->location);
+ search_range->location = result_range->location + 1;
+ }
+}
+
+/*
+ * str_charset_find() callback used by tr_trans(): translates one matched
+ * run of `str`.
+ *
+ * search_range: range still to be scanned; updated when the string shrinks.
+ * result_range: the matched run.
+ * character:    first character of the run.
+ * ctx:          a struct tr_trans_cb_ctx *.
+ *
+ * Behaviour by replacement length:
+ *   0  - the run is deleted;
+ *   1  - the run is replaced with the replacement string itself;
+ *   >1 - the replacement character is picked by the position of
+ *        `character` in the source spec; positions past the end of the
+ *        replacement spec use its last character.
+ * Only plain character lists and a single "x-y" range are supported for the
+ * multi-character case; anything else raises RuntimeError.
+ *
+ * NOTE(review): rb_raise() is called in the middle of a scan; presumably it
+ * does not return, in which case the character set held by
+ * str_charset_find() and _ctx->opt are not released -- confirm.
+ */
+static void
+rb_str_trans_cb(CFRange *search_range, const CFRange *result_range,
+ CFStringRef str, UniChar character, void *ctx)
+{
+ struct tr_trans_cb_ctx *_ctx;
+
+ _ctx = (struct tr_trans_cb_ctx *)ctx;
+ if (_ctx->repl_len == 0) {
+ /* empty replacement: drop the run and continue from its position */
+ CFStringDelete((CFMutableStringRef)str, *result_range);
+ search_range->length -= result_range->length
+ + (result_range->location - search_range->location);
+ search_range->location = result_range->location;
+ }
+ else if (_ctx->repl_len == 1) {
+ trans_replace((CFMutableStringRef)str, result_range,
+ (CFStringRef)_ctx->orepl, search_range, _ctx->sflag);
+ }
+ else if (_ctx->repl_len > 1) {
+ if (_ctx->src_len == 1) {
+ /* single source character: only the first replacement
+ * character can ever be used, so cache it */
+ if (_ctx->opt == NULL) {
+ _ctx->opt = CFStringCreateWithBytes(NULL,
+ (const UInt8 *)_ctx->repl, 1, kCFStringEncodingUTF8,
+ false);
+ }
+ trans_replace((CFMutableStringRef)str, result_range,
+ (CFStringRef)_ctx->opt, search_range, _ctx->sflag);
+ }
+ else {
+ /* TODO: support all syntaxes */
+ char sb, se, rb, re;
+ bool s_is_range, r_is_range;
+ CFStringRef substr;
+ bool release_substr;
+ long delta;
+
+ if (_ctx->src_len == 3 && _ctx->src[1] == '-') {
+ sb = _ctx->src[0];
+ se = _ctx->src[2];
+ s_is_range = true;
+ }
+ else {
+ s_is_range = false;
+ if (_ctx->src[0] == '^' || strchr(_ctx->src, '-') != NULL)
+ rb_raise(rb_eRuntimeError, "src argument value (%s) not " \
+ "supported yet", _ctx->src);
+ }
+
+ if (_ctx->repl_len == 3 && _ctx->repl[1] == '-') {
+ rb = _ctx->repl[0];
+ re = _ctx->repl[2];
+ r_is_range = true;
+ }
+ else {
+ r_is_range = false;
+ if (_ctx->repl[0] == '^' || strchr(_ctx->repl, '-') != NULL)
+ rb_raise(rb_eRuntimeError, "repl argument value (%s) not " \
+ "supported yet", _ctx->repl);
+ }
+
+ /* delta: zero-based position of `character` in the source spec */
+ if (s_is_range) {
+ assert(sb <= character && se >= character);
+ delta = character - sb;
+ }
+ else {
+ char *p;
+ p = strchr(_ctx->src, character);
+ assert(p != NULL);
+ delta = (long)p - (long)_ctx->src;
+ }
+
+ /* Valid replacement positions are 0..(re - rb) for a range and
+ * 0..(repl_len - 1) for a list; anything beyond is padded with
+ * the last replacement character. ">=" is required for the list
+ * case, otherwise delta == repl_len would pick the terminating
+ * NUL byte of repl. */
+ if ((r_is_range && delta > (re - rb))
+ || (!r_is_range && delta >= _ctx->repl_len)) {
+ if (_ctx->opt == NULL) {
+ _ctx->opt = CFStringCreateWithBytes(NULL,
+ (const UInt8 *)&_ctx->repl[_ctx->repl_len - 1],
+ 1,
+ kCFStringEncodingUTF8,
+ false);
+ }
+ substr = _ctx->opt;
+ release_substr = false;
+ }
+ else {
+ const char r = r_is_range
+ ? rb + delta : _ctx->repl[delta];
+ substr = CFStringCreateWithBytes(NULL, (const UInt8 *)&r, 1,
+ kCFStringEncodingUTF8, false);
+ release_substr = true;
+ }
+
+ trans_replace((CFMutableStringRef)str, result_range,
+ (CFStringRef)substr, search_range, _ctx->sflag);
+
+ if (release_substr)
+ CFRelease(substr);
+ }
+ }
+ _ctx->changed = true;
+}
+#endif
+
static VALUE
tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
{
+#if WITH_OBJC /* CoreFoundation-based implementation of tr/tr_s */
+ struct tr_trans_cb_ctx _ctx; /* state handed to rb_str_trans_cb() */
+
+ StringValue(src);
+ StringValue(repl);
+
+ if (RSTRING_CLEN(str) == 0) /* empty receiver: nothing to translate */
+ return Qnil;
+
+ _ctx.orepl = repl;
+ _ctx.src = RSTRING_CPTR(src);
+ _ctx.repl = RSTRING_CPTR(repl);
+
+ /* TODO: support non-8-bit src/repl */
+ assert(_ctx.src != NULL && _ctx.repl != NULL);
+
+ _ctx.src_len = strlen(_ctx.src);
+ _ctx.repl_len = strlen(_ctx.repl);
+ _ctx.sflag = sflag;
+ _ctx.changed = false;
+ _ctx.opt = NULL;
+
+ str_charset_find((CFStringRef)str, &src, 1, _ctx.repl_len > 1, /* squeeze_mode is on when repl has more than one byte */
+ rb_str_trans_cb, &_ctx);
+
+ if (_ctx.opt != NULL) /* release the replacement string cached by the callback */
+ CFRelease(_ctx.opt);
+
+ return _ctx.changed ? str : Qnil; /* str if the callback ran at least once, Qnil otherwise */
+#else
SIGNED_VALUE trans[256];
rb_encoding *enc, *e1, *e2;
struct tr trsrc, trrepl;
@@ -5083,8 +5417,8 @@
return str;
}
return Qnil;
+#endif
}
-#endif
/*
* call-seq:
@@ -5098,12 +5432,7 @@
static VALUE
rb_str_tr_bang(VALUE str, VALUE src, VALUE repl)
{
-#if WITH_OBJC
- /* TODO */
- rb_notimplement();
-#else
return tr_trans(str, src, repl, 0);
-#endif
}
@@ -5128,11 +5457,7 @@
rb_str_tr(VALUE str, VALUE src, VALUE repl)
{
str = rb_str_dup(str);
-#if WITH_OBJC
- rb_notimplement();
-#else
- tr_trans(str, src, repl, 0);
-#endif
+ rb_str_tr_bang(str, src, repl);
return str;
}
@@ -5212,177 +5537,8 @@
#else
-typedef void str_charset_find_cb
-(CFRange *, const CFRange *, CFStringRef, void *);
-
-static void
-str_charset_find(CFStringRef str, VALUE *charsets, int charset_count,
- bool squeeze_mode, str_charset_find_cb *cb, void *ctx)
-{
- int i;
- long n;
- bool changed;
- CFMutableCharacterSetRef charset;
- CFRange search_range, result_range;
-
- if (charset_count == 0)
- return;
- n = CFStringGetLength((CFStringRef)str);
- if (n == 0)
- return;
-
- for (i = 0, charset = NULL; i < charset_count; i++) {
- VALUE s = charsets[i];
- bool exclude;
- const char *sptr, *p;
-
- StringValue(s);
-
- sptr = RSTRING_CPTR(s);
- exclude = sptr[0] == '^';
-
- p = NULL;
- if (exclude || (p = strchr(sptr, '-')) != NULL) {
- CFMutableCharacterSetRef subset;
- const char *b, *e;
-
- b = exclude ? sptr + 1 : sptr;
- e = sptr + strlen(sptr) - 1;
- subset = CFCharacterSetCreateMutable(NULL);
- while (p != NULL) {
- if (p > b && *(p - 1) != '\\' && *(p + 1) != '\0') {
- CFCharacterSetAddCharactersInRange(subset,
- CFRangeMake(*(p - 1), *(p + 1) - *(p - 1) + 1));
- }
- if (p > b) {
- CFStringRef substr;
- substr = CFStringCreateWithBytes(NULL,
- (const UInt8 *)b,
- (CFIndex)p - (CFIndex)b,
- kCFStringEncodingUTF8,
- false);
- CFCharacterSetAddCharactersInString(subset, substr);
- CFRelease(substr);
- }
-
- b = p + 2;
- p = strchr(b, '-');
- }
- if (b <= e) {
- CFStringRef substr;
- substr = CFStringCreateWithBytes(NULL,
- (const UInt8 *)b,
- (CFIndex)e - (CFIndex)b + 1,
- kCFStringEncodingUTF8,
- false);
- CFCharacterSetAddCharactersInString(subset, substr);
- CFRelease(substr);
- }
-
- if (exclude)
- CFCharacterSetInvert(subset);
-
- if (charset == NULL) {
- charset = subset;
- }
- else {
- CFCharacterSetIntersect(charset, subset);
- CFRelease(subset);
- }
- }
- else {
- if (charset == NULL) {
- charset = CFCharacterSetCreateMutable(NULL);
- CFCharacterSetAddCharactersInString(charset, (CFStringRef)s);
- }
- else {
- CFCharacterSetRef subset;
- subset = CFCharacterSetCreateWithCharactersInString(NULL,
- (CFStringRef)s);
- CFCharacterSetIntersect(charset, subset);
- CFRelease(subset);
- }
- }
- }
-
- search_range = CFRangeMake(0, n);
-#if 0
- while (search_range.length != 0
- && CFStringFindCharacterFromSet(
- (CFStringRef)str,
- (CFCharacterSetRef)charset,
- search_range,
- 0,
- &result_range)) {
- (*cb)(&search_range, (const CFRange *)&result_range, str, ctx);
- }
-#else
- CFStringInlineBuffer buf;
- UniChar previous_char = 0;
- CFStringInitInlineBuffer((CFStringRef)str, &buf, search_range);
- do {
- long i;
- bool mutated = false;
-
- if (search_range.location + search_range.length < n) {
- n = search_range.location + search_range.length;
- CFStringInitInlineBuffer((CFStringRef)str, &buf, CFRangeMake(0, n));
- }
-
- result_range.length = 0;
-
- for (i = search_range.location;
- i < search_range.location + search_range.length;
- i++) {
-
- UniChar c;
-
- c = CFStringGetCharacterFromInlineBuffer(&buf, i);
- if (CFCharacterSetIsCharacterMember((CFCharacterSetRef)charset,
- c)) {
- if (result_range.length == 0) {
- result_range.location = i;
- result_range.length = 1;
- previous_char = c;
- }
- else {
- if (result_range.location + result_range.length == i
- && (!squeeze_mode || previous_char == c)) {
- result_range.length++;
- }
- else {
- (*cb)(&search_range, (const CFRange *)&result_range,
- str, ctx);
- result_range.location = i;
- result_range.length = 1;
- previous_char = c;
- if (search_range.location + search_range.length < n) {
- result_range.location -= n
- - (search_range.location + search_range.length);
- mutated = true;
- break;
- }
- }
- }
- }
- }
- if (!mutated) {
- if (result_range.length != 0) {
- (*cb)(&search_range, (const CFRange *)&result_range, str,
- ctx);
- result_range.length = 0;
- previous_char = 0;
- }
- }
- }
- while (search_range.length != 0 && result_range.length != 0);
#endif
- CFRelease(charset);
-}
-
-#endif
-
/*
* call-seq:
* str.delete!([other_str]+) => str or nil
@@ -5394,7 +5550,7 @@
#if WITH_OBJC
static void
rb_str_delete_bang_cb(CFRange *search_range, const CFRange *result_range,
- CFStringRef str, void *ctx)
+ CFStringRef str, UniChar character, void *ctx)
{
CFStringDelete((CFMutableStringRef)str, *result_range);
search_range->length -= result_range->length
@@ -5498,7 +5654,7 @@
#if WITH_OBJC
static void
rb_str_squeeze_bang_cb(CFRange *search_range, const CFRange *result_range,
- CFStringRef str, void *ctx)
+ CFStringRef str, UniChar character, void *ctx)
{
if (result_range->length > 1) {
CFRange to_delete = *result_range;
@@ -5614,11 +5770,7 @@
static VALUE
rb_str_tr_s_bang(VALUE str, VALUE src, VALUE repl)
{
-#if WITH_OBJC
- rb_notimplement();
-#else
return tr_trans(str, src, repl, 1);
-#endif
}
@@ -5663,7 +5815,7 @@
#if WITH_OBJC
static void
rb_str_count_cb(CFRange *search_range, const CFRange *result_range,
- CFStringRef str, void *ctx)
+ CFStringRef str, UniChar character, void *ctx)
{
(*(int *)ctx) += result_range->length;
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://lists.macosforge.org/pipermail/macruby-changes/attachments/20080421/862201dc/attachment-0001.html
More information about the macruby-changes
mailing list