[macruby-changes] [2225] MacRuby/branches/experimental/ext/libyaml/rubyext.c

source_changes at macosforge.org source_changes at macosforge.org
Wed Aug 5 13:20:51 PDT 2009


Revision: 2225
          http://trac.macosforge.org/projects/ruby/changeset/2225
Author:   pthomson at apple.com
Date:     2009-08-05 13:20:51 -0700 (Wed, 05 Aug 2009)
Log Message:
-----------
Further improvements, though there are still some pretty serious GC bugs and issues with large inputs.

Modified Paths:
--------------
    MacRuby/branches/experimental/ext/libyaml/rubyext.c

Modified: MacRuby/branches/experimental/ext/libyaml/rubyext.c
===================================================================
--- MacRuby/branches/experimental/ext/libyaml/rubyext.c	2009-08-05 20:20:50 UTC (rev 2224)
+++ MacRuby/branches/experimental/ext/libyaml/rubyext.c	2009-08-05 20:20:51 UTC (rev 2225)
@@ -27,8 +27,8 @@
 	
 	VALUE *stack;				// the current object hierarchy that is being parsed.
 								// the root element is at stack[0].
-	uint32_t stack_index;		// what element are we currently adding to?
-	uint32_t stack_size;		// how big can the stack become?
+	int32_t stack_index;		// what element are we currently adding to?
+	int32_t stack_size;			// how big can the stack become?
 	
 	VALUE resolver;				// used to determine how to unserialize objects.
 	
@@ -63,7 +63,7 @@
 
 static struct mcache *to_yaml_cache = NULL;
 
-static const int DEFAULT_STACK_SIZE = 5;
+static const int DEFAULT_STACK_SIZE = 8;
 
 static VALUE
 rb_yaml_parser_alloc(VALUE klass, SEL sel)
@@ -88,39 +88,11 @@
 static int
 rb_yaml_io_read_handler(void *io_ptr, unsigned char *buffer, size_t size, size_t* size_read)
 {
-	VALUE io = (VALUE)io_ptr;
-	long result = rb_io_primitive_read(ExtractIOStruct(io), (UInt8*)buffer, size);
+	long result = rb_io_primitive_read(ExtractIOStruct(io_ptr), (UInt8*)buffer, size);
 	*size_read = result;
 	return (result != -1);
 }
 
-#if 0
-static void
-rb_yaml_guess_type_of_plain_node(yaml_node_t *node)
-{
-	const char* v = (char*) node->data.scalar.value;
-	if (node->data.scalar.length == 0)
-	{
-		node->tag = (yaml_char_t*)"tag:yaml.org,2002:null";
-	}
-	// holy cow, this is not a good solution at all.
-	// i should incorporate rb_cstr_to_inum here, or something.
-	else if (strtol(v, NULL, 10) != 0)
-	{
-		node->tag = (yaml_char_t*)"tag:yaml.org,2002:int";
-	}
-	else if (*v == ':')
-	{
-		node->tag = (yaml_char_t*)"tag:ruby.yaml.org,2002:symbol";
-	}
-	else if ((strcmp(v, "true") == 0) || (strcmp(v, "false") == 0))
-	{
-		node->tag = (yaml_char_t*)"tag:yaml.org,2002:bool";
-	} 
-}
-
-#endif
-
 static VALUE
 rb_yaml_parser_input(VALUE self, SEL sel)
 {
@@ -168,28 +140,60 @@
 }
 
 static VALUE
-rb_yaml_parser_error(VALUE self, SEL sel)
+rb_yaml_parser_generate_error(yaml_parser_t *parser)
 {
 	VALUE error = Qnil;
-	char *msg = NULL;
-	yaml_parser_t *parser = RYAMLParser(self)->parser;
 	assert(parser != NULL);
+	char *descriptor;
 	switch(parser->error)
 	{
+		case YAML_NO_ERROR:
+		return Qnil;
+		
 		case YAML_SCANNER_ERROR:
+		descriptor = "scanning";
+		break;
+		
 		case YAML_PARSER_ERROR:
-		{
-			asprintf(&msg, "syntax error on line %d, col %d: %s", parser->problem_mark.line,
-				parser->problem_mark.column, parser->problem);
-			error = rb_exc_new2(rb_eArgError, msg);
-		}
+		descriptor = "parsing";
+		break;
 		
-		case YAML_NO_ERROR:
+		case YAML_MEMORY_ERROR:
+		descriptor = "memory allocation";
 		break;
 		
+		case YAML_READER_ERROR:
+		descriptor = "reading";
+		break;
+		
 		default:
-		error = rb_exc_new2(rb_eRuntimeError, parser->problem);
+		descriptor = "unknown";
+		break;
 	}
+	
+	char *msg;
+	if(parser->problem != NULL)
+	{
+		if(parser->context != NULL)
+		{
+			asprintf(&msg, "%s error encountered during parsing: %s (line %d, column %d), context %s (line %d, column %d)",
+				descriptor, parser->problem, parser->problem_mark.line,
+				parser->problem_mark.column, parser->context,
+				parser->context_mark.line, parser->context_mark.column);
+		}
+		else
+		{
+			asprintf(&msg, "%s error encountered during parsing: %s (line %d, column %d)",
+				descriptor, parser->problem, parser->problem_mark.line,
+				parser->problem_mark.column);
+		}
+	}
+	else
+	{
+		asprintf(&msg, "%s error encountered during parsing", descriptor);
+	}
+	
+	error = rb_exc_new2(rb_eRuntimeError, msg);
 	if(msg != NULL)
 	{
 		free(msg);
@@ -197,140 +201,180 @@
 	return error;
 }
 
-static inline void
-delete_event(rb_yaml_parser_t *parser)
+static VALUE
+rb_yaml_parser_error(VALUE self, SEL sel)
 {
-	if (parser->event_valid)
-	{
-		yaml_event_delete(&parser->event);
-		parser->event_valid = false;
-	}
+	return rb_yaml_parser_generate_error(RYAMLParser(self)->parser);
 }
 
-static inline bool
-next_event(rb_yaml_parser_t *parser)
+
+static void push(rb_yaml_parser_t *parser, VALUE val) __attribute__ ((noinline));
+
+static void
+push(rb_yaml_parser_t *parser, VALUE val)
 {
-	delete_event(parser);
-	if (yaml_parser_parse(parser->parser, &parser->event) == -1)
+	parser->stack_index++;
+	if(parser->stack_index >= (parser->stack_size - 1))
 	{
-		rb_raise(rb_eRuntimeError, "parsing error"); // XXX: Make this more informative
-		parser->event_valid = false;
-	} else
-	{
-		parser->event_valid = true;
+		printf("Currently at index %d out of size %d\n", parser->stack_index, parser->stack_size);
+		parser->stack_size *= 2;
+		GC_WB(&parser->stack, xrealloc2(parser->stack, parser->stack_size, sizeof(VALUE)));
+		printf("Reallocated to size %d\n", parser->stack_size);
 	}
-	printf("Parsed event\n");
-	return parser->event_valid;
+	parser->stack[parser->stack_index] = val;
 }
 
-static void
-parse_scalar(rb_yaml_parser_t *parser)
+static VALUE rb_yaml_get_and_interpret_scalar(rb_yaml_parser_t *parser) __attribute__ ((noinline));
+
+static VALUE 
+rb_yaml_get_and_interpret_scalar(rb_yaml_parser_t *parser)
 {
-	VALUE current_item = parser->stack[parser->stack_index];
-	VALUE new_str = rb_str_new2((char*)parser->event.data.scalar.value);
-	if(NIL_P(current_item))
+	char *val = (char*)parser->event.data.scalar.value;
+	char *tag = (char*)parser->event.data.scalar.tag;
+	if ((parser->event.data.scalar.style == YAML_PLAIN_SCALAR_STYLE) && (tag == NULL))
 	{
-		parser->stack[parser->stack_index] = new_str;
+		if (parser->event.data.scalar.length == 0)
+		{
+			tag = "tag:yaml.org,2002:null";
+		}
+		else if (*val == ':')
+		{
+			tag = "tag:ruby.yaml.org,2002:symbol";
+		}
+		else if (strtol(val, NULL, 10) != 0)
+		// this is not a good solution. i should use rb_str_to_inum, which parses strings correctly.
+		{
+			tag = "tag:yaml.org,2002:int";
+		}
+		else
+		{
+			tag = "tag:yaml.org,2002:str";
+		}
 	}
-	else if (TYPE(current_item) == T_ARRAY)
+	if(tag == NULL)
 	{
-		rb_ary_push(current_item, new_str);
+		tag = "tag:yaml.org,2002:str";
 	}
-}
-
-static void
-push(rb_yaml_parser_t *parser, VALUE item)
-{
-	parser->stack_index += 1;
-	if (parser->stack_index >= parser->stack_size)
+	VALUE scalarval = rb_str_new(val, parser->event.data.scalar.length);
+	VALUE tags = rb_ivar_get(parser->resolver, id_tags_ivar);
+	VALUE handler = rb_hash_lookup(tags, rb_str_new2(tag));
+	if (rb_respond_to(handler, rb_intern("call")))
 	{
-		rb_raise(rb_eRuntimeError, "oh god the stack depth");
+		return rb_funcall(handler, rb_intern("call"), 1, scalarval);
 	}
-	parser->stack[parser->stack_index] = item;
+	else if (rb_respond_to(handler, rb_intern("yaml_new")))
+	{
+		return rb_funcall(handler, rb_intern("yaml_new"), 1, scalarval);
+	}
+	return scalarval;
 }
 
-static void pop(rb_yaml_parser_t *parser) __attribute__ ((noinline));
 
-static void
-pop(rb_yaml_parser_t *parser)
+static bool
+yaml_next_event(rb_yaml_parser_t *parser)
 {
-	if(parser->stack_index == 0)
+	if (parser->event_valid)
 	{
-		return;
+		yaml_event_delete(&parser->event);
+		parser->event_valid = false;
 	}
-	
-	printf("something is very wrong here.");
+	if (yaml_parser_parse(parser->parser, &parser->event) == -1)
+	{
+		rb_exc_raise(rb_yaml_parser_generate_error(parser->parser));
+		parser->event_valid = false;
+	} else
+	{
+		parser->event_valid = true;
+	}
+	return parser->event_valid;
 }
 
-static void
-parse(rb_yaml_parser_t *parser)
+#define NEXT_EVENT() yaml_next_event(parser)
+#define CURRENT_ITEM(p) p->stack[p->stack_index]
+
+static VALUE
+rb_yaml_parser_slurp(rb_yaml_parser_t *parser)
 {
-	if(!next_event(parser))
+	int current_depth = parser->stack_index;
+	NEXT_EVENT();
+	switch(parser->event.type)
 	{
-		return;
-	}
-	
-	if(parser->event.type != YAML_STREAM_START_EVENT)
-	{
-		rb_raise(rb_eRuntimeError, "expected STREAM_START event");
-	}
-	
-	for(;;) 
-	{
-		if(!next_event(parser))
+		case YAML_MAPPING_START_EVENT:;
+		push(parser, rb_hash_new());
+		assert(current_depth+1 == parser->stack_index);
+		for(;;)
 		{
-			break;
+			VALUE key = rb_yaml_parser_slurp(parser);
+			if(parser->event.type == YAML_MAPPING_END_EVENT)
+			{
+				parser->stack_index--;
+				break;
+			}
+			VALUE val = rb_yaml_parser_slurp(parser);
+			assert(TYPE(CURRENT_ITEM(parser)) == T_HASH);
+			rb_hash_aset(CURRENT_ITEM(parser), key, val);
 		}
-		switch(parser->event.type)
+		
+		break;
+
+		case YAML_SEQUENCE_START_EVENT:
+		push(parser, rb_ary_new());
+		assert(current_depth+1 == parser->stack_index);
+		for(;;)
 		{
-			case YAML_DOCUMENT_START_EVENT:
-			break;
+			VALUE item = rb_yaml_parser_slurp(parser);
+			if(parser->event.type == YAML_SEQUENCE_END_EVENT) 
+			{
+				parser->stack_index--;
+				break;
+			}
+			assert(TYPE(parser->stack[parser->stack_index]) == T_ARRAY);
+			rb_ary_push(parser->stack[parser->stack_index], item);
+		}
+		break;
 
-			case YAML_MAPPING_START_EVENT:
-			printf("oh god a mapping\n");
-			break;
+		case YAML_SCALAR_EVENT:
+		push(parser, rb_yaml_get_and_interpret_scalar(parser));
+		assert(current_depth+1 == parser->stack_index);
+		parser->stack_index--;
+		break;
 
-			case YAML_SEQUENCE_START_EVENT:
-			printf("oh god parsing a sequence");
-			push(parser, rb_ary_new());
-			break;
-
-			case YAML_SCALAR_EVENT:
-			printf("oh god parsing a scalar");
-			parse_scalar(parser);
-			break;
-
-			case YAML_ALIAS_EVENT:
-			printf("oh god an alias i don't even know how to do this\n");
-			break;
-				
-			case YAML_MAPPING_END_EVENT:
-			printf("oh god a mapping ended aaagh\n");
-			break;
+		case YAML_ALIAS_EVENT:
+		printf("oh god an alias i don't even know how to do this\n");
+		break;
 			
-			case YAML_SEQUENCE_END_EVENT:
-			pop(parser);
-			break;
+		case YAML_MAPPING_END_EVENT:
+		case YAML_SEQUENCE_END_EVENT:
+		return Qnil;
+		break;
 
-			case YAML_NO_EVENT:
-			rb_raise(rb_eRuntimeError, "expected an event, got nothing");
-			
-			default: break;
-		}
-		if (parser->event.type == YAML_DOCUMENT_END_EVENT)
-		{
-			break;
-		}
+		case YAML_STREAM_END_EVENT:
+		case YAML_DOCUMENT_END_EVENT:
+		break;
+		
+		default:
+		break;
+		
 	}
+	return parser->stack[parser->stack_index+1];
 }
 
-
 static VALUE
 rb_yaml_parser_load(VALUE self, SEL sel)
 {
 	rb_yaml_parser_t *parser = RYAMLParser(self);
-	parse(parser);
-	return parser->stack[0];
+	NEXT_EVENT();
+	if(parser->event.type != YAML_STREAM_START_EVENT)
+	{
+		rb_raise(rb_eRuntimeError, "expected STREAM_START event");
+	}
+	NEXT_EVENT();
+	if(parser->event.type != YAML_DOCUMENT_START_EVENT)
+	{
+		rb_raise(rb_eRuntimeError, "expected DOCUMENT_START event");
+	}
+	
+	return rb_yaml_parser_slurp(parser);
 }
 
 static IMP rb_yaml_parser_finalize_super = NULL; 
@@ -597,9 +641,9 @@
 	
 	rb_cParser = rb_define_class_under(rb_mLibYAML, "Parser", rb_cObject);
 	rb_objc_define_method(*(VALUE *)rb_cParser, "alloc", rb_yaml_parser_alloc, 0);
+	rb_objc_define_method(rb_cParser, "initialize", rb_yaml_parser_initialize, -1);
 	rb_objc_define_method(rb_cParser, "input", rb_yaml_parser_input, 0);
 	rb_objc_define_method(rb_cParser, "input=", rb_yaml_parser_set_input, 1);
-	rb_objc_define_method(rb_cParser, "initialize", rb_yaml_parser_initialize, -1);
 	// commented methods here are just unimplemented; i plan to put them in soon.
 	//rb_objc_define_method(rb_cParser, "encoding", rb_yaml_parser_encoding, 0);
 	//rb_objc_define_method(rb_cParser, "encoding=", rb_yaml_parser_set_encoding, 1);
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20090805/165f0893/attachment-0001.html>


More information about the macruby-changes mailing list